#### Analytics: new data collection using CRAVE

In [1]:
import pandas as pd
import numpy as np

In [2]:
# column of the template that holds each variable's title/name
name_col = "Title and name of the variable"
# template from Flavio: only CP variables already defined for collection
cp_to_collect_file = "TM_template_CP_only.xlsx"
# first sheet, header taken from row index 4 (0-based), spreadsheet columns A to D only
cp_to_collect = pd.read_excel(
    cp_to_collect_file,
    sheet_name=0,
    header=4,
    usecols="A:D",
)

In [3]:
def _rows_matching(names, pattern, lowercase=False):
    """Return the positions of the entries of ``names`` that contain ``pattern``.

    Parameters
    ----------
    names : pandas.Series
        Text entries to search.
    pattern : str
        Pattern handed to ``Series.str.contains`` (a regular expression by default).
    lowercase : bool, default False
        Lower-case the entries before matching; use an all-lowercase pattern with it.

    Returns
    -------
    numpy.ndarray
        Positional indices of the matching entries.
    """
    if lowercase:
        names = names.str.lower()
    # na=False: a missing cell counts as "no match". Without it the mask can carry NaN,
    # and np.where treats NaN as truthy when the mask is an object array.
    return np.where(names.str.contains(pattern, na=False).values)[0]


# variable names to scan (the last row of the template is left out)
_var_names = cp_to_collect[name_col][:-1]

# gross variables logic
logic_total = _rows_matching(_var_names, "total", lowercase=True)
# breakdown by sex logic (+2)
logic_by_sex = _rows_matching(_var_names, "BY SEX")
# breakdown by age logic (check +18 or +4)
logic_by_age = _rows_matching(_var_names, "BY AGE")
# breakdown with disabilities (+1 if not in total): rows already counted as totals are removed
logic_with_disab = np.setdiff1d(
    _rows_matching(_var_names, "with disab", lowercase=True), logic_total
)
# breakdown by category (+3)
logic_by_categ = _rows_matching(_var_names, "BY CATEG")
# breakdown by reason (check +7 three or +6 two)
logic_by_reason = _rows_matching(_var_names, "REASON")
# list all logics: totals first, then the breakdowns in a fixed order
list_by_breaks = [logic_total, logic_by_sex, logic_by_age,
                logic_with_disab, logic_by_categ, logic_by_reason]

In [4]:
# work on breakdowns! Function
def get_break_in_sub_top(sub_top, rows_sub_top, row_end, logic_by_break):
    """Count, per sub-topic, how many rows of each breakdown fall inside it.

    Sub-topic ``k`` covers the rows strictly between ``rows_sub_top[k]`` and the
    start row of the next sub-topic; the last sub-topic is closed by ``row_end``.

    Parameters
    ----------
    sub_top : sequence of str
        Sub-topic names; they become the keys of the result.
    rows_sub_top : sequence of int
        Start row of each sub-topic, in the same order as ``sub_top``.
    row_end : int
        Row that closes the last sub-topic (exclusive).
    logic_by_break : sequence of numpy.ndarray
        Row-index arrays. The first entry is skipped; the following entries are
        read, in order, as sex, age, disab, category and reason.

    Returns
    -------
    dict
        ``{sub_topic: {breakdown: count}}`` with plain ``int`` counts, so the
        printed result does not depend on how NumPy renders its scalars.
        Empty ``sub_top`` / ``rows_sub_top`` give an empty dict.
    """
    # breakdowns list (pre-defined order of logic_by_break[1:])
    break_list = ["sex", "age", "disab", "category", "reason"]
    # init dict
    break_in_st = dict.fromkeys(sub_top)
    # upper bound of each sub-topic: the next start row, or row_end for the last one
    upper_bounds = list(rows_sub_top[1:]) + [row_end]
    for k, lower in enumerate(rows_sub_top):
        upper = upper_bounds[k]
        counts = dict.fromkeys(break_list)
        for j, logic_array in enumerate(logic_by_break[1:]):
            inside = (logic_array > lower) & (logic_array < upper)
            counts[break_list[j]] = int(inside.sum())
        break_in_st[sub_top[k]] = counts

    return break_in_st

##### Alternative care

In [5]:
# search for the topic: first row whose lower-cased name contains each title
topic_ini = "children in alternative care"
topic_end = "access to justice for children"
_names_lower = cp_to_collect[name_col][:-1].str.lower()
row_ini, row_end = (
    np.where(_names_lower.str.contains(title).values)[0][0]
    for title in (topic_ini, topic_end)
)
print(f"Alt. Care: {[row_ini, row_end]}")

Alt. Care: [1, 624]


In [6]:
# subtopics, used as lower-case search patterns against the variable names
# NOTE(review): "resdiential" looks like a typo, but it is the pattern being searched and
# the recorded run found a match, so it presumably mirrors the template - confirm before changing
sub_top = [
    "children in formal resdiential care",
    "children in formal family-based care",
    "adoptions of children",
]
_names_lower = cp_to_collect[name_col][:-1].str.lower()
# first matching row of every sub-topic title
rows_sub_top = [
    np.where(_names_lower.str.contains(title).values)[0][0]
    for title in sub_top
]
print(f"Alt. Care Sub: {rows_sub_top}")

Alt. Care Sub: [2, 158, 531]


In [7]:
# assign logic total to sub_top: a sub-topic spans the rows strictly between its own
# start row and the next sub-topic's start row; the last one always limits with row_end.
# Bounds are paired explicitly so nothing depends on a loop index surviving the loop
# (that index is undefined when there is only one sub-topic).
_upper_rows = list(rows_sub_top[1:]) + [row_end]
gross_in_sub_top = [
    # int(): plain Python counts, so the printed dict does not depend on NumPy's scalar repr
    int(((logic_total > lower) & (logic_total < upper)).sum())
    for lower, upper in zip(rows_sub_top, _upper_rows)
]
print(dict(zip(sub_top,gross_in_sub_top)))

{'children in formal resdiential care': 8, 'children in formal family-based care': 16, 'adoptions of children': 4}


In [8]:
# work on breakdowns! Counts of each breakdown inside every alternative-care sub-topic
break_in_sub_top = get_break_in_sub_top(sub_top, rows_sub_top, row_end, list_by_breaks)
print(break_in_sub_top)

{'children in formal resdiential care': {'sex': 8, 'age': 6, 'disab': 2, 'category': 0, 'reason': 1}, 'children in formal family-based care': {'sex': 16, 'age': 14, 'disab': 11, 'category': 0, 'reason': 4}, 'adoptions of children': {'sex': 4, 'age': 4, 'disab': 0, 'category': 0, 'reason': 0}}


##### Access to Justice

In [9]:
# search for the topic: first row whose lower-cased name contains each title
topic_ini = "access to justice for children"
topic_end = "violence against children and harmful practices"
_names_lower = cp_to_collect[name_col][:-1].str.lower()
row_ini, row_end = (
    np.where(_names_lower.str.contains(title).values)[0][0]
    for title in (topic_ini, topic_end)
)
print(f"Access Justice: {[row_ini, row_end]}")

Access Justice: [624, 798]


In [10]:
# subtopics, used as lower-case search patterns against the variable names
sub_top = [
    "child victims and witnesses of crime",
    "diversion, sentencing and detention of children",
    "access to independent human rights mechanisms"
]
_names_lower = cp_to_collect[name_col][:-1].str.lower()
# first matching row of every sub-topic title
rows_sub_top = [
    np.where(_names_lower.str.contains(title).values)[0][0]
    for title in sub_top
]
# label fixed: this section is Access to Justice, not Alternative Care
print(f"Access Justice Sub: {rows_sub_top}")

Alt. Care Sub: [625, 718, 773]


In [11]:
# assign logic total to sub_top: a sub-topic spans the rows strictly between its own
# start row and the next sub-topic's start row; the last one always limits with row_end.
# Bounds are paired explicitly so nothing depends on a loop index surviving the loop
# (that index is undefined when there is only one sub-topic).
_upper_rows = list(rows_sub_top[1:]) + [row_end]
gross_in_sub_top = [
    # int(): plain Python counts, so the printed dict does not depend on NumPy's scalar repr
    int(((logic_total > lower) & (logic_total < upper)).sum())
    for lower, upper in zip(rows_sub_top, _upper_rows)
]
print(dict(zip(sub_top,gross_in_sub_top)))

{'child victims and witnesses of crime': 4, 'diversion, sentencing and detention of children': 6, 'access to independent human rights mechanisms': 1}


In [12]:
# work on breakdowns! Counts of each breakdown inside every access-to-justice sub-topic
break_in_sub_top = get_break_in_sub_top(sub_top, rows_sub_top, row_end, list_by_breaks)
print(break_in_sub_top)

{'child victims and witnesses of crime': {'sex': 4, 'age': 4, 'disab': 0, 'category': 0, 'reason': 0}, 'diversion, sentencing and detention of children': {'sex': 6, 'age': 6, 'disab': 0, 'category': 0, 'reason': 0}, 'access to independent human rights mechanisms': {'sex': 1, 'age': 1, 'disab': 1, 'category': 0, 'reason': 0}}


##### Violence

In [13]:
# search for the last topic: it starts at its own title row and runs to the end of the table.
# The start row is looked up by title instead of being copied from row_end, so re-running
# this cell gives the same answer (copying would pick up the table length on a second run).
_violence_topic = "violence against children and harmful practices"
row_ini = np.where(cp_to_collect[name_col][:-1].str.lower().str.contains(_violence_topic).values)[0][0]
# the last row of the template is left out, as in the other searches
row_end = len(cp_to_collect[:-1])
print(f"Violence: {[row_ini, row_end]}")

Violence: [798, 883]


In [14]:
# no subtopics, just violence: a single label stands in for the whole topic
sub_top = ["violence"]

In [15]:
# assign logic total to topic violence: total rows strictly between row_ini and row_end
in_top = (logic_total > row_ini) & (logic_total < row_end)
gross_in_sub_top = [in_top.sum()]
print({label: count for label, count in zip(sub_top, gross_in_sub_top)})

{'violence': 3}


In [16]:
# work on breakdowns! The topic's first row is passed as the only sub-topic start row
rows_sub_top = [row_ini]
break_in_sub_top = get_break_in_sub_top(sub_top, rows_sub_top, row_end, list_by_breaks)
print(break_in_sub_top)

{'violence': {'sex': 3, 'age': 3, 'disab': 3, 'category': 3, 'reason': 0}}
