In [None]:
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

## Load Data

In [None]:
# Read the qualitative workbook; the first spreadsheet column becomes the row index.
df = pd.read_excel(
    '../Data/qualitative_data.xlsx',
    index_col=0,
)

In [None]:
# Item column names as written for the "_S1_" tag, one line per name prefix.
original_list = [
    "SC_S1_conversa", "SC_S1_interactuaTUTOR", "SC_S1_interactuaPROFESOR", "SC_S1_participa",
    "SC_S1_horas sueño", "SC_S1_siesta", "SC_S1_nuevos aprendizajes",
    "PSI_S1_ayuda", "PSI_S1_atención", "PSI_S1_comparte", "PSI_S1_acompaña", "PSI_S1_presta",
    "PSI_S1_acerca", "PSI_S1_disposición", "PSI_S1_escucha", "PSI_S1_burla", "PSI_S1_agresión",
    "PSI_S1_desinterés",
    "RS_S1_serenidad", "RS_S1_persevera", "RS_S1_empático", "RS_S1_optimista", "RS_S1_adapta",
    "RS_S1_aprecia", "RS_S1_capaz",
    "Prosocialidad_S1_colaborar", "Prosocialidad_S1_ayuda", "Prosocialidad_S1_identifi",
    "Prosocialidad_S1_incluir", "Prosocialidad_S1_objetivocomun", "Prosocialidad_S1_valores",
]

# Repeat every name for tags _S1_ .. _S12_, keeping the original item order inside each tag.
extended_list = [
    item.replace("_S1_", f"_S{number}_")
    for number in range(1, 13)
    for item in original_list
]

In [None]:
# Recode every column whose name does not contain "horas sueño": 1 stays 1, 2 becomes 0.
mapping = {1: 1, 2: 0}

columns_to_modify = list(filter(lambda name: "horas sueño" not in name, df.columns))
df[columns_to_modify] = df.loc[:, columns_to_modify].replace(to_replace=mapping)

## S1-6

In [None]:
# Build one summed column per item over the columns tagged _S1_ .. _S6_.
#
# Source columns are named "<prefix>_S<n>_<item>"; the item name is the text after the
# last "_S<n>_" tag, and the summed column is stored in df as "<item>_S1_6".
# NOTE(review): items that share a name across prefixes (e.g. "PSI_S1_ayuda" and
# "Prosocialidad_S1_ayuda") fall into the same category and are summed together —
# confirm this is intended.

# Columns written by the aggregation cells end with these suffixes. They contain "_S1_"
# themselves, so they are skipped; otherwise re-running this cell would treat them as
# source data and create bogus categories.
aggregate_suffixes = ("_S1_6", "_S1_S12")
s1_6_tags = tuple(f"_S{i}_" for i in range(1, 7))

columns_S1_S6 = [
    col
    for col in df.columns
    if isinstance(col, str)
    and not col.endswith(aggregate_suffixes)
    and any(tag in col for tag in s1_6_tags)
]

# Group the source columns by exact item name (not by a loose endswith() match).
columns_by_category = {}
for col in columns_S1_S6:
    columns_by_category.setdefault(re.sub(r".*_S\d+_", "", col), []).append(col)

category_names = set(columns_by_category)

categories_S1_6 = []
# Iterate in sorted order: iterating the set directly would make the column order
# (and anything plotted from it) change between kernel runs.
for category in sorted(category_names):
    relevant_cols = columns_by_category[category]
    display(relevant_cols)

    # min_count=1 leaves the sum as NaN when every source value in the row is missing.
    df[f"{category}_S1_6"] = df[relevant_cols].sum(axis=1, min_count=1)
    categories_S1_6.append(f"{category}_S1_6")



In [None]:
# Bubble chart of the value distribution of every "<item>_S1_6" column.
# x = column, y = summed value, bubble size = number of rows of df with that value.
# Columns whose name contains one of `negativas` are drawn in red, the rest in blue;
# the "horas sueño" column is drawn separately in a small inset.

negativas = ["burla", "agresión", "desinterés"]

bubble_data = []
horas_sueno_data = []

# Sorted so the x-axis order does not depend on how categories_S1_6 was assembled.
for col in sorted(categories_S1_6):
    # One row per distinct value of the column, with its frequency (NaN is not counted).
    freq = (
        df[col]
        .value_counts()
        .rename_axis("Valor")
        .reset_index(name="Frecuencia")
    )
    freq["Variable"] = col
    freq["Color"] = "red" if any(neg in col for neg in negativas) else "blue"

    if "horas sueño" in col:
        horas_sueno_data.append(freq)
    else:
        bubble_data.append(freq)

# ignore_index avoids the duplicate 0..k index labels that stacking the per-column
# tables would otherwise produce.
bubble_df = pd.concat(bubble_data, ignore_index=True)
bubble_df_report_01 = bubble_df.copy()  # kept for the comparison tables further down

fig, ax = plt.subplots(figsize=(12, 7))

sns.scatterplot(
    data=bubble_df,
    x="Variable",
    y="Valor",
    size="Frecuencia",
    sizes=(50, 1000),
    alpha=0.6,
    hue="Color",
    palette={"red": "red", "blue": "blue"},
    legend=False,
    ax=ax,
)

# Style the existing tick labels instead of calling set_xticklabels(): that call
# replaces the axis formatter and warns when the tick positions are not fixed.
plt.setp(ax.get_xticklabels(), rotation=90, ha="right")
ax.set_xlabel("Variables")
ax.set_ylabel("Values")
ax.set_title("S 1-6")
ax.grid(axis="y", linestyle="--", alpha=0.7)

if horas_sueno_data:
    horas_sueno_df = pd.concat(horas_sueno_data, ignore_index=True)

    # [left, bottom, width, height] in figure fractions.
    # NOTE(review): left 0.9 + width 0.2 extends past the right edge of the figure, so
    # the inset is only fully visible when rendered with a tight bounding box — confirm
    # this placement is intended.
    ax_inset = fig.add_axes([0.9, 0.58, 0.2, 0.3])

    sns.scatterplot(
        data=horas_sueno_df,
        x="Variable",
        y="Valor",
        size="Frecuencia",
        sizes=(30, 300),
        alpha=0.6,
        color="orange",
        legend=False,
        ax=ax_inset,
    )

    plt.setp(ax_inset.get_xticklabels(), rotation=90, ha="right", fontsize=8)
    ax_inset.set_xlabel("")
    ax_inset.set_ylabel("")
    ax_inset.grid(axis="y", linestyle="--", alpha=0.5)

plt.show()



## S7-12

In [None]:
# Build one summed column per item over the columns tagged _S7_ .. _S12_.
#
# Source columns are named "<prefix>_S<n>_<item>"; the item name is the text after the
# last "_S<n>_" tag, and the summed column is stored in df as "<item>_S1_S12".
# NOTE: despite the "S1_S12" in the variable and column names, only the _S7_ .. _S12_
# columns are aggregated here. The names are kept because later cells reference them.
# NOTE(review): items that share a name across prefixes (e.g. "PSI_S7_ayuda" and
# "Prosocialidad_S7_ayuda") fall into the same category and are summed together —
# confirm this is intended.

# Columns written by the aggregation cells end with these suffixes; never treat them
# as source data.
aggregate_suffixes = ("_S1_6", "_S1_S12")
s7_12_tags = tuple(f"_S{i}_" for i in range(7, 13))

columns_S1_S12 = [
    col
    for col in df.columns
    if isinstance(col, str)
    and not col.endswith(aggregate_suffixes)
    and any(tag in col for tag in s7_12_tags)
]

# Group the source columns by exact item name (not by a loose endswith() match).
columns_by_category = {}
for col in columns_S1_S12:
    columns_by_category.setdefault(re.sub(r".*_S\d+_", "", col), []).append(col)

category_names = set(columns_by_category)

categories_S1_S12 = []
# Iterate in sorted order: iterating the set directly would make the column order
# (and anything plotted from it) change between kernel runs.
for category in sorted(category_names):
    relevant_cols = columns_by_category[category]
    display(relevant_cols)

    # min_count=1 leaves the sum as NaN when every source value in the row is missing.
    df[f"{category}_S1_S12"] = df[relevant_cols].sum(axis=1, min_count=1)
    categories_S1_S12.append(f"{category}_S1_S12")



In [None]:
# Bubble chart of the value distribution of every "<item>_S1_S12" column
# (the sums over the _S7_ .. _S12_ columns).
# x = column, y = summed value, bubble size = number of rows of df with that value.
# Columns whose name contains one of `negativas` are drawn in red, the rest in blue;
# the "horas sueño" column is drawn separately in a small inset.

negativas = ["burla", "agresión", "desinterés"]

bubble_data = []
horas_sueno_data = []

# Sorted so the x-axis order does not depend on how categories_S1_S12 was assembled.
for col in sorted(categories_S1_S12):
    # One row per distinct value of the column, with its frequency (NaN is not counted).
    freq = (
        df[col]
        .value_counts()
        .rename_axis("Valor")
        .reset_index(name="Frecuencia")
    )
    freq["Variable"] = col
    freq["Color"] = "red" if any(neg in col for neg in negativas) else "blue"

    if "horas sueño" in col:
        horas_sueno_data.append(freq)
    else:
        bubble_data.append(freq)

# ignore_index avoids the duplicate 0..k index labels that stacking the per-column
# tables would otherwise produce.
bubble_df = pd.concat(bubble_data, ignore_index=True)
bubble_df_report_02 = bubble_df.copy()  # kept for the comparison tables further down

fig, ax = plt.subplots(figsize=(12, 7))

sns.scatterplot(
    data=bubble_df,
    x="Variable",
    y="Valor",
    size="Frecuencia",
    sizes=(50, 1000),
    alpha=0.6,
    hue="Color",
    palette={"red": "red", "blue": "blue"},
    legend=False,
    ax=ax,
)

# Style the existing tick labels instead of calling set_xticklabels(): that call
# replaces the axis formatter and warns when the tick positions are not fixed.
plt.setp(ax.get_xticklabels(), rotation=90, ha="right")
ax.set_xlabel("Variables")
ax.set_ylabel("Values")
ax.set_title("S 7-12")
ax.grid(axis="y", linestyle="--", alpha=0.7)

if horas_sueno_data:
    horas_sueno_df = pd.concat(horas_sueno_data, ignore_index=True)

    # [left, bottom, width, height] in figure fractions.
    # NOTE(review): left 0.9 + width 0.2 extends past the right edge of the figure, so
    # the inset is only fully visible when rendered with a tight bounding box — confirm
    # this placement is intended.
    ax_inset = fig.add_axes([0.9, 0.58, 0.2, 0.3])

    sns.scatterplot(
        data=horas_sueno_df,
        x="Variable",
        y="Valor",
        size="Frecuencia",
        sizes=(30, 300),
        alpha=0.6,
        color="orange",
        legend=False,
        ax=ax_inset,
    )

    plt.setp(ax_inset.get_xticklabels(), rotation=90, ha="right", fontsize=8)
    ax_inset.set_xlabel("")
    ax_inset.set_ylabel("")
    ax_inset.grid(axis="y", linestyle="--", alpha=0.5)

plt.show()



## Grouping frequencies by mean

In [None]:
# Drop the aggregation suffix so both report tables use the same item labels.
for report, suffix in ((bubble_df_report_01, '_S1_6'), (bubble_df_report_02, '_S1_S12')):
    report['Variable'] = report['Variable'].str.replace(suffix, '', regex=False)

# Mean frequency per (item, value) pair in each report table.
group_keys = ['Variable', 'Valor']
mean_1 = bubble_df_report_01.groupby(group_keys)['Frecuencia'].mean()
mean_2 = bubble_df_report_02.groupby(group_keys)['Frecuencia'].mean()

# Side-by-side table: one column per report, aligned on (Variable, Valor).
df_concat = pd.concat({'S_1-6': mean_1, 'S_7-12': mean_2}, axis=1)


In [None]:
# Display the side-by-side frequency table (columns 'S_1-6' and 'S_7-12').
df_concat

In [None]:
# Items to keep, in display order; the labels are looked up on the first index level.
interest_variables = [
    'desinterés', 'agresión', 'burla', 'acerca', 'ayuda', 'disposición', 'siesta',
    'identifi', 'agua', 'participa', 'presta', 'colaborar', 'nuevos aprendizajes',
    'incluir', 'objetivocomun', 'conversa',
]
df_interest_vars = df_concat.loc[interest_variables]

In [None]:
# Spanish item name -> English label, used to relabel the 'Variable' index level.
traducciones = {
    'desinterés': 'disinterest',
    'agresión': 'aggression',
    'burla': 'mockery',
    'acerca': 'approaches',
    'ayuda': 'help',
    'disposición': 'willingness',
    'siesta': 'nap',
    'identifi': 'identifies',
    'agua': 'water',
    'participa': 'participates',  # fixed typo: was 'participateS'
    'presta': 'lends',
    'colaborar': 'collaborate',
    'nuevos aprendizajes': 'new learnings',
    'incluir': 'includes',
    'objetivocomun': 'shared goal',
    'conversa': 'communicates'
}


In [None]:
# Same table as df_interest_vars, with the 'Variable' level relabelled in English.
variable_en = df_interest_vars.index.get_level_values('Variable').map(traducciones)
valor = df_interest_vars.index.get_level_values('Valor')

translated_index = pd.MultiIndex.from_arrays(
    [variable_en, valor],
    names=['Variable_EN', 'Valor'],
)

df_concat_traducido = df_interest_vars.copy()
df_concat_traducido.index = translated_index


In [None]:
# Flatten the index so 'Variable' and 'Valor' become regular columns.
df_interest_vars_new = df_interest_vars.reset_index(drop=False)

## Grouping by total count

In [None]:
# Total count per row = value * frequency of that value, computed for each report column.
for period in ('S_1-6', 'S_7-12'):
    df_interest_vars_new[f'Total count {period}'] = (
        df_interest_vars_new['Valor'].mul(df_interest_vars_new[period])
    )

In [None]:
# Spot check: summed 'Total count S_1-6' for the single item 'desinterés'.
is_desinteres = df_interest_vars_new['Variable'].eq('desinterés')
df_interest_vars_new.loc[is_desinteres].groupby('Variable')['Total count S_1-6'].sum()


In [None]:
# Per-item totals for each report column.
bubble_df_report_01_ = df_interest_vars_new.loc[:, ['Variable', 'Total count S_1-6']]
bubble_df_report_02_ = df_interest_vars_new.loc[:, ['Variable', 'Total count S_7-12']]

# NOTE: despite their names, mean_1 / mean_2 hold sums here, not means.
mean_1 = bubble_df_report_01_.groupby('Variable')['Total count S_1-6'].sum()
mean_2 = bubble_df_report_02_.groupby('Variable')['Total count S_7-12'].sum()

# Side-by-side table indexed by item; this rebinds df_concat to the totals table.
df_concat = pd.concat({'S_1-6': mean_1, 'S_7-12': mean_2}, axis=1)


In [None]:
# Reorder the totals table into the display order of the items of interest.
interest_variables = [
    'desinterés', 'agresión', 'burla', 'acerca', 'ayuda', 'disposición', 'siesta',
    'identifi', 'agua', 'participa', 'presta', 'colaborar', 'nuevos aprendizajes',
    'incluir', 'objetivocomun', 'conversa',
]
df_interest_vars = df_concat.loc[interest_variables, :]

In [None]:
# Spanish item name -> English label, used to rename the index of the totals table.
traducciones = {
    'desinterés': 'disinterest',
    'agresión': 'aggression',
    'burla': 'mockery',
    'acerca': 'approaches',
    'ayuda': 'help',
    'disposición': 'willingness',
    'siesta': 'nap',
    'identifi': 'identifies',
    'agua': 'water',
    'participa': 'participates',  # fixed typo: was 'participateS'
    'presta': 'lends',
    'colaborar': 'collaborate',
    'nuevos aprendizajes': 'new learnings',
    'incluir': 'includes',
    'objetivocomun': 'shared goal',
    'conversa': 'communicates'
}


In [None]:
# Totals table with the item labels translated through `traducciones`.
df_concat_traducido = df_interest_vars.rename(traducciones, axis='index')


In [None]:
# Ratio of the 'S_7-12' total to the 'S_1-6' total for each item.
df_concat_traducido['ratio'] = df_concat_traducido['S_7-12'].div(df_concat_traducido['S_1-6'])