In [None]:
import os
import numpy as np
import pandas as pd
from scipy.stats import spearmanr
import seaborn as sns
import matplotlib.pyplot as plt
# Make the results directory the working directory so the relative file names
# used below resolve against it.
# NOTE(review): machine-specific absolute path; the notebook only runs where
# this directory exists.
os.chdir('/Users/andrei-macpro/Documents/Data/Results/clustering_results')

In [None]:
def calculate_pvalues(df):
    """Return the matrix of pairwise Spearman p-values for the numeric columns of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; columns that are neither numeric nor boolean are ignored.

    Returns
    -------
    pandas.DataFrame
        Square float frame whose index and columns are the retained column
        names. Each cell holds element ``[1]`` (the p-value) of the
        ``scipy.stats.spearmanr`` result for that pair of columns, rounded to
        4 decimals. ``nan_policy='omit'`` is passed so missing values do not
        propagate into the result.
    """
    numeric = df.select_dtypes(include=["number", "bool"])
    cols = numeric.columns
    # Pre-allocate a float matrix (NaN-filled) instead of an object-dtype join.
    pvalues = pd.DataFrame(index=cols, columns=cols, dtype=float)
    for r in cols:
        for c in cols:
            # One .loc write: the chained form `pvalues[r][c] = ...` assigns into a
            # temporary and is silently discarded under pandas copy-on-write.
            pvalues.loc[c, r] = round(
                spearmanr(numeric[r], numeric[c], nan_policy='omit')[1], 4
            )
    return pvalues

In [None]:
# Load the speech-proportion workbook; the first spreadsheet column becomes the index.
data = pd.read_excel(
    'proportion_speech.xlsx',
    engine='openpyxl',
    index_col=0,
)


In [None]:
# Two row masks split at Age 35: first "over 35", then "35 or under".
condition = [
    data["Age"].gt(35),
    data["Age"].le(35),
]

In [None]:
# Labels matched by position to the two masks in `condition`.
values = [
    'old',
    'young',
]

In [None]:
# Label each row 'old' / 'young' from the masks in `condition`.
# An explicit string default is required: with the implicit default of 0,
# np.select cannot find a common dtype with the string choices on NumPy 2 and
# raises TypeError (older NumPy silently produced the label '0').
# Rows matching neither mask (e.g. a missing Age) are labelled 'unknown'.
data['age_bracket'] = np.select(condition, values, default='unknown')


In [None]:
# Boolean row masks for the two age groups, split at 35.
older = data["Age"].gt(35)
younger = data["Age"].le(35)

In [None]:
# Spearman correlation matrix for the younger group.
# The frame also holds string columns (Gender, label, age_bracket); restrict to
# numeric/boolean columns explicitly, because DataFrame.corr raises on
# non-numeric data in pandas >= 2 instead of dropping it.
data[younger].select_dtypes(include=["number", "bool"]).corr(method='spearman')

In [None]:
# Spearman p-values for the rows selected by the `younger` mask.
calculate_pvalues(
    data.loc[younger]
)

In [None]:
# Spearman correlation matrix for the older group.
# The frame also holds string columns (Gender, label, age_bracket); restrict to
# numeric/boolean columns explicitly, because DataFrame.corr raises on
# non-numeric data in pandas >= 2 instead of dropping it.
data[older].select_dtypes(include=["number", "bool"]).corr(method='spearman')

In [None]:
# Spearman p-values for the rows selected by the `older` mask.
calculate_pvalues(
    data.loc[older]
)

In [None]:
# Per-gender Spearman correlation matrices.
# Select the numeric/boolean columns on the grouped frame first: the string
# columns (label, age_bracket) make the grouped corr raise in pandas >= 2.
(
    data
    .groupby('Gender')[list(data.select_dtypes(include=["number", "bool"]).columns)]
    .corr(method='spearman')
)

In [None]:
# Spearman p-values within the "F" gender group.
calculate_pvalues(
    data.groupby(by='Gender').get_group("F")
)

In [None]:
# Spearman p-values within the "M" gender group.
calculate_pvalues(
    data.groupby(by='Gender').get_group("M")
)

In [None]:
# Rows whose Gender is "M".
boys = (
    data
    .groupby(by='Gender')
    .get_group("M")
)

In [None]:
# Rows whose Gender is "F".
girls = (
    data
    .groupby(by='Gender')
    .get_group("F")
)

In [None]:
# Split the frame by age bracket.
# NOTE: `older` was bound to a boolean mask earlier in the notebook; from this
# cell on it is a DataFrame.
older = (
    data
    .groupby(by='age_bracket')
    .get_group("old")
)
young = (
    data
    .groupby(by='age_bracket')
    .get_group("young")
)


In [None]:
# Number of older rows where the child did not speak during the meal.
# Author's note: 1 in girls and 2 in boys
int(older['Proportion speech child meal'].eq(0).sum())

In [None]:
# Split the frame by the `label` column into its "rad" and "no_rad" groups.
rad = (
    data
    .groupby(by='label')
    .get_group("rad")
)
no_rad = (
    data
    .groupby(by='label')
    .get_group("no_rad")
)

In [None]:
# (rad, no_rad) counts of rows where child speech during the meal is zero.
tuple(
    int(group['Proportion speech child meal'].eq(0).sum())
    for group in (rad, no_rad)
)

In [None]:
# (rad, no_rad) counts of rows where child speech during play is zero.
tuple(
    int(group['Proportion speech child play'].eq(0).sum())
    for group in (rad, no_rad)
)

In [None]:
# Show the "F" rows of the older group.
(
    older
    .groupby(by='Gender')
    .get_group("F")
)

In [None]:
# Number of younger rows where the child did not speak during the meal.
# Author's note: 3 in girls
int(young['Proportion speech child meal'].eq(0).sum())

In [None]:
# Number of older rows where the child did not speak during play.
int(older['Proportion speech child play'].eq(0).sum())

In [None]:
# Number of younger rows where the child did not speak during play.
# Author's note: girls mostly 0
int(young['Proportion speech child play'].eq(0).sum())

In [None]:
# Number of boys with zero child speech during the meal.
int(boys['Proportion speech child meal'].eq(0).sum())

In [None]:
# Number of girls with zero child speech during the meal.
int(girls['Proportion speech child meal'].eq(0).sum())

In [None]:
# Number of boys with zero child speech during play.
int(boys['Proportion speech child play'].eq(0).sum())

In [None]:
# Number of girls with zero child speech during play.
int(girls['Proportion speech child play'].eq(0).sum())

In [None]:
# Row count of the boys subset.
boys.shape[0]

In [None]:
# Row count of the girls subset.
girls.shape[0]

In [None]:
# Caregiver speech share during the meal regressed on RINAB (all rows pooled).
rinab_cg_meal = sns.lmplot(
    data=data,
    x="RINAB",
    y="Proportion speech caregiver meal",
    truncate=False,
)

In [None]:
# Caregiver speech share during play regressed on RINAB (all rows pooled).
rinab_cg_play = sns.lmplot(
    data=data,
    x="RINAB",
    y="Proportion speech caregiver play",
    truncate=False,
)

In [None]:
# Child speech during the meal against DAI, one regression line per gender,
# saved to child_meal_dai.png.
# The column is referenced by its "Proportion ..." name because the rename to
# the short names only happens further down the notebook.
# hue_order pins the plotting order so the hand-written legend labels
# (Female first, Male second) are attached to the matching group.
g = sns.lmplot(x="DAI", y='Proportion speech child meal', hue='Gender',
               hue_order=['F', 'M'], truncate=False, data=data,
               legend=False, height=7, aspect=1.6)
# NOTE(review): "p<0.5" looks like a typo for "p<0.05"; confirm against the
# computed p-values before publishing the figure.
new_labels = ['Female, r = 0.43, p<0.5', 'Male, r = -0.45, p<0.5']
plt.ylabel("speech child meal", fontsize=30)
plt.xlabel("DAI", fontsize=30)
plt.legend(title='Gender', loc='upper left', labels=new_labels, fontsize='large')
os.chdir('/Users/andrei-macpro/Documents/Data/Results/clustering_results')
plt.savefig('child_meal_dai.png')

In [None]:
# Child speech during play against RINAB, one regression line per age bracket.
# hue_order pins the plotting order so the hand-written legend labels
# ('young' first, 'old' second) are attached to the matching group.
g = sns.lmplot(x="RINAB", y='Proportion speech child play', hue='age_bracket',
               hue_order=['young', 'old'], truncate=False, data=data,
               legend=False, height=7, aspect=1.6)
new_labels = ['young', 'old']
plt.ylabel("speech child play", fontsize=30)
# The x variable plotted is RINAB (the label previously said "DAI").
plt.xlabel("RINAB", fontsize=30)
# The hue is the age bracket (the legend title previously said "Gender").
plt.legend(title='Age bracket', loc='upper left', labels=new_labels, fontsize='large')

In [None]:
# Child speech during the meal against RINAB, one regression line per gender.
# `markers` must exist before the lmplot call that consumes it; it was
# previously assigned after the call, which fails on a fresh kernel.
markers = ['o', '^']
new_labels = ['Female', 'Male']
# hue_order pins the plotting order so markers and legend labels
# (Female first, Male second) are attached to the matching group.
g = sns.lmplot(x="RINAB", y='Proportion speech child meal', hue='Gender',
               hue_order=['F', 'M'], markers=markers, truncate=False,
               legend=False, data=data, height=7, aspect=1.6, x_jitter=.2)
plt.ylabel("speech child meal", fontsize=30)
plt.xlabel("RINAB", fontsize=30)
plt.legend(title='Gender', loc='upper right', labels=new_labels, fontsize='large')

In [None]:
# Regression of a caregiver-speech column on RINAB, one line per age bracket.
# NOTE(review): y='speech caregiver ' (with a trailing space) matches none of the
# column names used elsewhere in this notebook, and the y-axis label below says
# "speech child meal" while y refers to caregiver speech. Decide which column
# (caregiver meal or caregiver play) is intended and fix both.
# NOTE(review): no hue_order is given, so whether 'Older' / 'Younger' land on the
# right groups presumably depends on row order — confirm.
g = sns.lmplot(x="RINAB", y='speech caregiver ', hue='age_bracket',  truncate=False, legend=False , data=data, height=7, aspect=1.6) 
new_labels = ['Older' ,'Younger']
plt.ylabel("speech child meal", fontsize=30)
plt.xlabel("RINAB", fontsize=30)
#for t, l in zip(g._legend.texts, new_labels): t.set_text(l)
plt.legend(title='Age bracket', loc='upper right', labels= new_labels, fontsize='large')

In [None]:
# Child speech during play regressed on Age, split by gender.
age_play = sns.lmplot(
    data=data,
    x="Age",
    y="Proportion speech child play",
    hue='Gender',
    truncate=False,
)

In [None]:
# RINAB regressed on DAI, split by gender.
dai_rinab = sns.lmplot(
    data=data,
    x="DAI",
    y="RINAB",
    hue='Gender',
    truncate=False,
)

In [None]:
# Caregiver speech during the meal regressed on RINAB, split by gender.
rinab_meal = sns.lmplot(
    data=data,
    x="RINAB",
    y="Proportion speech caregiver meal",
    hue='Gender',
    truncate=False,
)

In [None]:
# Child speech during the meal regressed on RINAB, split by gender.
rinab_meal = sns.lmplot(
    data=data,
    x="RINAB",
    y="Proportion speech child meal",
    hue='Gender',
    truncate=False,
)

In [None]:
# Child speech during play regressed on RINAB, split by gender.
rinab_meal = sns.lmplot(
    data=data,
    x="RINAB",
    y="Proportion speech child play",
    hue='Gender',
    truncate=False,
)

In [None]:
# Histograms of caregiver speech for meal and play, drawn one after the other.
tuple(
    data[column].hist()
    for column in ('Proportion speech caregiver meal', 'Proportion speech caregiver play')
)

In [None]:
# Density curves of child speech for meal and play, drawn one after the other.
tuple(
    data[column].plot.density()
    for column in ('Proportion speech child meal', 'Proportion speech child play')
)

In [None]:
# Density curves of caregiver speech for meal and play, drawn one after the other.
tuple(
    data[column].plot.density()
    for column in ('Proportion speech caregiver meal', 'Proportion speech caregiver play')
)

In [None]:
# Density curves for the first four columns on one wide axis, x clipped to [-10, 100].
data.iloc[:, :4].plot.density(figsize=(10, 5))
plt.xlim(-10, 100)

In [None]:
# Drop the "Proportion " prefix from the four speech columns.
data = data.rename(columns={
    f"Proportion {short_name}": short_name
    for short_name in (
        "speech caregiver meal",
        "speech caregiver play",
        "speech child meal",
        "speech child play",
    )
})

In [None]:
# Density Plot and Histogram of all arrival delays
sns.distplot(data.iloc[:,0], hist=True, kde=True, 
             bins=int(180/5), color = 'darkblue', 
             hist_kws={'edgecolor':'black'},
             kde_kws={'linewidth': 4})

In [None]:
# Stacked KDE of caregiver speech during the meal, by age bracket.
sns.kdeplot(
    x="speech caregiver meal",
    hue="age_bracket",
    multiple="stack",
    data=data,
)

In [None]:
# Stacked KDE of child speech during the meal, by age bracket.
sns.kdeplot(
    x="speech child meal",
    hue="age_bracket",
    multiple="stack",
    data=data,
)

In [None]:
# Stacked KDEs of child speech during the meal, first by gender and then by
# age bracket; cut=0 keeps the curves within the observed data range.
g = sns.kdeplot(
    x="speech child meal",
    hue="Gender",
    multiple="stack",
    cut=0,
    data=data,
)
j = sns.kdeplot(
    x="speech child meal",
    hue="age_bracket",
    multiple="stack",
    cut=0,
    data=data,
)

In [None]:
# sns.kdeplot(data=data, x="speech caregiver meal", hue="Gender", multiple="stack", cut=0)

In [None]:
# Pair grid of the two meal-speech columns coloured by age bracket:
# histograms on the diagonal, scatter plots elsewhere.
g = sns.PairGrid(
    data[[
        'speech caregiver meal',
        'speech child meal',
        'speech caregiver play',
        'speech child play',
        'age_bracket',
        'Age',
    ]],
    hue='age_bracket',
    vars=["speech caregiver meal", "speech child meal"],
)
g.map_diag(sns.histplot)
g.map_offdiag(sns.scatterplot)

In [None]:
# Pair plot of the speech columns plus RINAB and Age, coloured by age bracket.
sns.pairplot(
    data[[
        'speech caregiver meal',
        'speech child meal',
        'speech caregiver play',
        'speech child play',
        'age_bracket',
        'RINAB',
        'Age',
    ]],
    height=2.5,
    hue="age_bracket",
)


In [None]:
# Display the whole frame for inspection.
data

In [None]:
# NOTE(review): this writes only the `age_bracket` column to
# "proportion_speech.xlsx", the same file name the notebook loads its data from,
# so the other columns of that workbook are replaced. Confirm this is intended.
data['age_bracket'].to_excel("proportion_speech.xlsx")  

In [None]:
# Write the full frame back to "proportion_speech.xlsx", the file it was loaded from.
# NOTE(review): by this point the frame carries the added `age_bracket` column and
# the shortened column names, so a later run that reads this file will no longer
# find the original "Proportion ..." headers — confirm, or save under a new name.
data.to_excel("proportion_speech.xlsx")  

In [None]:
from statsmodels.multivariate.manova import MANOVA

In [None]:
# MANOVA of the first four columns on age bracket, gender and label.
# `from_formula` takes a formula string and a data frame; it has no usable
# `endog=` / `exog=` call form, so the previous call failed for lack of the
# required `formula` and `data` arguments.
# The left-hand side is built from the first four column names (the same
# columns as data.iloc[:, 0:4]); Q("...") quotes names that contain spaces.
maov = MANOVA.from_formula(
    " + ".join(f'Q("{column}")' for column in data.columns[:4])
    + " ~ age_bracket + Gender + label",
    data=data,
)

In [None]:
# KDE of child speech during the meal.
# Uses the short column name: the "Proportion " prefix was removed by the
# rename earlier in the notebook, so the old name no longer exists here.
sns.displot(data, x='speech child meal', kind='kde', color='black')

In [None]:
# Show the caregiver meal-speech column.
# Uses the short column name: the "Proportion " prefix was removed by the
# rename earlier in the notebook, so the old name no longer exists here.
data['speech caregiver meal']