# Summary info about the trilingual participants

In [1]:
import pandas as pd

In [27]:
df_full = pd.read_csv("data/blp_cleaned.csv")
len(df_full)

109

Pass the list of partIDs that completed the three language tasks (trilinguals) or the one language task (monolinguals).

In [28]:
parts = pd.read_csv("data/parts.csv")
parts.head(2)
parts['partID'] = parts['partID'].map(str.lower)

In [29]:
df = df_full[df_full['partID'].isin(parts['partID'])].copy()
len(df)

65

In [30]:
df.head(5)

Unnamed: 0,Age,Education,Education_6_TEXT,Finished,Gender,Q1#1_1,Q1#1_2,Q1#1_3,Q10#1_1,Q10#1_2,...,Q8#1_2,Q8#1_3,Q8#1_4,Q9#1_1,Q9#1_2,Q9#1_3,Q9#1_4,Residence,prof,lang_profile
1,69,College,,True,Female,,,,,,...,,,,,,,,California,I am a monolingual speaker of English (I only ...,mono
2,61,Doctorate,,True,Female,,,,,,...,,,,,,,,Texas,I am a monolingual speaker of English (I only ...,mono
3,62,Doctorate,,True,Male,,,,,,...,,,,,,,,Texas,I am a monolingual speaker of English (I only ...,mono
5,30,Masters,,True,Female,,,,,,...,,,,,,,,"New York, USA",I am a monolingual speaker of English (I only ...,mono
6,25,College,,True,Female,,,,,,...,,,,,,,,"CA, USA",I am a monolingual speaker of English (I only ...,mono


## Gender
Breakdown by language profile and gender.

In [31]:
df.groupby(['lang_profile', 'Gender']).Gender.count()

lang_profile  Gender
l1_eng        Female    14
              Male      10
              Other      1
l3_eng        Female    18
              Male       7
mono          Female    10
              Male       5
Name: Gender, dtype: int64

## Age
Breakdown by language profile and age.

In [32]:
df['Age'] = df['Age'].astype("int64")
df.groupby(['lang_profile']).Age.agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,28.12,6.153048
l3_eng,25.52,7.309355
mono,36.533333,15.463629


## By gender and age

In [33]:
df.groupby(['lang_profile', 'Gender']).Age.agg(['mean', 'std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
lang_profile,Gender,Unnamed: 2_level_1,Unnamed: 3_level_1
l1_eng,Female,29.5,7.366765
l1_eng,Male,26.4,3.977716
l1_eng,Other,26.0,
l3_eng,Female,26.111111,8.449775
l3_eng,Male,24.0,2.768875
mono,Female,37.4,16.31768
mono,Male,34.8,15.23811


In [34]:
df = df[df['lang_profile']!="mono"].copy()

## Age of acquisition
Get age of language acquisition, where `Q1#1_1` is age Spanish was acquired, `Q1#1_2` is age Catalan was acquired, and `Q1#1_3` is age English was acquired.

In [35]:
df.groupby(['lang_profile','Q1#1_1', 'Q1#1_2', 'Q1#1_3']).partID.count()

lang_profile  Q1#1_1       Q1#1_2       Q1#1_3     
l1_eng        10           20+          Since birth    3
              11           18           Since birth    1
                           19           Since birth    1
                           20+          Since birth    1
              12           20+          Since birth    3
              13           20+          Since birth    1
              14           18           Since birth    1
                           20+          Since birth    4
              15           20+          Since birth    1
              16           20+          Since birth    1
              17           20+          Since birth    1
              20+          20+          Since birth    1
              6            19           Since birth    1
              7            20+          Since birth    1
              8            19           Since birth    1
                           20+          Since birth    1
              9            19       

For both trilinguals, average age of acquisition of English:

In [36]:
df.loc[df['Q1#1_3']=='Since birth', 'Q1#1_3'] = 0
df.loc[df['Q1#1_2']=='Since birth', 'Q1#1_2'] = 0
df.loc[df['Q1#1_1']=='Since birth', 'Q1#1_1'] = 0
df['Q1#1_3'] = df['Q1#1_3'].astype('int64')
df.groupby('lang_profile')['Q1#1_3'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,0.0,0.0
l3_eng,5.8,2.723356


In [37]:
l3 = df[df['lang_profile']=="l3_eng"].copy()
l3['Q1#1_2'] = l3['Q1#1_2'].astype('int64')
l3['Q1#1_1'] = l3['Q1#1_1'].astype('int64')

For L3 English trilinguals, average (std) age of acquisition of Catalan:

In [38]:
l3.groupby('lang_profile')['Q1#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,0.36,0.994987


For L3 English trilinguals, average (std) age of acquisition of Spanish:

In [39]:
l3.groupby('lang_profile')['Q1#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,0.84,1.841195


For L1 English trilinguals, average (std) age of acquisition of Spanish (excepting the one participant who learned Spanish at 20+ years):

In [40]:
l1 = df[df['lang_profile']=="l1_eng"].copy()
temp = l1[l1['Q1#1_1']!="20+"].copy()
temp['Q1#1_1'] = temp['Q1#1_1'].astype('int64')
temp.groupby('lang_profile')['Q1#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,11.166667,3.702721


For L1 English trilinguals, average (std) age of acquisition of Catalan (excepting those that said 20+):

In [43]:
temp0 = l1[l1['Q1#1_2']!="20+"].copy()
temp0['Q1#1_2'] = temp0['Q1#1_2'].astype('int64')
temp0.groupby('lang_profile')['Q1#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,18.666667,0.516398


## Age of comfort

In [44]:
df.loc[df['Q2#1_1']=='As early as I can remember', 'Q2#1_1'] = 0
df.loc[df['Q2#1_2']=='As early as I can remember', 'Q2#1_2'] = 0
df.loc[df['Q2#1_3']=='As early as I can remember', 'Q2#1_3'] = 0
df.groupby(['lang_profile','Q2#1_1', 'Q2#1_2', 'Q2#1_3']).partID.count()

lang_profile  Q2#1_1  Q2#1_2                 Q2#1_3               
l1_eng        0       20+                    0                        1
              11      20+                    0                        1
              16      20+                    0                        2
                      Still not comfortable  0                        1
              17      20+                    0                        4
                      Still not comfortable  0                        1
              18      20+                    0                        3
                                             5                        1
              20+     20+                    0                        7
                      Still not comfortable  0                        3
              7       19                     0                        1
l3_eng        0       0                      12                       2
                                             13                      

For L1 English trilinguals, average (std) age of comfort with Spanish (excepting the one participant who said 20+ years):

In [45]:
l1 = df[df['lang_profile']=="l1_eng"].copy()
temp2 = l1[l1['Q2#1_1']!="20+"].copy()
temp2['Q2#1_1'] = temp2['Q2#1_1'].astype('int64')
temp2.groupby('lang_profile')['Q2#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,14.866667,5.097151


For L1 English trilinguals, average (std) age of comfort with English:

In [51]:
l1['Q2#1_3'] = l1['Q2#1_3'].astype('int64')
l1.groupby('lang_profile')['Q2#1_3'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,0.2,1.0


For L3 English trilinguals, average (std) age of comfort with English (excepting the three participants who said 20+ years and the one who said still not comfortable):

In [46]:
l3 = df[df['lang_profile']=="l3_eng"].copy()
temp3 = l3[(l3['Q2#1_3']!="20+") & (l3['Q2#1_3']!="Still not comfortable")].copy()
temp3['Q2#1_3'] = temp3['Q2#1_3'].astype('int64')
temp3.groupby('lang_profile')['Q2#1_3'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,13.772727,3.337119


For L3 English trilinguals, average (std) age of comfort with Spanish:

In [47]:
l3['Q2#1_2'] = l3['Q2#1_2'].astype('int64')
l3['Q2#1_1'] = l3['Q2#1_1'].astype('int64')
l3.groupby('lang_profile')['Q2#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,2.72,5.326975


For L3 English trilinguals, average (std) age of comfort with Catalan:

In [48]:
l3.groupby('lang_profile')['Q2#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,1.6,4.387482


## Years of classes

In [52]:
df.groupby(['lang_profile','Q3#1_1', 'Q3#1_2', 'Q3#1_3']).partID.count()

lang_profile  Q3#1_1  Q3#1_2  Q3#1_3
l1_eng        0       0       20+       1
                      2       15        1
                              20+       1
              1       0       20+       1
              10      0       20+       1
                      2       16        1
              11      0       20+       1
                      1       18        1
              12      4       20+       1
              15      4       20+       1
              16      2       16        1
              17      1       20+       1
                      2       20+       1
              2       0       18        1
              20+     6       20+       1
              4       0       17        1
                      1       16        1
                      2       20+       1
              5       2       15        1
              6       1       12        1
                      2       20+       1
              9       0       16        3
                      1       20+      

For L1 English trilinguals, average (std) years of classes in Spanish (excepting the one participant who said 20+ years):

In [53]:
l1 = df[df['lang_profile']=="l1_eng"].copy()
temp4 = l1[l1['Q3#1_1']!="20+"].copy()
temp4['Q3#1_1'] = temp4['Q3#1_1'].astype('int64')
temp4.groupby('lang_profile')['Q3#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,7.791667,5.364774


For L1 English trilinguals, average (std) years of classes in English minus 13 that were more:

In [55]:
temp04 = l1[l1['Q3#1_3']!="20+"].copy()
temp04['Q3#1_3'] = temp04['Q3#1_3'].astype('int64')
temp04.groupby('lang_profile')['Q3#1_3'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,15.916667,1.564279


For L1 English trilinguals, average (std) years of classes in Catalan:

In [56]:
l1['Q3#1_2'] = l1['Q3#1_2'].astype('int64')
l1.groupby('lang_profile')['Q3#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,1.4,1.527525


For L3 English trilinguals, average (std) years of classes in Spanish (minus the five participants who said 20+):

In [57]:
l3 = df[df['lang_profile']=="l3_eng"].copy()
temp5 = l3[l3['Q3#1_1']!="20+"].copy()
temp5['Q3#1_1'] = temp5['Q3#1_1'].astype('int64')
temp5.groupby('lang_profile')['Q3#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,12.05,5.605214


For L3 English trilinguals, average (std) years of classes in Catalan (minus the five participants who said 20+):

In [58]:
temp6 = l3[l3['Q3#1_2']!="20+"].copy()
temp6['Q3#1_2'] = temp6['Q3#1_2'].astype('int64')
temp6.groupby('lang_profile')['Q3#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,12.05,5.915857


For L3 English trilinguals, average (std) years of classes in English (minus the two participants that said 20+):

In [59]:
temp7 = l3[l3['Q3#1_3']!="20+"].copy()
temp7['Q3#1_3'] = temp7['Q3#1_3'].astype('int64')
temp7.groupby('lang_profile')['Q3#1_3'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,10.043478,4.828686


## Years spent in a region that speaks a certain language

In [60]:
df.groupby(['lang_profile','Q4#1_1', 'Q4#1_2', 'Q4#1_3']).partID.count()

lang_profile  Q4#1_1  Q4#1_2  Q4#1_3
l1_eng        0       0       20+       2
              1       0       20+       4
                      1       20+       3
              2       0       20+       1
                      1       20+       1
                      2       19        1
              3       2       20+       3
                      3       20+       1
              4       2       20+       1
                      3       20+       1
                      4       20+       1
              5       3       19        1
                              20+       1
                      5       20+       1
              6       1       20+       1
              7       6       20+       1
              8       6       20+       1
l3_eng        16      16      11        1
              18      18      2         1
              19      19      0         1
              20+     19      3         1
                      20+     0         9
                              1        

For L1 English trilinguals, years spent in a region that speaks Spanish:

In [61]:
l1['Q4#1_1'] = l1['Q4#1_1'].astype('int64')
l1['Q4#1_2'] = l1['Q4#1_2'].astype('int64')
l1.groupby('lang_profile')['Q4#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,2.92,2.177919


For L1 English trilinguals, years spent in a region that speaks Catalan:

In [62]:
l1.groupby('lang_profile')['Q4#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,1.92,1.846619


For L3 English trilinguals, years spent in a region that speaks English:

In [64]:
temp07 = l3[l3['Q4#1_3']!="20+"].copy()
temp07['Q4#1_3'] = temp07['Q4#1_3'].astype('int64')
temp07.groupby('lang_profile')['Q4#1_3'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,2.375,3.585144


For L3 English trilinguals, years spent in a region that speaks Spanish:

In [70]:
temp08 = l3[l3['Q4#1_1']!="20+"].copy()
temp08['Q4#1_1'] = temp08['Q4#1_1'].astype('int64')
temp08.groupby('lang_profile')['Q4#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,15.25,4.99166


For L3 English trilinguals, years spent in a region that speaks Catalan:

In [71]:
temp09 = l3[l3['Q4#1_2']!="20+"].copy()
temp09['Q4#1_2'] = temp09['Q4#1_2'].astype('int64')
temp09.groupby('lang_profile')['Q4#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,16,4.636809


## Years in family that speaks a certain language

In [67]:
df.groupby(['lang_profile','Q5#1_1', 'Q5#1_2', 'Q5#1_3']).partID.count()

lang_profile  Q5#1_1  Q5#1_2  Q5#1_3
l1_eng        0       0       20+        8
                      3       20+        1
              1       0       20+        6
                      1       20+        1
                      2       20+        1
              2       2       20+        3
              20+     0       20+        2
              3       0       20+        1
              6       0       20+        1
              7       0       20+        1
l3_eng        0       20+     0          4
                              2          1
              19      0       0          1
              2       20+     0          1
              20+     0       0          3
                      20+     0         12
                              13         1
                              5          1
              8       20+     0          1
Name: partID, dtype: int64

For L1 English trilinguals, years spent in a family that speaks Spanish (excluding two participants that said 20+):

In [72]:
temp8 = l1[l1['Q5#1_1']!="20+"].copy()
temp8['Q5#1_1'] = temp8['Q5#1_1'].astype('int64')
temp8.groupby('lang_profile')['Q5#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,1.304348,1.844766


For L1 English trilinguals, years spent in a family that speaks Catalan:

In [73]:
l1['Q5#1_2'] = l1['Q5#1_2'].astype('int64')
l1.groupby('lang_profile')['Q5#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,0.48,0.918332


For L3 English trilinguals, years spent in a family that speaks English:

In [74]:
l3['Q5#1_3'] = l3['Q5#1_3'].astype('int64')
l3.groupby('lang_profile')['Q5#1_3'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,0.8,2.753785


For L3 English trilinguals, years spent in a family that speaks Spanish (minus 17):

In [75]:
temp02 = l3[l3['Q5#1_1']!="20+"].copy()
temp02['Q5#1_1'] = temp02['Q5#1_1'].astype('int64')
temp02.groupby('lang_profile')['Q5#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,3.625,6.802048


For L3 English trilinguals, years spent in a family that speaks Catalan (minus 21):

In [76]:
temp03 = l3[l3['Q5#1_2']!="20+"].copy()
temp03['Q5#1_2'] = temp03['Q5#1_2'].astype('int64')
temp03.groupby('lang_profile')['Q5#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,0,0.0


## Years spent in a work environment where a certain language was spoken

In [77]:
df.groupby(['lang_profile','Q6#1_1', 'Q6#1_2', 'Q6#1_3']).partID.count()

lang_profile  Q6#1_1  Q6#1_2  Q6#1_3
l1_eng        0       0       20+       3
                              3         1
                              4         1
              1       0       4         1
                              6         1
                      1       12        1
                              2         1
              11      1       20+       1
              12      3       15        1
              2       0       20+       1
              3       0       13        1
                              5         1
                      1       20+       1
                      2       7         1
                              8         1
              4       1       6         1
                      2       20+       1
                              4         1
                      4       20+       1
                              3         1
              6       5       14        1
              7       0       12        1
              8       3       15       

For L1 English, years spent in work environment where Spanish was spoken:

In [78]:
l1['Q6#1_1'] = l1['Q6#1_1'].astype('int64')
l1['Q6#1_2'] = l1['Q6#1_2'].astype('int64')
l1.groupby('lang_profile')['Q6#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,3.4,3.278719


For L1 English trilinguals, years spent in work environment where Catalan was spoken:

In [79]:
l1.groupby('lang_profile')['Q6#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,1.28,1.514376


For L1 English trilinguals, years spent in work environment where English was spoken:

In [80]:
temp04 = l1[l1['Q6#1_3']!="20+"].copy()
temp04['Q6#1_3'] = temp04['Q6#1_3'].astype('int64')
temp04.groupby('lang_profile')['Q6#1_3'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,7.823529,4.62649


For L3 English trilinguals, years spent in work environment where English was spoken:

In [81]:
l3['Q6#1_3'] = l3['Q6#1_3'].astype('int64')
l3.groupby('lang_profile')['Q6#1_3'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,2.72,2.761642


For L3 English trilinguals, years with Spanish:

In [82]:
temp05 = l3[l3['Q6#1_1']!="20+"].copy()
temp05['Q6#1_1'] = temp05['Q6#1_1'].astype('int64')
temp05.groupby('lang_profile')['Q6#1_1'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l3_eng,4.45,4.559259


For L3 English trilinguals, years with Catalan:

In [83]:
temp06 = l1[l1['Q6#1_2']!="20+"].copy()
temp06['Q6#1_2'] = temp06['Q6#1_2'].astype('int64')
temp06.groupby('lang_profile')['Q6#1_2'].agg(['mean', 'std'])

Unnamed: 0_level_0,mean,std
lang_profile,Unnamed: 1_level_1,Unnamed: 2_level_1
l1_eng,1.28,1.514376


## Percentage use of languages with friends

In [84]:
df['Q7#1_1'] = df['Q7#1_1'].astype('int64')
df['Q7#1_2'] = df['Q7#1_2'].astype('int64')
df['Q7#1_3'] = df['Q7#1_3'].astype('int64')
df['Q8#1_1'] = df['Q8#1_1'].astype('int64')
df['Q8#1_2'] = df['Q8#1_2'].astype('int64')
df['Q8#1_3'] = df['Q8#1_3'].astype('int64')
df['Q9#1_1'] = df['Q9#1_1'].astype('int64')
df['Q9#1_2'] = df['Q9#1_2'].astype('int64')
df['Q9#1_3'] = df['Q9#1_3'].astype('int64')
df.groupby('lang_profile')[['Q7#1_1', 'Q7#1_2', 'Q7#1_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q7#1_1,Q7#1_1,Q7#1_2,Q7#1_2,Q7#1_3,Q7#1_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,23.2,18.193405,14.0,15.545632,57.6,27.730849
l3_eng,31.6,24.440404,37.6,32.695565,31.6,28.089144


## Percentage of language use with family

In [85]:
df.groupby('lang_profile')[['Q8#1_1', 'Q8#1_2', 'Q8#1_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q8#1_1,Q8#1_1,Q8#1_2,Q8#1_2,Q8#1_3,Q8#1_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,8,16.329932,3.2,8.524475,85.2,26.789301
l3_eng,40,44.064347,58.4,44.31704,6.0,17.078251


## Percentage of language use at school/work

In [86]:
df.groupby('lang_profile')[['Q9#1_1', 'Q9#1_2', 'Q9#1_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q9#1_1,Q9#1_1,Q9#1_2,Q9#1_2,Q9#1_3,Q9#1_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,22,25.33114,11.6,13.747727,54.0,31.22499
l3_eng,24,27.080128,26.4,32.388269,50.8,38.179401


## Percentage you talk to yourself in each language

In [87]:
df['Q10#1_1'] = df['Q10#1_1'].astype('int64')
df['Q10#1_2'] = df['Q10#1_2'].astype('int64')
df['Q10#1_3'] = df['Q10#1_3'].astype('int64')
df['Q11#1_1'] = df['Q11#1_1'].astype('int64')
df['Q11#1_2'] = df['Q11#1_2'].astype('int64')
df['Q11#1_3'] = df['Q11#1_3'].astype('int64')
df.groupby('lang_profile')[['Q10#1_1', 'Q10#1_2', 'Q10#1_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q10#1_1,Q10#1_1,Q10#1_2,Q10#1_2,Q10#1_3,Q10#1_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,19.2,11.518102,11.2,10.92398,63.2,21.548395
l3_eng,28.4,29.676028,47.2,34.94281,24.8,23.650229


## Percentage you count in each language

In [88]:
df.groupby('lang_profile')[['Q11#1_1', 'Q11#1_2', 'Q11#1_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q11#1_1,Q11#1_1,Q11#1_2,Q11#1_2,Q11#1_3,Q11#1_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,12.8,12.083046,6.4,9.949874,78.4,18.184242
l3_eng,29.2,33.655114,60.0,35.237291,10.0,15.275252


## How well do you speak each language

In [89]:
# first cast to numeric
df['Q12_1'] = df['Q12_1'].astype('int64')
df['Q12_2'] = df['Q12_2'].astype('int64')
df['Q12_3'] = df['Q12_3'].astype('int64')
df['Q13_1'] = df['Q13_1'].astype('int64')
df['Q13_2'] = df['Q13_2'].astype('int64')
df['Q13_3'] = df['Q13_3'].astype('int64')
df['Q14_1'] = df['Q14_1'].astype('int64')
df['Q14_2'] = df['Q14_2'].astype('int64')
df['Q14_3'] = df['Q14_3'].astype('int64')
df['Q15_1'] = df['Q15_1'].astype('int64')
df['Q15_2'] = df['Q15_2'].astype('int64')
df['Q15_3'] = df['Q15_3'].astype('int64')
df['Q16_1'] = df['Q16_1'].astype('int64')
df['Q16_2'] = df['Q16_2'].astype('int64')
df['Q16_3'] = df['Q16_3'].astype('int64')
df['Q16_5_1'] = df['Q16_5_1'].astype('int64')
df['Q16_5_2'] = df['Q16_5_2'].astype('int64')
df['Q16_5_3'] = df['Q16_5_3'].astype('int64')
df['Q17_1'] = df['Q17_1'].astype('int64')
df['Q17_2'] = df['Q17_2'].astype('int64')
df['Q17_3'] = df['Q17_3'].astype('int64')
df['Q18_1'] = df['Q18_1'].astype('int64')
df['Q18_2'] = df['Q18_2'].astype('int64')
df['Q18_3'] = df['Q18_3'].astype('int64')

In [129]:
# adjust Likert scale
# def likert_scale(x):
#     return x - 3
# df[['Q12_1', 'Q12_2', 'Q12_3', 'Q13_1', 'Q13_2', 'Q13_3', 
#    'Q14_1', 'Q14_2', 'Q14_3', 'Q15_1', 'Q15_2', 'Q15_3',
#    'Q16_1', 'Q16_2', 'Q16_3', 'Q16_5_1', 'Q16_5_2', 'Q16_5_3',
#    'Q17_1', 'Q17_2', 'Q17_3', 'Q18_1', 'Q18_2', 'Q18_3']] = df[['Q12_1', 'Q12_2', 'Q12_3', 'Q13_1', 'Q13_2', 'Q13_3', 
#    'Q14_1', 'Q14_2', 'Q14_3', 'Q15_1', 'Q15_2', 'Q15_3',
#    'Q16_1', 'Q16_2', 'Q16_3', 'Q16_5_1', 'Q16_5_2', 'Q16_5_3',
#    'Q17_1', 'Q17_2', 'Q17_3', 'Q18_1', 'Q18_2', 'Q18_3']].apply(likert_scale)

In [90]:
df.groupby('lang_profile')[['Q12_1', 'Q12_2', 'Q12_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q12_1,Q12_1,Q12_2,Q12_2,Q12_3,Q12_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,4.96,1.019804,3.56,1.416569,5.96,0.2
l3_eng,5.84,0.374166,5.76,0.522813,4.88,1.013246


## How well do you understand each language

In [91]:
df.groupby('lang_profile')[['Q13_1', 'Q13_2', 'Q13_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q13_1,Q13_1,Q13_2,Q13_2,Q13_3,Q13_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,5.32,0.9,4.32,1.314027,6.0,0.0
l3_eng,6.0,0.0,5.96,0.2,5.32,1.029563


## How well do you read in each language

In [92]:
df.groupby('lang_profile')[['Q14_1', 'Q14_2', 'Q14_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q14_1,Q14_1,Q14_2,Q14_2,Q14_3,Q14_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,5.28,0.890693,4.56,1.193035,6.0,0.0
l3_eng,6.0,0.0,5.92,0.276887,5.2,0.866025


## How well do you write in each language

In [93]:
df.groupby('lang_profile')[['Q15_1', 'Q15_2', 'Q15_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q15_1,Q15_1,Q15_2,Q15_2,Q15_3,Q15_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,4.64,1.287116,3.24,1.479865,6.0,0.0
l3_eng,5.76,0.597216,5.48,0.918332,4.84,0.986577


## You feel like yourself when speaking each language

In [94]:
df.groupby('lang_profile')[['Q16_1', 'Q16_2', 'Q16_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q16_1,Q16_1,Q16_2,Q16_2,Q16_3,Q16_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,4.68,1.749286,4.2,1.870829,5.76,0.663325
l3_eng,4.36,1.955335,5.6,0.763763,4.32,1.492202


## You identify with this language culture

In [95]:
df.groupby('lang_profile')[['Q16_5_1', 'Q16_5_2', 'Q16_5_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q16_5_1,Q16_5_1,Q16_5_2,Q16_5_2,Q16_5_3,Q16_5_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,3.48,1.686219,3.56,1.804624,5.56,0.916515
l3_eng,3.8,1.979057,5.64,0.757188,3.2,1.322876


## It is important to me to use the language like a native speaker

In [96]:
df.groupby('lang_profile')[['Q17_1', 'Q17_2', 'Q17_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q17_1,Q17_1,Q17_2,Q17_2,Q17_3,Q17_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,5.56,0.960902,5.08,1.497776,5.8,0.645497
l3_eng,5.28,1.671327,5.88,0.439697,5.28,1.275408


## I want others to think I'm a native speaker of this language

In [97]:
df.groupby('lang_profile')[['Q18_1', 'Q18_2', 'Q18_3']].agg(['mean', 'std'])

Unnamed: 0_level_0,Q18_1,Q18_1,Q18_2,Q18_2,Q18_3,Q18_3
Unnamed: 0_level_1,mean,std,mean,std,mean,std
lang_profile,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
l1_eng,4.68,1.573743,4.24,1.690168,5.04,1.337909
l3_eng,5.08,1.846619,6.0,0.0,4.2,2.362908
