# Analysis of the frequency distribution of answers by language

* `answer_rel_freq.p` is a pickle file (it is loaded below with `pd.read_pickle`) produced by 'wikimotifs2/notebooks/analysis/similarity_heat_map.ipynb'
* `answer_rel_freq.p` shows the relative frequency distribution of answers, grouped by language

## Compute distances between pairs of languages

In [40]:
%matplotlib inline
import pandas as pd
from sklearn.metrics import pairwise_distances

# Load the per-language relative answer frequencies and blank out the index
# name so that no index label is attached to the frame.
# NOTE: unpickling can execute arbitrary code; only load this file from a
# trusted source.
responses = pd.read_pickle('answer_rel_freq.p').rename_axis('')

In [41]:
# Answer columns grouped by survey question
# (information depth, motivation, familiarity).
# The 'familiarty' key spelling is kept as-is: the keys are used verbatim
# further down (e.g. when building table titles).
questions = {
    'info_depth': ["fact", "in-depth", "overview"],
    'motivation': [
        'motivation_intrinsic_learning',
        'motivation_media',
        'motivation_bored/random',
        'motivation_conversation',
        'motivation_current_event',
        'motivation_personal_decision',
        'motivation_work/school',
        'motivation_other',
    ],
    'familiarty': ['familiar', 'unfamiliar'],
}

In [151]:
# Distance metrics to compare languages with.
# sklearn implements these metrics for sparse vectors:
#   cityblock, cosine, euclidean, l1, l2, manhattan
# Only three of them are selected here, purely to keep the visualization compact.
metrics = [
    'cosine',
    'euclidean',
    'l1',
]

In [189]:
# Pairwise distances between languages:
#   results[question][metric] -> square DataFrame indexed by language on both
#   axes, holding the distance (rounded to 2 decimals) between the answer
#   distributions of each pair of languages for that question.
results = {}
for question, answers in questions.items():
    answer_freqs = responses[answers]  # only the columns belonging to this question
    per_metric = {}
    for metric in metrics:
        distances = pairwise_distances(answer_freqs, metric=metric).round(2)
        per_metric[metric] = pd.DataFrame(
            distances,
            index=responses.index,
            columns=responses.index,
        )
    results[question] = per_metric

## Visualization

### Visualizing distributions 


In [224]:
# One heat-mapped table per question; the gradient is applied along axis=1,
# i.e. across the answer columns within each language row.
for question, answers in questions.items():
    styled_freqs = responses[answers].style.background_gradient(
        low=0,
        high=1,
        axis=1,
    )
    display(styled_freqs)

Unnamed: 0,fact,in-depth,overview
,,,
ar,0.320624,0.341601,0.337774
bn,0.266495,0.360919,0.368491
de,0.430681,0.208277,0.361042
en,0.377275,0.259612,0.363113
es,0.347871,0.307009,0.34512
he,0.267004,0.25665,0.476345
hi,0.194948,0.677854,0.127198
hu,0.424649,0.238138,0.337213
ja,0.294525,0.349336,0.356139


Unnamed: 0,familiar,unfamiliar
,,
ar,0.46967,0.53033
bn,0.375873,0.614365
de,0.520131,0.479869
en,0.522804,0.477196
es,0.51331,0.48669
he,0.59211,0.40789
hi,0.44885,0.55115
hu,0.725021,0.274979
ja,0.534733,0.465267


Unnamed: 0,motivation_intrinsic_learning,motivation_media,motivation_bored/random,motivation_conversation,motivation_current_event,motivation_personal_decision,motivation_work/school,motivation_other
,,,,,,,,
ar,0.439278,0.27851,0.246633,0.18329,0.237035,0.142975,0.137219,0.0443927
bn,0.54833,0.195927,0.121325,0.309081,0.309311,0.236975,0.249234,0.051178
de,0.311934,0.238805,0.146312,0.217664,0.115091,0.102162,0.21596,0.0849367
en,0.268912,0.333193,0.200111,0.205217,0.126707,0.0964685,0.0997539,0.086249
es,0.346677,0.211907,0.156995,0.221241,0.147702,0.0853688,0.305932,0.0731987
he,0.295599,0.25829,0.17755,0.280747,0.113274,0.100167,0.160406,0.0931851
hi,0.478412,0.0924416,0.0877094,0.243353,0.179087,0.129431,0.239233,0.0412398
hu,0.381441,0.286445,0.182997,0.277908,0.217265,0.257465,0.155882,0.0436678
ja,0.247616,0.271976,0.220281,0.169178,0.156246,0.115519,0.109238,0.0518171


### Visualizing pairwise distance metrics

In [222]:
# Code for visualization, print dataframes side by side
from IPython.display import display_html
def display_side_by_side(title,dfs):
    """Show several DataFrames next to each other under a common heading.

    Every frame is rendered through its pandas Styler with a background
    gradient (``low=0, high=.8, axis=0``) and 7pt header cells; the generated
    HTML tables are then switched to ``display:inline`` so that they are laid
    out horizontally instead of stacked.

    Parameters
    ----------
    title : str
        Heading text; it is inserted as-is into an ``<h2>`` element.
    dfs : iterable of pandas.DataFrame
        Frames to render, in left-to-right order.

    Returns
    -------
    None
        The HTML is sent to the notebook front end via ``display_html``.
    """
    rendered = []
    for df in dfs:
        # NOTE(review): the 1pt cell font presumably makes the numbers
        # unreadable on purpose so that only the colours are seen - confirm.
        styler = (
            df.style
            .set_properties(**{'font-size':'1pt'})
            .background_gradient(low=0,high=.8,axis=0)
            .set_table_styles([{'selector': 'th', 'props': [('font-size', '7pt')]}])
        )
        # Styler.render() was deprecated in pandas 1.4 and removed in 2.0.
        # Prefer to_html() when the installed pandas has it and fall back to
        # render() on older versions.
        to_html = getattr(styler, 'to_html', None) or styler.render
        rendered.append(to_html())
    html_str = ''.join(rendered)
    titleHtml = "<h2>%s</h2>" % title
    # Patch only the opening <table> tags. Replacing the bare word 'table'
    # would also rewrite the closing </table> tags (yielding invalid markup)
    # and any row/column label that happens to contain the word.
    inline_html = html_str.replace('<table', '<table style="display:inline"')
    display_html(titleHtml + inline_html, raw=True)

In [223]:
# For each question, show its distance tables (one per metric) side by side.
# Here `metric` is the inner dict of `results`: metric name -> distance DataFrame.
for question, metric in results.items():
    title = '{} ({} )'.format(question, ','.join(metric))
    display_side_by_side(title=title, dfs=metric.values())

Unnamed: 0,ar,bn,de,en,es,he,hi,hu,ja,nl,ro,ru,uk,zh
,,,,,,,,,,,,,,
ar,0.0,0.01,0.04,0.02,0.0,0.04,0.19,0.03,0.0,0.06,0.0,0.0,0.01,0.01
bn,0.01,0.0,0.07,0.03,0.01,0.03,0.17,0.06,0.0,0.09,0.01,0.02,0.03,0.04
de,0.04,0.07,0.0,0.01,0.02,0.06,0.37,0.0,0.05,0.0,0.04,0.03,0.02,0.05
en,0.02,0.03,0.01,0.0,0.01,0.03,0.29,0.0,0.02,0.02,0.01,0.01,0.01,0.03
es,0.0,0.01,0.02,0.01,0.0,0.04,0.23,0.02,0.01,0.03,0.0,0.0,0.0,0.02
he,0.04,0.03,0.06,0.03,0.04,0.0,0.34,0.06,0.03,0.09,0.04,0.05,0.05,0.1
hi,0.19,0.17,0.37,0.29,0.23,0.34,0.0,0.32,0.18,0.36,0.2,0.2,0.23,0.16
hu,0.03,0.06,0.0,0.0,0.02,0.06,0.32,0.0,0.04,0.0,0.02,0.02,0.01,0.03
ja,0.0,0.0,0.05,0.02,0.01,0.03,0.18,0.04,0.0,0.07,0.0,0.01,0.01,0.02

Unnamed: 0,ar,bn,de,en,es,he,hi,hu,ja,nl,ro,ru,uk,zh
,,,,,,,,,,,,,,
ar,0.0,0.07,0.17,0.1,0.04,0.17,0.42,0.15,0.03,0.2,0.02,0.04,0.07,0.1
bn,0.07,0.0,0.22,0.15,0.1,0.15,0.4,0.2,0.03,0.26,0.08,0.1,0.13,0.16
de,0.17,0.22,0.0,0.07,0.13,0.21,0.58,0.04,0.2,0.06,0.16,0.14,0.12,0.19
en,0.1,0.15,0.07,0.0,0.06,0.16,0.51,0.06,0.12,0.11,0.09,0.08,0.06,0.14
es,0.04,0.1,0.13,0.06,0.0,0.16,0.46,0.1,0.07,0.16,0.03,0.03,0.04,0.1
he,0.17,0.15,0.21,0.16,0.16,0.0,0.55,0.21,0.15,0.26,0.18,0.19,0.19,0.26
hi,0.42,0.4,0.58,0.51,0.46,0.55,0.0,0.54,0.41,0.57,0.43,0.43,0.46,0.39
hu,0.15,0.2,0.04,0.06,0.1,0.21,0.54,0.0,0.17,0.06,0.13,0.11,0.08,0.15
ja,0.03,0.03,0.2,0.12,0.07,0.15,0.41,0.17,0.0,0.23,0.05,0.07,0.1,0.13

Unnamed: 0,ar,bn,de,en,es,he,hi,hu,ja,nl,ro,ru,uk,zh
,,,,,,,,,,,,,,
ar,0.0,0.1,0.27,0.16,0.07,0.28,0.67,0.21,0.05,0.3,0.03,0.06,0.11,0.15
bn,0.1,0.0,0.32,0.22,0.16,0.21,0.63,0.31,0.05,0.4,0.13,0.17,0.21,0.22
de,0.27,0.32,0.0,0.11,0.2,0.33,0.94,0.06,0.28,0.09,0.25,0.23,0.19,0.3
en,0.16,0.22,0.11,0.0,0.09,0.23,0.84,0.09,0.18,0.19,0.14,0.13,0.09,0.2
es,0.07,0.16,0.2,0.09,0.0,0.26,0.74,0.15,0.11,0.25,0.05,0.04,0.05,0.17
he,0.28,0.21,0.33,0.23,0.26,0.0,0.84,0.32,0.24,0.41,0.29,0.3,0.31,0.43
hi,0.67,0.63,0.94,0.84,0.74,0.84,0.0,0.88,0.66,0.93,0.69,0.71,0.75,0.64
hu,0.21,0.31,0.06,0.09,0.15,0.32,0.88,0.0,0.26,0.09,0.19,0.17,0.13,0.24
ja,0.05,0.05,0.28,0.18,0.11,0.24,0.66,0.26,0.0,0.35,0.08,0.11,0.16,0.19


Unnamed: 0,ar,bn,de,en,es,he,hi,hu,ja,nl,ro,ru,uk,zh
,,,,,,,,,,,,,,
ar,0.0,0.05,0.05,0.05,0.07,0.06,0.09,0.03,0.03,0.1,0.05,0.02,0.04,0.02
bn,0.05,0.0,0.06,0.14,0.06,0.08,0.02,0.04,0.11,0.13,0.02,0.06,0.01,0.07
de,0.05,0.06,0.0,0.05,0.01,0.01,0.07,0.04,0.05,0.03,0.02,0.04,0.04,0.02
en,0.05,0.14,0.05,0.0,0.09,0.03,0.19,0.05,0.01,0.03,0.09,0.04,0.09,0.02
es,0.07,0.06,0.01,0.09,0.0,0.04,0.06,0.07,0.08,0.06,0.02,0.07,0.05,0.05
he,0.06,0.08,0.01,0.03,0.04,0.0,0.11,0.04,0.03,0.02,0.04,0.04,0.06,0.02
hi,0.09,0.02,0.07,0.19,0.06,0.11,0.0,0.09,0.17,0.17,0.02,0.08,0.04,0.11
hu,0.03,0.04,0.04,0.05,0.07,0.04,0.09,0.0,0.04,0.07,0.05,0.04,0.03,0.03
ja,0.03,0.11,0.05,0.01,0.08,0.03,0.17,0.04,0.0,0.05,0.09,0.04,0.08,0.01

Unnamed: 0,ar,bn,de,en,es,he,hi,hu,ja,nl,ro,ru,uk,zh
,,,,,,,,,,,,,,
ar,0.0,0.28,0.23,0.23,0.25,0.24,0.28,0.17,0.21,0.31,0.22,0.15,0.18,0.13
bn,0.28,0.0,0.35,0.44,0.32,0.37,0.22,0.24,0.43,0.45,0.25,0.32,0.19,0.32
de,0.23,0.35,0.0,0.17,0.11,0.09,0.25,0.23,0.17,0.13,0.14,0.16,0.19,0.19
en,0.23,0.44,0.17,0.0,0.26,0.13,0.38,0.24,0.09,0.14,0.26,0.17,0.27,0.17
es,0.25,0.32,0.11,0.26,0.0,0.18,0.21,0.26,0.25,0.21,0.13,0.22,0.19,0.21
he,0.24,0.37,0.09,0.13,0.18,0.0,0.29,0.22,0.15,0.12,0.18,0.17,0.22,0.17
hi,0.28,0.22,0.25,0.38,0.21,0.29,0.0,0.29,0.36,0.36,0.14,0.26,0.17,0.32
hu,0.17,0.24,0.23,0.24,0.26,0.22,0.29,0.0,0.24,0.29,0.21,0.2,0.16,0.17
ja,0.21,0.43,0.17,0.09,0.25,0.15,0.36,0.24,0.0,0.17,0.26,0.19,0.26,0.18

Unnamed: 0,ar,bn,de,en,es,he,hi,hu,ja,nl,ro,ru,uk,zh
,,,,,,,,,,,,,,
ar,0.0,0.73,0.58,0.53,0.63,0.57,0.62,0.38,0.38,0.72,0.51,0.34,0.39,0.31
bn,0.73,0.0,0.79,1.11,0.73,0.86,0.53,0.56,1.03,1.01,0.53,0.8,0.47,0.8
de,0.58,0.79,0.0,0.34,0.23,0.2,0.56,0.57,0.41,0.3,0.31,0.36,0.45,0.46
en,0.53,1.11,0.34,0.0,0.51,0.28,0.87,0.6,0.23,0.34,0.63,0.34,0.72,0.42
es,0.63,0.73,0.23,0.51,0.0,0.39,0.52,0.61,0.53,0.48,0.31,0.42,0.49,0.5
he,0.57,0.86,0.2,0.28,0.39,0.0,0.7,0.44,0.37,0.29,0.44,0.4,0.56,0.44
hi,0.62,0.53,0.56,0.87,0.52,0.7,0.0,0.67,0.79,0.76,0.26,0.54,0.36,0.61
hu,0.38,0.56,0.57,0.6,0.61,0.44,0.67,0.0,0.55,0.66,0.51,0.42,0.39,0.34
ja,0.38,1.03,0.41,0.23,0.53,0.37,0.79,0.55,0.0,0.43,0.57,0.35,0.65,0.43


Unnamed: 0,ar,bn,de,en,es,he,hi,hu,ja,nl,ro,ru,uk,zh
,,,,,,,,,,,,,,
ar,0.0,0.02,0.01,0.01,0.0,0.03,0.0,0.11,0.01,0.07,0.03,0.01,0.12,0.01
bn,0.02,0.0,0.04,0.04,0.03,0.09,0.01,0.21,0.05,0.15,0.09,0.05,0.21,0.0
de,0.01,0.04,0.0,0.0,0.0,0.01,0.01,0.07,0.0,0.04,0.01,0.0,0.07,0.03
en,0.01,0.04,0.0,0.0,0.0,0.01,0.01,0.07,0.0,0.04,0.01,0.0,0.07,0.03
es,0.0,0.03,0.0,0.0,0.0,0.01,0.01,0.08,0.0,0.04,0.01,0.0,0.08,0.02
he,0.03,0.09,0.01,0.01,0.01,0.0,0.04,0.03,0.01,0.01,0.0,0.01,0.03,0.07
hi,0.0,0.01,0.01,0.01,0.01,0.04,0.0,0.13,0.01,0.09,0.04,0.02,0.14,0.0
hu,0.11,0.21,0.07,0.07,0.08,0.03,0.13,0.0,0.06,0.01,0.03,0.06,0.0,0.18
ja,0.01,0.05,0.0,0.0,0.0,0.01,0.01,0.06,0.0,0.03,0.01,0.0,0.06,0.03

Unnamed: 0,ar,bn,de,en,es,he,hi,hu,ja,nl,ro,ru,uk,zh
,,,,,,,,,,,,,,
ar,0.0,0.13,0.07,0.08,0.06,0.17,0.03,0.36,0.09,0.28,0.18,0.1,0.36,0.09
bn,0.13,0.0,0.2,0.2,0.19,0.3,0.1,0.49,0.22,0.4,0.3,0.22,0.49,0.03
de,0.07,0.2,0.0,0.0,0.01,0.1,0.1,0.29,0.02,0.2,0.1,0.03,0.29,0.17
en,0.08,0.2,0.0,0.0,0.01,0.1,0.1,0.29,0.02,0.2,0.1,0.02,0.29,0.17
es,0.06,0.19,0.01,0.01,0.0,0.11,0.09,0.3,0.03,0.21,0.11,0.03,0.3,0.16
he,0.17,0.3,0.1,0.1,0.11,0.0,0.2,0.19,0.08,0.1,0.0,0.08,0.19,0.27
hi,0.03,0.1,0.1,0.1,0.09,0.2,0.0,0.39,0.12,0.31,0.21,0.13,0.39,0.06
hu,0.36,0.49,0.29,0.29,0.3,0.19,0.39,0.0,0.27,0.09,0.18,0.26,0.0,0.46
ja,0.09,0.22,0.02,0.02,0.03,0.08,0.12,0.27,0.0,0.18,0.08,0.0,0.27,0.19

Unnamed: 0,ar,bn,de,en,es,he,hi,hu,ja,nl,ro,ru,uk,zh
,,,,,,,,,,,,,,
ar,0.0,0.18,0.1,0.11,0.09,0.24,0.04,0.51,0.13,0.39,0.25,0.14,0.52,0.13
bn,0.18,0.0,0.28,0.28,0.27,0.42,0.14,0.69,0.31,0.57,0.43,0.31,0.69,0.04
de,0.1,0.28,0.0,0.01,0.01,0.14,0.14,0.41,0.03,0.29,0.15,0.04,0.41,0.23
en,0.11,0.28,0.01,0.0,0.02,0.14,0.15,0.4,0.02,0.28,0.14,0.03,0.41,0.24
es,0.09,0.27,0.01,0.02,0.0,0.16,0.13,0.42,0.04,0.3,0.16,0.05,0.43,0.22
he,0.24,0.42,0.14,0.14,0.16,0.0,0.29,0.27,0.11,0.15,0.0,0.11,0.27,0.38
hi,0.04,0.14,0.14,0.15,0.13,0.29,0.0,0.55,0.17,0.43,0.29,0.18,0.56,0.09
hu,0.51,0.69,0.41,0.4,0.42,0.27,0.55,0.0,0.38,0.12,0.26,0.37,0.0,0.64
ja,0.13,0.31,0.03,0.02,0.04,0.11,0.17,0.38,0.0,0.26,0.12,0.01,0.39,0.26


### Correlations

In [243]:
# Shorten the column labels ('motivation' -> 'm') so that the correlation
# table below is narrower.
short_names = {col: col.replace('motivation', 'm') for col in responses.columns}
responsesShortName = responses.rename(columns=short_names)

In [245]:
# Correlation between every pair of answer columns, shown as a heat map.
answer_correlations = responsesShortName.corr()
answer_correlations.style.background_gradient(low=-0.5, high=.5, axis=0)

Unnamed: 0,m_intrinsic_learning,m_media,m_bored/random,m_conversation,m_current_event,m_personal_decision,m_work/school,m_other,overview,fact,in-depth,familiar,unfamiliar
m_intrinsic_learning,1.0,-0.553167,-0.320287,0.443594,0.786511,0.602044,0.418336,-0.695585,-0.356596,-0.515512,0.557053,-0.380738,0.37111
m_media,-0.553167,1.0,0.679473,-0.285836,-0.194191,-0.155595,-0.688785,0.203643,0.442203,0.630176,-0.689154,0.165813,-0.161382
m_bored/random,-0.320287,0.679473,1.0,-0.410577,0.000221248,-0.00917467,-0.52275,-0.00533014,0.280901,0.105349,-0.247862,-0.270797,0.278704
m_conversation,0.443594,-0.285836,-0.410577,1.0,0.384476,0.54899,0.442348,-0.026159,0.0975257,-0.139362,0.0210073,0.05315,-0.0673708
m_current_event,0.786511,-0.194191,0.000221248,0.384476,1.0,0.807111,0.200199,-0.693558,-0.182412,-0.261961,0.280353,-0.242071,0.228714
m_personal_decision,0.602044,-0.155595,-0.00917467,0.54899,0.807111,1.0,0.0850555,-0.514881,-0.0133491,-0.121606,0.081477,0.0596435,-0.0739398
m_work/school,0.418336,-0.688785,-0.52275,0.442348,0.200199,0.0850555,1.0,-0.0144304,-0.246281,-0.179889,0.272197,-0.211474,0.205931
m_other,-0.695585,0.203643,-0.00533014,-0.026159,-0.693558,-0.514881,-0.0144304,1.0,0.459532,0.330918,-0.509594,0.13519,-0.134239
overview,-0.356596,0.442203,0.280901,0.0975257,-0.182412,-0.0133491,-0.246281,0.459532,1.0,0.202573,-0.779849,0.239785,-0.245966
fact,-0.515512,0.630176,0.105349,-0.139362,-0.261961,-0.121606,-0.179889,0.330918,0.202573,1.0,-0.770905,0.51045,-0.508434
