In [None]:
import numpy as np
import pandas as pd

from kmeans import KMeans
from pca import print_pca, print_nmf

from data_common import EMOTIONS_FEATURES, PERSONALITY_FEATURES, DAA, SEX

# Load the raw dataset. The path is relative, so the notebook has to be run
# from the directory that contains `data/`.
df = pd.read_csv('data/data_ellsberg.csv')

In [None]:
# Encode sex numerically: 0 for 'Male', 1 for every other value (missing
# values included). The dtype guard keeps this cell idempotent: once the column
# is numeric, re-running the comparison against 'Male' would be False for every
# row and silently overwrite the whole column with 1.
# NOTE(review): the encoding uses the literal 'Sex' while the selections below
# use the imported SEX constant - presumably the same column name; confirm.
if not pd.api.types.is_numeric_dtype(df['Sex']):
    df['Sex'] = np.where(df['Sex'] == 'Male', 0, 1)

# Columns shared by both per-group frames (everything except the DAA column).
feature_columns = PERSONALITY_FEATURES + EMOTIONS_FEATURES + [SEX]

# Restrict the working frame to the analysed columns, DAA included.
df = df[PERSONALITY_FEATURES + EMOTIONS_FEATURES + [DAA, SEX]]

# Split by the DAA column; the DAA column itself is dropped from each group
# because it is constant within a group.
not_daa_df = df.loc[df[DAA] == 0, feature_columns]
daa_df = df.loc[df[DAA] == 1, feature_columns]

In [None]:
# Per-column mean and standard deviation (pandas' default `std`) for the whole
# sample and for each DAA group. `df` still carries the DAA column here, the
# two group frames do not.
all_mean, all_std = df.mean(axis=0), df.std(axis=0)

# Rows where DAA == 0.
not_daa_mean, not_daa_std = not_daa_df.mean(axis=0), not_daa_df.std(axis=0)

# Rows where DAA == 1.
daa_mean, daa_std = daa_df.mean(axis=0), daa_df.std(axis=0)

In [None]:
def to_table(not_daa, daa):
    """Stack two per-column summaries into a two-row table.

    Args:
        not_daa: Series of values for the non-DAA group.
        daa: Series of values for the DAA group.

    Returns:
        DataFrame with one row per group, indexed ``'notDAA'`` and ``'DAA'``
        (in that order), and one column per label of the input Series.
    """
    # Place the groups side by side first, then flip so each group is a row.
    side_by_side = pd.concat([not_daa, daa], axis=1)
    table = side_by_side.transpose()
    table.index = ['notDAA', 'DAA']
    return table

def to_summary(mean, std):
    """Print one ``label: mean(std)`` line per entry, three decimals each.

    Args:
        mean: Series of means; its index supplies the printed labels.
        std: Iterable of standard deviations in the same order as ``mean``.

    Note:
        Values are paired by position, not by label, and iteration stops at
        the shortest input.
    """
    for label, avg, spread in zip(mean.index, mean, std):
        print(f'{label}: {avg:.3f}({spread:.3f})')


In [None]:
# mean(std) per column for the whole sample (both DAA groups together).
to_summary(all_mean, all_std)

In [None]:
# mean(std) per column for the rows where DAA == 0.
to_summary(not_daa_mean, not_daa_std)

In [None]:
# mean(std) per column for the rows where DAA == 1.
to_summary(daa_mean, daa_std)

In [None]:
# Difference between the group means (DAA minus non-DAA) as a fraction of the
# DAA-group mean; displayed as the cell output.
# NOTE(review): a column whose DAA mean is 0 would give inf/NaN here.
(daa_mean - not_daa_mean) / daa_mean

In [None]:
# Difference between the group means (non-DAA minus DAA) as a fraction of the
# non-DAA-group mean; displayed as the cell output.
# NOTE(review): a column whose non-DAA mean is 0 would give inf/NaN here.
(not_daa_mean - daa_mean) / not_daa_mean

In [None]:
# Personality features plus the DAA column, taken from the prepared frame.
person_df = df[PERSONALITY_FEATURES + [DAA]]
# KMeans is the project-local class imported from kmeans.py (not visible here).
# NOTE(review): the meaning of the positional `1` cannot be determined from this
# notebook - confirm against kmeans.KMeans.
# NOTE(review): person_df still contains the DAA column while `labels` lists
# only the personality features - verify that this is what KMeans expects.
# random_state is fixed, presumably so results are reproducible.
person_km = KMeans(person_df, 1, labels=PERSONALITY_FEATURES, random_state=69)

In [None]:
# Project-local helper from pca.py (not visible here); presumably reports a PCA
# of the personality frame together with the fitted KMeans object.
print_pca(person_df, person_km)

In [None]:
# Project-local helper from pca.py (not visible here); presumably reports an NMF
# decomposition of the personality frame together with the fitted KMeans object.
print_nmf(person_df, person_km)

In [None]:
# Emotion features plus the DAA column, taken from the prepared frame.
emotions_df = df[EMOTIONS_FEATURES + [DAA]]
# KMeans is the project-local class imported from kmeans.py (not visible here).
# NOTE(review): the meaning of the positional `1` cannot be determined from this
# notebook - confirm against kmeans.KMeans.
# NOTE(review): emotions_df still contains the DAA column while `labels` lists
# only the emotion features - verify that this is what KMeans expects.
# random_state is fixed, presumably so results are reproducible.
emotions_km = KMeans(emotions_df, 1, labels=EMOTIONS_FEATURES, random_state=69)

In [None]:
# Project-local helper from pca.py (not visible here); presumably reports a PCA
# of the emotions frame together with the fitted KMeans object.
print_pca(emotions_df, emotions_km)

In [None]:
# Project-local helper from pca.py (not visible here); presumably reports an NMF
# decomposition of the emotions frame together with the fitted KMeans object.
print_nmf(emotions_df, emotions_km)