In [1]:
import pandas as pd
import numpy as np


In [5]:
# Load the input tables. Each CSV is indexed by the key column(s) named in
# index_col: the vocabulary and the lexicon by term, the bag-of-words by
# (company, term), and the TF-IDF table by company.
VOCAB = pd.read_csv(
    './data/VOCAB.csv',
    index_col='term_str',
)
SALEX = pd.read_csv(
    'data/salex_nrc.csv',
    index_col='term_str',
)
BOW = pd.read_csv(
    './data/BOW.csv',
    index_col=['company_id', 'term_str'],
)
# NOTE(review): TFIDF is stacked later on, so it is presumably a wide
# company-by-term matrix -- confirm against the file.
TFIDF = pd.read_csv(
    './data/TFIDF.csv',
    index_col='company_id',
)

In [6]:
# Drop the 'nrc_' tag from the lexicon's column names so the columns can be
# addressed by their short names (e.g. 'positive', 'negative').
SALEX.columns = SALEX.columns.str.replace('nrc_', '', regex=False)

# Net sentiment per term: the positive score minus the negative score.
SALEX['polarity'] = SALEX['positive'] - SALEX['negative']

# Combine vocabulary and lexicon columns side by side, keeping only the terms
# whose index label occurs in both frames (inner join on the index).
# VOCAB is already indexed by term_str when it is loaded, so it is aligned
# as-is; no reset_index()/set_index() round trip is needed.
V = pd.concat([VOCAB, SALEX], axis=1, join='inner')

In [20]:
# Optional export (disabled): uncomment to save V as CSV; the index is term_str.
# V.to_csv('./data/VOCAB_emotions.csv')

In [7]:
# Doc-Term Count Matrix: pivot the long bag-of-words counts so that the inner
# index level (term_str) becomes the columns and company_id stays as the rows.
DTCM = BOW['n'].unstack()

# Max-normalised term frequency: divide every count by the largest count
# found in the same row (i.e. for the same company).
TF = DTCM.div(DTCM.max(axis=1), axis=0)

# Bring both wide matrices back to long form; the assignments align the
# stacked values with BOW's (company_id, term_str) index.
BOW['tf'] = TF.stack()
BOW['tfidf'] = TFIDF.stack()

In [10]:
# Lexicon columns that are weighted and aggregated below.
emo_cols = [
    'anger', 'anticipation', 'disgust', 'fear', 'joy',
    'sadness', 'surprise', 'trust', 'polarity',
]

# Attach the per-term attributes from V to every (company_id, term_str) row of
# BOW, matching on the term_str index level, then drop every row that has a
# missing value in any column (this includes terms that have no entry in V).
B = BOW.join(V[['max_pos', *emo_cols]], on='term_str', rsuffix='_v').dropna()

# Weight each emotion score by the row's TF-IDF value. A single vectorised
# multiply replaces the per-column Python loop and gives the same values.
B[emo_cols] = B[emo_cols].mul(B['tfidf'], axis=0)

In [11]:
# Display the TF-IDF-weighted emotion table (one row per company_id / term_str pair).
B

Unnamed: 0_level_0,Unnamed: 1_level_0,n,tf,tfidf,max_pos,anger,anticipation,disgust,fear,joy,sadness,surprise,trust,polarity
company_id,term_str,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3,absence,1,0.166667,0.362682,NN,0.000000,0.000000,0.000000,0.362682,0.000000,0.362682,0.000000,0.000000,-0.362682
3,art,1,0.166667,0.116495,NN,0.000000,0.116495,0.000000,0.000000,0.116495,0.116495,0.116495,0.000000,0.116495
3,cad,1,0.166667,0.177025,NNP,0.177025,0.000000,0.177025,0.000000,0.000000,0.000000,0.000000,0.000000,-0.177025
3,delivery,1,0.166667,0.076681,NN,0.000000,0.076681,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.076681
3,die,2,0.333333,0.187769,NNP,0.000000,0.000000,0.000000,0.187769,0.000000,0.187769,0.000000,0.000000,-0.187769
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1222,production,5,0.357143,0.099451,NN,0.000000,0.099451,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.099451
1222,strength,3,0.214286,0.138131,NN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.138131,0.138131
1222,structure,2,0.142857,0.147138,NN,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.147138,0.147138
1222,success,1,0.071429,0.049926,NN,0.000000,0.049926,0.000000,0.000000,0.049926,0.000000,0.000000,0.000000,0.049926


In [14]:
# Company-level emotion profile: the mean of each weighted emotion column
# over all rows (terms) that belong to the same company_id.
EMO_COMPANIES = (
    B.groupby(level='company_id')[emo_cols]
    .mean()
)

EMO_COMPANIES

Unnamed: 0_level_0,anger,anticipation,disgust,fear,joy,sadness,surprise,trust,polarity
company_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3,0.017702,0.073398,0.017702,0.103462,0.037883,0.066695,0.047054,0.024619,0.038491
10,0.003017,0.013994,0.067001,0.026747,0.004148,0.022966,0.002162,0.078906,-0.000848
33,0.011571,0.032340,0.005952,0.020835,0.015322,0.014273,0.004161,0.058635,0.053112
34,0.007090,0.008848,0.003116,0.022349,0.008800,0.018270,0.004755,0.021560,0.006599
49,0.012034,0.048089,0.008929,0.016234,0.028484,0.040981,0.030089,0.036404,-0.000265
...,...,...,...,...,...,...,...,...,...
1191,0.024191,0.008404,0.002502,0.029154,0.005350,0.007142,0.015860,0.053905,0.030582
1200,0.007641,0.046143,0.007442,0.013446,0.036624,0.004121,0.015056,0.032881,0.037634
1201,0.000000,0.217609,0.000000,0.000000,0.217609,0.000000,0.217609,0.273940,0.273940
1216,0.003478,0.011383,0.006695,0.008252,0.008464,0.006491,0.007331,0.018148,0.011404


In [22]:
# Optional export (disabled): uncomment to save EMO_COMPANIES as CSV; the index is company_id.
# EMO_COMPANIES.to_csv('Company_emotions.csv')

In [16]:
# Rank companies from the highest to the lowest mean weighted anger score.
EMO_COMPANIES.sort_values(by='anger', ascending=False)

Unnamed: 0_level_0,anger,anticipation,disgust,fear,joy,sadness,surprise,trust,polarity
company_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
858,0.402851,0.000000,0.402851,0.402851,0.000000,0.402851,0.000000,0.000000,-0.402851
187,0.283326,0.056735,0.000000,0.000000,0.000000,0.014771,0.000000,0.068082,-0.230015
392,0.283162,0.302301,0.000000,0.000000,0.000000,0.000000,0.016495,0.020585,-0.259932
902,0.216675,0.058520,0.062502,0.216675,0.000000,0.062502,0.000000,0.025371,-0.132785
503,0.196754,0.023183,0.000000,0.000000,0.015270,0.000000,0.002760,0.014855,-0.146206
...,...,...,...,...,...,...,...,...,...
933,0.000000,0.015135,0.000000,0.014964,0.030796,0.011749,0.000000,0.076276,0.064698
952,0.000000,0.006855,0.008013,0.002407,0.000000,0.002407,0.002453,0.030678,0.032673
991,0.000000,0.030861,0.013353,0.020231,0.018470,0.020231,0.019564,0.091385,0.094874
992,0.000000,0.019776,0.002303,0.004385,0.023035,0.004620,0.013207,0.024119,0.029337


In [17]:
# Rank companies from the highest to the lowest mean weighted joy score.
EMO_COMPANIES.sort_values(by='joy', ascending=False)

Unnamed: 0_level_0,anger,anticipation,disgust,fear,joy,sadness,surprise,trust,polarity
company_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
221,0.000000,0.000000,0.000000,0.299818,0.299818,0.000000,0.000000,0.299818,0.299818
1201,0.000000,0.217609,0.000000,0.000000,0.217609,0.000000,0.217609,0.273940,0.273940
856,0.037934,0.169647,0.037934,0.077718,0.169647,0.000000,0.066966,0.066966,0.131713
704,0.040728,0.126529,0.000000,0.009388,0.157780,0.032687,0.064027,0.071979,0.148392
66,0.045335,0.053820,0.000000,0.116439,0.109376,0.116439,0.000000,0.116088,0.008134
...,...,...,...,...,...,...,...,...,...
218,0.130565,0.005569,0.000000,0.000000,0.000000,0.000000,0.000000,0.131943,0.006947
217,0.023446,0.010954,0.023446,0.023446,0.000000,0.023446,0.067438,0.108486,0.082111
1022,0.000000,0.063781,0.000000,0.000000,0.000000,0.000000,0.063781,0.208340,0.208340
902,0.216675,0.058520,0.062502,0.216675,0.000000,0.062502,0.000000,0.025371,-0.132785


In [18]:
# Rank companies from the highest to the lowest mean weighted trust score.
EMO_COMPANIES.sort_values(by='trust', ascending=False)

Unnamed: 0_level_0,anger,anticipation,disgust,fear,joy,sadness,surprise,trust,polarity
company_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
221,0.000000,0.000000,0.000000,0.299818,0.299818,0.000000,0.000000,0.299818,0.299818
793,0.040452,0.088650,0.000000,0.040452,0.077424,0.040452,0.040452,0.299256,0.291955
1201,0.000000,0.217609,0.000000,0.000000,0.217609,0.000000,0.217609,0.273940,0.273940
857,0.072813,0.072813,0.039941,0.072813,0.072813,0.000000,0.000000,0.240727,0.200786
1022,0.000000,0.063781,0.000000,0.000000,0.000000,0.000000,0.063781,0.208340,0.208340
...,...,...,...,...,...,...,...,...,...
896,0.000000,0.009417,0.000000,0.000000,0.008798,0.000000,0.005208,0.007821,0.010319
948,0.007285,0.028869,0.021782,0.007891,0.001402,0.028115,0.022998,0.005361,0.023676
1055,0.000000,0.042621,0.000000,0.000000,0.049886,0.019416,0.048920,0.000000,0.102595
211,0.075868,0.019890,0.075868,0.000000,0.000000,0.000000,0.000000,0.000000,-0.055977
