In [2]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw grades sheet, keep every row of its first 12 columns, give those
# columns readable names, drop any row that has a missing value, and save the result.
grade_columns = [
    'Song', 'Album', 'Happy/Sad', 'Relationship', 'Feelings of self',
    'Glass half full', 'Stages', 'Tempo', 'Seriousness', 'Future prospects',
    'Feelings of male', 'Togetherness',
]

df = (
    pd.read_csv('data/Grades.csv')
    .iloc[:, :12]
    .set_axis(grade_columns, axis=1)
    .dropna(axis=0)
    .reset_index(drop=True)  # renumber the surviving rows from 0
)

df.to_csv('data/Grades_cleaned.csv', index=False)

In [54]:
# Per-emotion file names; the loading cells read each of these from a split directory.
file_name = [f'{emotion}.txt' for emotion in ('anger', 'fear', 'joy', 'sadness')]

# Training data

In [63]:
path = 'training'

# Read every per-emotion file of this split (tab-separated, no header row),
# label its four columns, then stack the frames into one table with a fresh index.
dfs = [
    pd.read_csv(f'data/emoint_2017/{path}/{file}', sep='\t', header=None)
      .set_axis(['id', 'text', 'emotion', 'intensity'], axis=1)
    for file in file_name
]

training_data = pd.concat(dfs, ignore_index=True)


In [81]:
# Reshape to one row per text and one column per emotion holding its intensity
# (pivot_table's default aggregation averages repeated text/emotion pairs);
# text/emotion combinations that never occur are filled with 0.
pivoted_training = training_data.pivot_table(
    index='text',
    columns='emotion',
    values='intensity',
).fillna(0)

pivoted_training

emotion,anger,fear,joy,sadness
text,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
I can't guess if you holding a grudge against the best',0.425,0.000,0.000,0.000
"# ISIS REFERENCES SCRUBBED? Federal complaint against suspect in NYC, NJ bombings appears to omit terror names in bloody journ... #news",0.000,0.729,0.000,0.000
#Aleppo city is burning. The head of #terrorism #Assad regime &amp; #Russia are bombarding the city right now with #whitephosphrus #bombs !,0.000,0.729,0.000,0.000
#Always #borrow #money from a #pessimist. He won't #expect it #back.,0.000,0.000,0.000,0.354
"#America finding #gratitude amidst the sadness and frustration about race, #fear, anger and #racism, i remain hopeful _ i'm an earth fixer'",0.000,0.583,0.000,0.000
...,...,...,...,...
ï˜« ughh I just want all this to be over.. it's like a nightmare! can we all just get along?,0.000,0.797,0.000,0.000
ðŸ˜± @cailamarsai you've had me ðŸ˜‚ ðŸ˜‚ the whole time watching @black_ishABC after you've lost your #glasses! It was #hilarious! @mrbabyboogaloo,0.000,0.000,0.900,0.000
ðŸ˜³The intensity that @sydneyswans play at is extraordinary #relentless #AFLCatsSwans #AFLFinals ðŸ‰ðŸ‘ðŸ¿,0.292,0.000,0.000,0.000
ðŸ’¥âš–ï¸Yeahâ€¼ï¸ PAULâ€¼ï¸âš–ï¸ðŸ’¥ #glorious #BB18,0.000,0.000,0.917,0.000


# Development data

In [90]:

path = 'development'

# Read every per-emotion file of this split (tab-separated, no header row),
# label its four columns, then stack the frames into one table with a fresh index.
dfs = [
    pd.read_csv(f'data/emoint_2017/{path}/{file}', sep='\t', header=None)
      .set_axis(['id', 'text', 'emotion', 'intensity'], axis=1)
    for file in file_name
]

dev_data = pd.concat(dfs, ignore_index=True)

In [91]:
# One row per text, one column per emotion with its intensity; absent pairs become 0.
pivoted_dev = dev_data.pivot_table(
    index='text',
    columns='emotion',
    values='intensity',
).fillna(0)

# Test data

In [93]:

path = 'test'

# Read every per-emotion file of this split (tab-separated, no header row),
# label its four columns, then stack the frames into one table with a fresh index.
dfs = [
    pd.read_csv(f'data/emoint_2017/{path}/{file}', sep='\t', header=None)
      .set_axis(['id', 'text', 'emotion', 'intensity'], axis=1)
    for file in file_name
]

test_data = pd.concat(dfs, ignore_index=True)

In [95]:
# One row per text, one column per emotion with its intensity; absent pairs become 0.
pivoted_test = test_data.pivot_table(
    index='text',
    columns='emotion',
    values='intensity',
).fillna(0)

# Save data

In [100]:
# Write the pivoted splits (text index included) to data/cleaned_data/.
# The sampling section reads 'data/cleaned_data/emoint_test.csv', and the other
# cleaned datasets in this notebook are written to the same directory; saving
# under data/emoint_2017/ left that later read without a file produced by this notebook.
# NOTE(review): data/cleaned_data/ must already exist — to_csv does not create it.
pivoted_training.to_csv('data/cleaned_data/emoint_training.csv')
pivoted_dev.to_csv('data/cleaned_data/emoint_dev.csv')
pivoted_test.to_csv('data/cleaned_data/emoint_test.csv')

# Emotion NLP

In [9]:
# Read val.txt (';'-separated, no header row), name the two columns, count how
# often each (text, emotion) pair occurs, and spread the emotions into columns
# with 0 where a text never carries that emotion.
df = (
    pd.read_csv('data/emotion_nlp/val.txt', sep=';', header=None)
    .set_axis(['text', 'emotion'], axis=1)
    .groupby(['text', 'emotion'])
    .size()
    .unstack(fill_value=0)
)
df.to_csv('data/cleaned_data/emotion_nlp_val.csv')



In [18]:
# Read test.txt (';'-separated, no header row), name the two columns, count how
# often each (text, emotion) pair occurs, and spread the emotions into columns
# with 0 where a text never carries that emotion.
df = (
    pd.read_csv('data/emotion_nlp/test.txt', sep=';', header=None)
    .set_axis(['text', 'emotion'], axis=1)
    .groupby(['text', 'emotion'])
    .size()
    .unstack(fill_value=0)
)
df.to_csv('data/cleaned_data/emotion_nlp_test.csv')

In [19]:
# Read train.txt (';'-separated, no header row), name the two columns, count how
# often each (text, emotion) pair occurs, and spread the emotions into columns
# with 0 where a text never carries that emotion.
df = (
    pd.read_csv('data/emotion_nlp/train.txt', sep=';', header=None)
    .set_axis(['text', 'emotion'], axis=1)
    .groupby(['text', 'emotion'])
    .size()
    .unstack(fill_value=0)
)
df.to_csv('data/cleaned_data/emotion_nlp_train.csv')

# KAGGLE EMOTION

In [24]:
# Load the Kaggle emotion file, turn its integer class ids into emotion names,
# and pivot to one row per text with a count column per emotion (0 where absent).
df = pd.read_csv('data/kaggle_emotion.csv', index_col=0)

label_names = {0: 'sadness', 1: 'joy', 2: 'love', 3: 'anger', 4: 'fear', 5: 'surprise'}

# Assign the result back instead of calling replace(..., inplace=True) on
# df['label']: that chained in-place call acts on the Series returned by the
# column lookup, which pandas warns about and which does not update df under
# Copy-on-Write — the pivot below would then get integer column names.
df['label'] = df['label'].replace(label_names)

df = df.groupby(['text', 'label']).size().unstack(fill_value=0)

df.to_csv('data/cleaned_data/kaggle_emotion.csv')



# SAMPLE 

In [8]:
df = pd.read_csv('data/cleaned_data/emoint_test.csv', index_col=0)

# Fixed seed so re-running this cell writes the same rows to each sample file
# instead of a fresh random draw every time.
SAMPLE_SEED = 42

# Sample size -> output file. Sampling is without replacement, so each size
# must not exceed the number of rows in df.
sample_outputs = {
    300: 'data/cleaned_data/emoint_test_sample.csv',
    1000: 'data/cleaned_data/emoint_test_1000_sample.csv',
    3000: 'data/cleaned_data/emoint_test_3000_sample.csv',
}

for n_rows, out_path in sample_outputs.items():
    df.sample(n_rows, replace=False, random_state=SAMPLE_SEED).to_csv(out_path)

In [3]:
df = pd.read_csv('data/cleaned_data/kaggle_emotion.csv', index_col=0)

# Fixed seed so re-running this cell writes the same rows to each sample file
# instead of a fresh random draw every time.
SAMPLE_SEED = 42

# Sample size -> output file. Sampling is without replacement, so each size
# must not exceed the number of rows in df.
sample_outputs = {
    300: 'data/cleaned_data/kaggle_emotion_sample.csv',
    1000: 'data/cleaned_data/kaggle_emotion_1000_sample.csv',
    3000: 'data/cleaned_data/kaggle_emotion_3000_sample.csv',
}

for n_rows, out_path in sample_outputs.items():
    df.sample(n_rows, replace=False, random_state=SAMPLE_SEED).to_csv(out_path)

# FEW SHOTS

In [44]:
df = pd.read_csv('data/cleaned_data/emoint_test_sample.csv', index_col=0)

# Draw three rows with a fixed seed so the same examples come back on every
# run; an unseeded sample() picks different rows each time the cell executes.
sampled_df = df.sample(3, random_state=42)

# Show the first sampled row's values as a bracketed, comma-separated string.
'[' + ', '.join(sampled_df.iloc[0].astype(str).values) + ']'

'[0.0, 0.0, 0.562, 0.0]'