In [1]:
import nltk
import sklearn
import pandas as pd

from functions import *
from nltk.stem.porter import *
from tokenizer import tokenizer
from nltk.corpus import stopwords
from sklearn.linear_model import *
from sklearn.cluster import KMeans
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# 1. Twitter sentiment prediction - with train dataset
In this first part, we are going to use the train dataset in order to predict whether the tweets in the test dataset reflect a positive or a negative sentiment. To achieve this goal, we use `TfidfVectorizer` to preprocess the data and  a tuned `linearSVC` model for the predictions. Note that the optimal parameters of the model are found using grid search on the small dataset for time constraints.
In this section we have access to a train dataset where the sentiment reflected by the tweets are already labelized as positive or negative.

### 1.1 Preprocess the data
We start the preprocessing of the data by importing the datasets. We then create a dataframe containing all train tweets with the sentiment prediction associated, which is 0 for negative sentiment and 1 for positive sentiment. Then we created a TfidfVectorizer with tuned toknizer, stopwords, ngram_range and df_min. We applied the vectorizer to the tweets and splitted the vectorized train tweets into a train set and a test set, in order to compute accuracy localy.

In [2]:
# Import the small dataset
tweet_pos, tweet_neg, tweet_test = import_data(full = False)

# Construct train and test set
tweet_TR = construct_train_set(tweet_pos, tweet_neg)
tweet_TE = construct_test_set(tweet_test)

# Import stopwords
stopw = pd.read_csv('data/twitter-stopwords.txt').values.flatten().tolist()

In [3]:
# Print the train dataframe
tweet_TR.head()

Unnamed: 0,tweet,pred
0,vinco tresorpack 6 ( difficulty 10 of 10 objec...,0
1,glad i dot have taks tomorrow ! ! #thankful #s...,0
2,1-3 vs celtics in the regular season = were fu...,0
3,<user> i could actually kill that girl i'm so ...,0
4,<user> <user> <user> i find that very hard to ...,0


In [4]:
# Print the test dataframe
pd.DataFrame(tweet_TE, columns=['tweet']).head()

Unnamed: 0,tweet
0,sea doo pro sea scooter ( sports with the port...
1,<user> shucks well i work all week so now i ca...
2,i cant stay away from bug thats my baby
3,<user> no ma'am ! ! ! lol im perfectly fine an...
4,"whenever i fall asleep watching the tv , i alw..."


In [5]:
# Create TfidfVectorizer
vectorizer = TfidfVectorizer(analyzer='word', stop_words=stopw, tokenizer=tokenize, ngram_range=(1,4), min_df=10)

# Apply TfidfVectorizer to the small train set
X = vectorizer.fit_transform(tweet_TR.values[:, 0])
Y = tweet_TR.values[:, 1].astype(int)

In [6]:
# Split the small train set for local accuracy computation
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.2)

### 1.2. Define the standard classifiers
We define a LinearSVC classifier and computed the accuracy with the classifier's standard parameters on the small dataset.

In [7]:
# Define a standard LinearSVC classifier
clf = LinearSVC(random_state=42)
clf.fit(x_train, y_train)

# Compute the predicitions of x_test
y_pred = clf.predict(x_test)

In [8]:
# Compute the standard classifier's accuracy (LinearSVC)
print('Dataset: Small\nClassifier: LinearSVC\nParameters: Standard\nAccuracy: {:2.2%}'.format(accuracy_score(y_pred, y_test)))

Dataset: Small
Classifier: LinearSVC
Parameters: Standard
Accuracy: 84.04%


### 1.3. Find out the best parameters using the small dataset and Grid Search
We perform a grid search using 5-fold crossvalidation in order to find the parameters that optimize the classifier. Note that we only choose 3 parameters to test for each parameters, in order to keep the computation runable on our computers.

In [9]:
# Define the list of parameters to test
losses = ['hinge', 'squared_hinge']
tols = [1e-5, 1e-4, 1e-3]
Cs = [0.1, 1, 10]

# Create the parameter grid
param_grid = {'loss': losses, 'tol': tols, 'C': Cs}

# Find the best parameters
best_parameters = param_selection(x_train, y_train, 5, param_grid, clf)



In [10]:
# Apply the best parameters
loss_opt = best_parameters['loss']
tol_opt = best_parameters['tol']
C_opt = best_parameters['C']

In [11]:
# Print best parameters
print('Best parameters:\nloss: {}\ntol: {}\nC: {}'.format(loss_opt, tol_opt, C_opt))                       

Best parameters:
loss: hinge
tol: 1e-05
C: 1


We construct a new LinearSVC classifier with the optimal parameters obtained with the grid search. After that we compute the new accuracy of the optimal classifier on the small dataset.

In [12]:
# Create a new LinearSVC classifier with the optimal parameters
clf_optimal = LinearSVC(C=C_opt, tol=tol_opt, loss=loss_opt, random_state=42)
model_optimal = clf_optimal.fit(x_train, y_train)

# Compute the predictions of x_test
y_pred_optimal = model_optimal.predict(x_test)



In [13]:
# Compute the optimal classifier's accuracy (LinearSVC)
print('Dataset: Small\nClassifier: LinearSVC\nParameters: Optimal\nAccuracy: {:2.2%}'.format(accuracy_score(y_pred_optimal, y_test)))

Dataset: Small
Classifier: LinearSVC
Parameters: Optimal
Accuracy: 84.67%


### 1.4. Get the predictions using optimal classifier on full dataset

We load the full dataset, create the tweets' vector using the same TfidfVectorizer as above, and run the optimal LinearSVC classifier on the full dataset.

In [14]:
# Import the full dataset
tweet_pos_full, tweet_neg_full, tweet_test_full = import_data(full = True)

# Construct train and test set
tweet_TR_full = construct_train_set(tweet_pos_full, tweet_neg_full)
tweet_TE_full = construct_test_set(tweet_test_full)

In [15]:
# Apply TfidfVectorizer to the full train set
X_full = vectorizer.fit_transform(tweet_TR_full.values[:, 0])
Y_full = tweet_TR_full.values[:, 1].astype(int)

In [16]:
# Split the full train set for local accuracy computation
x_train_full, x_test_full, y_train_full, y_test_full = train_test_split(X_full, Y_full, test_size = 0.2)

In [17]:
# Apply the optimal classifier to the full dataset
model_optimal_full = clf_optimal.fit(x_train_full, y_train_full)

# Compute the predictions of x_test
y_pred_full = model_optimal_full.predict(x_test_full)



In [18]:
# Compute the optimal classifier's accuracy (LinearSVC)
print('Dataset: Full\nClassifier: LinearSVC\nParameters: Optimal\nAccuracy: {:2.2%}'.format(accuracy_score(y_pred_full, y_test_full)))

Dataset: Full
Classifier: LinearSVC
Parameters: Optimal
Accuracy: 87.37%


### 1.5. Output the final predicitons

We output the final predictions on the full dataset.

In [19]:
# Apply TfidfVectorizer to the test set
X_TE = vectorizer.transform(tweet_TE)

# Apply the optimal classifier to the test set
y_pred_TE = model_optimal.predict(X_TE)

# 2. Twitter sentiment prediction - without train dataset
In the second part, the goal to achieve is the same as the first one, but this time we won't have acces to a train dataset. For this task we are going to combine the small train dataset and the small test dataset to use their tweets as data.

### 2.1 Preprocess the data
Here we will not use any vectorizer, so we clean the tweets by removing all special characters, punctuations and numbers.

In [20]:
# We concatenate both datasets
all_tweets = pd.concat([tweet_TR, pd.DataFrame(tweet_TE, columns=['tweet'])], sort=False).drop(['pred'], axis=1)
# We remove the special characters
all_tweets['tweet'] = all_tweets['tweet'].str.replace("[^a-zA-Z#]", " ")
all_tweets = all_tweets.reset_index().drop(['index'], axis=1)
all_tweets.drop_duplicates(inplace=True)
all_tweets.head()

Unnamed: 0,tweet
0,vinco tresorpack difficulty of objec...
1,glad i dot have taks tomorrow #thankful #s...
2,vs celtics in the regular season were fu...
3,user i could actually kill that girl i m so ...
4,user user user i find that very hard to ...


### 2.2 Sentiment prediction
In this part we are going to use `SentimentIntensityAnalyzer` from the `vaderSentiment` library with its `polarity_scores` function. This function attributes a negative, a positive, a neutral and a compound score to each of the tweet.We will then use the compound score to determine whether the tweet reflect a positive, negative or neutral sentiment.

In [21]:
sent_analyzer = SentimentIntensityAnalyzer()
all_tweets['sentiment'] = all_tweets['tweet'].apply(lambda tweet: sent_analyzer.polarity_scores(tweet))
all_tweets['comp'] = all_tweets['sentiment'].apply(lambda sent: sent['compound'])
all_tweets['pred'] = all_tweets['comp'].apply(lambda score: get_prediction_from_score(score))
all_tweets.head()

Unnamed: 0,tweet,sentiment,comp,pred
0,vinco tresorpack difficulty of objec...,"{'neg': 0.12, 'neu': 0.649, 'pos': 0.231, 'com...",0.4981,Positive
1,glad i dot have taks tomorrow #thankful #s...,"{'neg': 0.0, 'neu': 0.667, 'pos': 0.333, 'comp...",0.4588,Positive
2,vs celtics in the regular season were fu...,"{'neg': 0.222, 'neu': 0.657, 'pos': 0.121, 'co...",-0.4588,Negative
3,user i could actually kill that girl i m so ...,"{'neg': 0.518, 'neu': 0.482, 'pos': 0.0, 'comp...",-0.7537,Negative
4,user user user i find that very hard to ...,"{'neg': 0.145, 'neu': 0.855, 'pos': 0.0, 'comp...",-0.1761,Negative


# 3. Twitter emotion extraction
Whereas in the first two parts the goal was to predict whether the tweet reflect a positive sentiment or a negative sentiment, in this part we are going to group the tweets together into 5 clusters. By manualy investigate this 5 clusters, we will try to extract the 5 dominant emotions, 2 positive, 2 negative and 1 neutral. Note that in this part, our dataset will also consist of the small train dataset combined with the small test dataset, since we are again in a unsupervised learning situation.

### 3.1 Preprocess the data
In this part we are going to reuse the dataset created in point 2.2. We will split the column `sentiment` in order to have one column per value.

In [22]:
# We separate the value in the dictionary in the column sentiment
all_tweets['neg'] = all_tweets['sentiment'].apply(lambda sent: sent['neg'])
all_tweets['neu'] = all_tweets['sentiment'].apply(lambda sent:  sent['neu'])
all_tweets['pos'] = all_tweets['sentiment'].apply(lambda sent: sent['pos'])
all_tweets_kmeans = all_tweets.drop(['pred', 'sentiment'], axis=1)
all_tweets_kmeans.head()

Unnamed: 0,tweet,comp,neg,neu,pos
0,vinco tresorpack difficulty of objec...,0.4981,0.12,0.649,0.231
1,glad i dot have taks tomorrow #thankful #s...,0.4588,0.0,0.667,0.333
2,vs celtics in the regular season were fu...,-0.4588,0.222,0.657,0.121
3,user i could actually kill that girl i m so ...,-0.7537,0.518,0.482,0.0
4,user user user i find that very hard to ...,-0.1761,0.145,0.855,0.0


### 3.2 Clustering using Kmeans
Here we group the tweets into 5 clusters using Kmeans applied to the compound score found in the part 2.2 above.

In [23]:
# Create the kmeans with 5 clusters
num_clusters = 5
# Use random_state to ensure to always have the same labeling of clusters
random_state = 42
km = KMeans(n_clusters=num_clusters)
# Apply the kmean to the vectorized tweets
km.fit(all_tweets[['neg', 'neu', 'pos', 'comp']].values)
clusters = km.labels_.tolist()
# And we create a new columns for the cluster number
all_tweets['cluster'] = clusters
all_tweets.head()

Unnamed: 0,tweet,sentiment,comp,pred,neg,neu,pos,cluster
0,vinco tresorpack difficulty of objec...,"{'neg': 0.12, 'neu': 0.649, 'pos': 0.231, 'com...",0.4981,Positive,0.12,0.649,0.231,2
1,glad i dot have taks tomorrow #thankful #s...,"{'neg': 0.0, 'neu': 0.667, 'pos': 0.333, 'comp...",0.4588,Positive,0.0,0.667,0.333,2
2,vs celtics in the regular season were fu...,"{'neg': 0.222, 'neu': 0.657, 'pos': 0.121, 'co...",-0.4588,Negative,0.222,0.657,0.121,4
3,user i could actually kill that girl i m so ...,"{'neg': 0.518, 'neu': 0.482, 'pos': 0.0, 'comp...",-0.7537,Negative,0.518,0.482,0.0,3
4,user user user i find that very hard to ...,"{'neg': 0.145, 'neu': 0.855, 'pos': 0.0, 'comp...",-0.1761,Negative,0.145,0.855,0.0,4


### 3.3 Manual investigation to detect sentiment
In this last part we are going to check manualy in the clusters to try to find the 5 dominant emotions and label them.

In [24]:
# Cluster 0
all_tweets[all_tweets['cluster'] == 0].head(10)

Unnamed: 0,tweet,sentiment,comp,pred,neg,neu,pos,cluster
8,why is she so perfect url,"{'neg': 0.0, 'neu': 0.513, 'pos': 0.487, 'comp...",0.6948,Positive,0.0,0.513,0.487,0
18,layers of the heart paperback this journey w...,"{'neg': 0.0, 'neu': 0.75, 'pos': 0.25, 'compou...",0.7184,Positive,0.0,0.75,0.25,0
50,user hahahah ok ok sorry do you like the nam...,"{'neg': 0.068, 'neu': 0.368, 'pos': 0.563, 'co...",0.8555,Positive,0.068,0.368,0.563,0
70,user thanks for trying veevs but he won t call,"{'neg': 0.0, 'neu': 0.5, 'pos': 0.5, 'compound...",0.7906,Positive,0.0,0.5,0.5,0
80,newpics #nsfw #sexy hot blonde ready for the g...,"{'neg': 0.0, 'neu': 0.599, 'pos': 0.401, 'comp...",0.7717,Positive,0.0,0.599,0.401,0
100,dvd learn to play irish fiddle # dvd kevin...,"{'neg': 0.0, 'neu': 0.591, 'pos': 0.409, 'comp...",0.8271,Positive,0.0,0.591,0.409,0
111,but i was nice all day,"{'neg': 0.0, 'neu': 0.519, 'pos': 0.481, 'comp...",0.5719,Positive,0.0,0.519,0.481,0
112,stronghold legends dvd rom relive of the g...,"{'neg': 0.0, 'neu': 0.711, 'pos': 0.289, 'comp...",0.6908,Positive,0.0,0.711,0.289,0
114,i wish i could play an instrument,"{'neg': 0.0, 'neu': 0.37, 'pos': 0.63, 'compou...",0.6249,Positive,0.0,0.37,0.63,0
116,user i know thanks #teamlilrowdy,"{'neg': 0.0, 'neu': 0.508, 'pos': 0.492, 'comp...",0.4404,Positive,0.0,0.508,0.492,0


The cluster 0 is highly positive.

In [25]:
# Cluster 1
all_tweets[all_tweets['cluster'] == 1].head(10)

Unnamed: 0,tweet,sentiment,comp,pred,neg,neu,pos,cluster
6,user i got kicked out the wgm,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Neutral,0.0,1.0,0.0,1
10,introduction to programming with c nd e...,"{'neg': 0.0, 'neu': 0.896, 'pos': 0.104, 'comp...",0.1901,Positive,0.0,0.896,0.104,1
13,user i m white #aw,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Neutral,0.0,1.0,0.0,1
17,the post boom in spanish american fiction su...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Neutral,0.0,1.0,0.0,1
20,guess who texted me again and wants us back ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Neutral,0.0,1.0,0.0,1
23,mlb the show video game mlb the show...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Neutral,0.0,1.0,0.0,1
24,rt if #justinbieber is not following you,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Neutral,0.0,1.0,0.0,1
30,trying to get user tickets for the vancouver...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Neutral,0.0,1.0,0.0,1
31,x custom picture frame poster frame ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Neutral,0.0,1.0,0.0,1
36,bye bye beach and buddies fort lauderdale ...,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound...",0.0,Neutral,0.0,1.0,0.0,1


The cluster 1 is neutral.

In [26]:
# Cluster 2
all_tweets[all_tweets['cluster'] == 2].head(10)

Unnamed: 0,tweet,sentiment,comp,pred,neg,neu,pos,cluster
0,vinco tresorpack difficulty of objec...,"{'neg': 0.12, 'neu': 0.649, 'pos': 0.231, 'com...",0.4981,Positive,0.12,0.649,0.231,2
1,glad i dot have taks tomorrow #thankful #s...,"{'neg': 0.0, 'neu': 0.667, 'pos': 0.333, 'comp...",0.4588,Positive,0.0,0.667,0.333,2
5,wish i could be out all night tonight user,"{'neg': 0.0, 'neu': 0.722, 'pos': 0.278, 'comp...",0.4019,Positive,0.0,0.722,0.278,2
7,rt user user user yes she is u tell it...,"{'neg': 0.0, 'neu': 0.723, 'pos': 0.277, 'comp...",0.5574,Positive,0.0,0.723,0.277,2
9,user hi harry did u havea good time in aus...,"{'neg': 0.0, 'neu': 0.888, 'pos': 0.112, 'comp...",0.4404,Positive,0.0,0.888,0.112,2
14,user dan i love and miss you don t be sad ...,"{'neg': 0.296, 'neu': 0.44, 'pos': 0.264, 'com...",0.128,Positive,0.296,0.44,0.264,2
15,so many wonderful building in dc but still mis...,"{'neg': 0.142, 'neu': 0.672, 'pos': 0.186, 'co...",0.1504,Positive,0.142,0.672,0.186,2
16,user it s annoying because i secretly find i...,"{'neg': 0.201, 'neu': 0.521, 'pos': 0.278, 'co...",0.2596,Positive,0.201,0.521,0.278,2
33,user not when i m straight and i m taller th...,"{'neg': 0.132, 'neu': 0.632, 'pos': 0.237, 'co...",0.3257,Positive,0.132,0.632,0.237,2
35,i really want him to do something special and ...,"{'neg': 0.135, 'neu': 0.612, 'pos': 0.253, 'co...",0.1761,Positive,0.135,0.612,0.253,2


The cluster 2 is positive.

In [27]:
# Cluster 3
all_tweets[all_tweets['cluster'] == 3].head(10)

Unnamed: 0,tweet,sentiment,comp,pred,neg,neu,pos,cluster
3,user i could actually kill that girl i m so ...,"{'neg': 0.518, 'neu': 0.482, 'pos': 0.0, 'comp...",-0.7537,Negative,0.518,0.482,0.0,3
32,sooo i missed music class does this mean ...,"{'neg': 0.295, 'neu': 0.705, 'pos': 0.0, 'comp...",-0.7269,Negative,0.295,0.705,0.0,3
44,rivals url war room rivals david ...,"{'neg': 0.218, 'neu': 0.782, 'pos': 0.0, 'comp...",-0.5994,Negative,0.218,0.782,0.0,3
48,suffering from post festival disorder,"{'neg': 0.527, 'neu': 0.182, 'pos': 0.291, 'co...",-0.3818,Negative,0.527,0.182,0.291,3
53,omfg they did a hello kitty shoot on antm ...,"{'neg': 0.439, 'neu': 0.561, 'pos': 0.0, 'comp...",-0.7404,Negative,0.439,0.561,0.0,3
77,tht had to be one of the worst voicemails i ev...,"{'neg': 0.291, 'neu': 0.709, 'pos': 0.0, 'comp...",-0.6249,Negative,0.291,0.709,0.0,3
89,user the worst way to miss a person is to ...,"{'neg': 0.222, 'neu': 0.778, 'pos': 0.0, 'comp...",-0.6908,Negative,0.222,0.778,0.0,3
94,seriously what about jerking off a strapon r...,"{'neg': 0.251, 'neu': 0.749, 'pos': 0.0, 'comp...",-0.5719,Negative,0.251,0.749,0.0,3
102,and all the days you let me cry,"{'neg': 0.307, 'neu': 0.693, 'pos': 0.0, 'comp...",-0.4767,Negative,0.307,0.693,0.0,3
113,why am i still sick,"{'neg': 0.524, 'neu': 0.476, 'pos': 0.0, 'comp...",-0.5106,Negative,0.524,0.476,0.0,3


The Cluster 3 is highly negative.

In [28]:
# Cluster 4
all_tweets[all_tweets['cluster'] == 4].head(10)

Unnamed: 0,tweet,sentiment,comp,pred,neg,neu,pos,cluster
2,vs celtics in the regular season were fu...,"{'neg': 0.222, 'neu': 0.657, 'pos': 0.121, 'co...",-0.4588,Negative,0.222,0.657,0.121,4
4,user user user i find that very hard to ...,"{'neg': 0.145, 'neu': 0.855, 'pos': 0.0, 'comp...",-0.1761,Negative,0.145,0.855,0.0,4
21,user # farrow v a litter offence wilfu...,"{'neg': 0.216, 'neu': 0.784, 'pos': 0.0, 'comp...",-0.296,Negative,0.216,0.784,0.0,4
22,people at barca s ground tonight are gon...,"{'neg': 0.138, 'neu': 0.862, 'pos': 0.0, 'comp...",-0.4939,Negative,0.138,0.862,0.0,4
27,why should i even care she dont want my whit...,"{'neg': 0.297, 'neu': 0.502, 'pos': 0.201, 'co...",-0.1336,Negative,0.297,0.502,0.201,4
28,ugh idk why i got my hopes up my plans n...,"{'neg': 0.262, 'neu': 0.562, 'pos': 0.175, 'co...",-0.1027,Negative,0.262,0.562,0.175,4
29,user ummm playing trials hd and ignoring the...,"{'neg': 0.164, 'neu': 0.727, 'pos': 0.109, 'co...",-0.2263,Negative,0.164,0.727,0.109,4
34,user user bloodforge not worth fighting to...,"{'neg': 0.183, 'neu': 0.722, 'pos': 0.095, 'co...",-0.2396,Negative,0.183,0.722,0.095,4
46,user i m not surprised you shouldn t do that...,"{'neg': 0.192, 'neu': 0.808, 'pos': 0.0, 'comp...",-0.1695,Negative,0.192,0.808,0.0,4
54,user stab in the heart i really wanted to ...,"{'neg': 0.211, 'neu': 0.611, 'pos': 0.178, 'co...",-0.1531,Negative,0.211,0.611,0.178,4


The cluster 4 negative.

#### To summarize, we have:
- cluster 0 that is highly positive. We will call it 'love'.
- cluster 1 that is neutral. We will call it 'neutral'
- cluster 2 that is positive. We will call it 'enjoyment'.
- cluster 3 that is highly negative. We will call it 'anger'.
- cluster 4 that is negative. We will call it 'sadness'.

In [30]:
# We add the labels to the dataset
emotions = {'0': 'love', '1': 'neutral', '2': 'enjoyment', '3': 'anger', '4': 'sadness'}
all_tweets['emotion'] = all_tweets['cluster'].apply(lambda cluster: emotions[str(cluster)])
all_tweets.head()

Unnamed: 0,tweet,sentiment,comp,pred,neg,neu,pos,cluster,emotion
0,vinco tresorpack difficulty of objec...,"{'neg': 0.12, 'neu': 0.649, 'pos': 0.231, 'com...",0.4981,Positive,0.12,0.649,0.231,2,enjoyment
1,glad i dot have taks tomorrow #thankful #s...,"{'neg': 0.0, 'neu': 0.667, 'pos': 0.333, 'comp...",0.4588,Positive,0.0,0.667,0.333,2,enjoyment
2,vs celtics in the regular season were fu...,"{'neg': 0.222, 'neu': 0.657, 'pos': 0.121, 'co...",-0.4588,Negative,0.222,0.657,0.121,4,sadness
3,user i could actually kill that girl i m so ...,"{'neg': 0.518, 'neu': 0.482, 'pos': 0.0, 'comp...",-0.7537,Negative,0.518,0.482,0.0,3,anger
4,user user user i find that very hard to ...,"{'neg': 0.145, 'neu': 0.855, 'pos': 0.0, 'comp...",-0.1761,Negative,0.145,0.855,0.0,4,sadness
