In [None]:
import pandas as pd
import pickle
from nltk.tokenize import word_tokenize
import datetime as dt
import plotly.graph_objects as go
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt

In [None]:
# Read the cleaned tweets CSV (Spanish-language tweets, per the original note)
# into a DataFrame.
mx_twitts_csv = 'data/clean/mx_twitts.csv'
mx_twitts = pd.read_csv(mx_twitts_csv)

In [None]:
# Preview the first six rows of the loaded tweets frame.
mx_twitts.head(6)

## Load the Trained and Tested Sentiment Model

In [None]:
# Load the previously trained sentiment classifier from disk.
# The context manager closes the file even if unpickling raises.
# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file;
# only load a pickle that you produced yourself or otherwise trust.
with open('sentiment_classifier.pickle', 'rb') as f:
    classifier = pickle.load(f)

## Run the model to classify tweets

In [None]:
# NOTE(review): this disables pandas' chained-assignment warning for the whole
# session, so it also hides that warning in every later cell. It is kept here
# only to preserve the existing notebook behaviour.
pd.options.mode.chained_assignment = None  # default='warn'


def classify_twitt(text):
    """Tokenize one tweet's text and return the classifier's label for it.

    The feature set handed to the classifier maps every token to True.
    Downstream cells compare the returned label against 'Positive' and
    'Negative'.
    """
    custom_tokens = word_tokenize(text)
    return classifier.classify({token: True for token in custom_tokens})


# Assign the whole column in one step instead of writing cell by cell while
# iterating over the frame; this also creates 'classified' when the frame
# has no rows, so later cells can still reference the column.
mx_twitts['classified'] = mx_twitts['text'].map(classify_twitt)

In [None]:
# Preview the first five rows, now including the 'classified' column.
mx_twitts.head(5)

In [None]:
# Build the plotting frame: keep only the creation timestamp, then flag each
# row with 'yes' in 'pos' / 'neg' according to the label the classifier gave it.
mx_twitts_plot = pd.DataFrame({"created_at": mx_twitts["created_at"]})

is_positive = mx_twitts['classified'].str.contains('Positive')
is_negative = mx_twitts['classified'].str.contains('Negative')

# Rows not matching a mask are left as missing values in that column.
mx_twitts_plot.loc[is_positive, 'pos'] = 'yes'
mx_twitts_plot.loc[is_negative, 'neg'] = 'yes'

In [None]:
# Preview the plotting frame with its 'created_at', 'pos' and 'neg' columns.
mx_twitts_plot.head(5)

In [None]:
# Convert the 'created_at' timestamp strings (e.g. '2020-03-01T12:34:56Z')
# into plain calendar dates so tweets can be grouped per day.
# The conversion is vectorized and assigned back to the column in one step:
# element-wise chained assignment (frame.col[i] = value) is not guaranteed to
# write into the frame (and does not under pandas Copy-on-Write), and it also
# depends on the index labels being exactly 0..n-1.
mx_twitts_plot['created_at'] = pd.to_datetime(
    mx_twitts_plot['created_at'], format='%Y-%m-%dT%H:%M:%SZ'
).dt.date

In [None]:
# Count, for every day, how many rows are flagged positive and how many
# negative ('count' ignores the missing values left in 'pos' / 'neg').
by_day = mx_twitts_plot.groupby('created_at')
daily_pos = by_day['pos'].transform('count')
daily_neg = by_day['neg'].transform('count')

mx_twitts_plot['count_pos'] = daily_pos
mx_twitts_plot['count_neg'] = daily_neg

# The per-row flags are no longer needed; dropping them and removing the
# duplicated rows leaves one row per distinct (day, counts) combination.
mx_twitts_plot = mx_twitts_plot.drop(columns=['pos', 'neg']).drop_duplicates()

In [None]:
# Preview the aggregated frame: 'created_at' with 'count_pos' and 'count_neg'.
mx_twitts_plot.head(5)

## Plotting Classified Tweets

In [None]:
# Line chart of daily positive vs. negative tweet counts, with a range slider.
# Each entry is (count column, legend label, line colour).
trace_specs = (
    ('count_pos', "Positive", 'deepskyblue'),
    ('count_neg', "Negative", 'dimgray'),
)

fig = go.Figure()
for count_column, trace_name, trace_color in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=mx_twitts_plot['created_at'],
            y=mx_twitts_plot[count_column],
            name=trace_name,
            line_color=trace_color,
        )
    )

fig.update_layout(
    title_text='MX Twitts Positive/Negative Per Day',
    xaxis_rangeslider_visible=True,
)
fig.show()

## Word Cloud by Tweet Classification

In [None]:
# Split the tweets into two frames by the label the classifier assigned.
labels = mx_twitts['classified']
pos_twitts = mx_twitts[labels == "Positive"]
neg_twitts = mx_twitts[labels == "Negative"]

In [None]:
# Concatenate the cleaned text of all positive tweets into a single string.
# Missing values are dropped first: an empty cleaned text comes back from
# read_csv as NaN, and str.join raises TypeError on non-string items.
text = " ".join(pos_twitts['clean_text'].dropna())

# Create and generate a word cloud image:
wordcloud = WordCloud().generate(text)

# Display the generated image (titled so it can be told apart from the
# negative-tweet cloud):
plt.imshow(wordcloud, interpolation='bilinear')
plt.title("Positive tweets")
plt.axis("off")
plt.show()

In [None]:
# Concatenate the cleaned text of all negative tweets into a single string.
# Missing values are dropped first: an empty cleaned text comes back from
# read_csv as NaN, and str.join raises TypeError on non-string items.
text = " ".join(neg_twitts['clean_text'].dropna())

# Create and generate a word cloud image:
wordcloud = WordCloud().generate(text)

# Display the generated image (titled so it can be told apart from the
# positive-tweet cloud):
plt.imshow(wordcloud, interpolation='bilinear')
plt.title("Negative tweets")
plt.axis("off")
plt.show()