<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Description" data-toc-modified-id="Description-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Description</a></span></li><li><span><a href="#Load-the-libraries" data-toc-modified-id="Load-the-libraries-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Load the libraries</a></span></li><li><span><a href="#Load-the-data" data-toc-modified-id="Load-the-data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Load the data</a></span></li><li><span><a href="#Positive-and-Negative-Tweets" data-toc-modified-id="Positive-and-Negative-Tweets-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Positive and Negative Tweets</a></span></li><li><span><a href="#Frequency-Distribution" data-toc-modified-id="Frequency-Distribution-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Frequency Distribution</a></span></li><li><span><a href="#Word-Cloud" data-toc-modified-id="Word-Cloud-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Word Cloud</a></span></li><li><span><a href="#Treemap" data-toc-modified-id="Treemap-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Treemap</a></span></li></ul></div>

# Description
Twitter sentiment analysis.

# Load the libraries

In [13]:
import os
import sys

# Extra site-packages directory of a local conda environment named "nlp".
# NOTE(review): this is a machine-specific absolute path; selecting the right
# kernel for the notebook would make this cell unnecessary.
_nlp_site_packages = '/Users/poudel/opt/miniconda3/envs/nlp/lib/python3.7/site-packages'

# Extend the import path only when the directory exists on this machine and is
# not already listed, so re-running the cell never adds duplicate entries and
# other machines do not get a dead entry in sys.path.
if os.path.isdir(_nlp_site_packages) and _nlp_site_packages not in sys.path:
    sys.path.append(_nlp_site_packages)

In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import mlxtend
import plotly_express as px

# Keep pandas' .plot() accessor on the matplotlib backend.
pd.options.plotting.backend = "matplotlib"

# Use fully qualified option names: short names are resolved by pattern
# matching and raise OptionError on pandas versions where the pattern matches
# more than one option (e.g. 'max_columns' also matching a styler option).
pd.set_option('display.max_columns',100)    # display up to 100 columns
pd.set_option('display.max_colwidth',1000)  # display up to 1000 characters per cell

# Standard-library helpers, one import per line.
import json
import os
import sys
import time

SEED = 100                          # constant named as a random seed; not used in the cells shown here
home = os.path.expanduser('~')      # current user's home directory
time_start_notebook = time.time()   # timestamp (seconds since epoch) taken when this cell runs

import matplotlib.pyplot as plt
# Apply matplotlib's 'ggplot' style sheet to figures created after this point.
plt.style.use('ggplot')
# IPython magics: render figures inline in the notebook output ...
%matplotlib inline
# ... and ask the inline backend for 'retina' (high-resolution) figure output.
%config InlineBackend.figure_format = 'retina'

# Print (module name, version) pairs for the core data/ML libraries, so the
# environment that produced the outputs is recorded next to them.
print(list(map(lambda lib: (lib.__name__, lib.__version__),
               (np, pd, sns, sklearn, mlxtend, px))))


#========= NLP
import re
import string
import nltk
import spacy
import textblob
import gensim
import texthero
from urllib.parse import urlparse
from nltk.corpus import stopwords

# Print (module name, version) pairs for the NLP libraries.
print(list(map(lambda lib: (lib.__name__, lib.__version__),
               (nltk, spacy, textblob, gensim))))

#=======OTHERS
import scipy
import multiprocessing as mp

[('numpy', '1.17.5'), ('pandas', '1.0.5'), ('seaborn', '0.10.1'), ('sklearn', '0.23.1'), ('mlxtend', '0.17.0'), ('plotly_express', '0.4.1')]
[('nltk', '3.4.4'), ('spacy', '2.2.3'), ('textblob', '0.15.3'), ('gensim', '3.8.3')]


# Load the data

In [15]:
# Name of the label column; rows where it is missing become the test split.
target = 'label'

# Read the combined, cleaned tweet table from the processed-data folder.
df_combined = pd.read_csv('../data/processed/df_combined_clean.csv')

# Split on label availability: labelled rows -> train, unlabelled rows -> test.
has_label = df_combined[target].notnull()
df_train = df_combined[has_label]
df_test = df_combined[~has_label]

print(f"shape df_train: {df_train.shape}")
print(f"shape df_test: {df_test.shape}")

# Column-name shortcuts built from the raw text column name:
#   mc  -> '<maincol>_clean'      (cleaned tweet as one string)
#   mcl -> '<maincol>_lst_clean'  (cleaned tweet as a token list)
maincol = 'tweet'
mc, mcl = (maincol + suffix for suffix in ('_clean', '_lst_clean'))

# Preview the first two and the last two training rows in one table.
# pd.concat is used instead of DataFrame.append, which is deprecated since
# pandas 1.4 and removed in pandas 2.0; the displayed frame is the same.
pd.concat([df_train.head(2), df_train.tail(2)])

shape df_train: (7920, 24)
shape df_test: (1953, 24)


Unnamed: 0,index,id,label,tweet,tweet_lst_clean,tweet_clean,hashtags_lst,hashtags,total_length,num_words,num_sent,num_unique_words,num_words_title,num_uppercase,num_exclamation_marks,num_question_marks,num_punctuation,num_symbols,num_digits,avg_word_len,avg_uppercase,avg_unique,tweet_lst_clean_emoji,tweet_clean_emoji
0,0,1,0.0,#fingerprint #Pregnancy Test https://goo.gl/h1MfQV #android #apps #beautiful #cute #health #igers #iphoneonly #iphonesia #iphone,"['fingerprint', 'pregnancy', 'test', 'android', 'aps', 'beautiful', 'cute', 'health', 'igers', 'iphoneonly', 'iphonesia', 'iphone']",fingerprint pregnancy test android aps beautiful cute health igers iphoneonly iphonesia iphone,"['#fingerprint', '#Pregnancy', '#android', '#apps', '#beautiful', '#cute', '#health', '#igers', '#iphoneonly', '#iphonesia', '#iphone']",#fingerprint #Pregnancy #android #apps #beautiful #cute #health #igers #iphoneonly #iphonesia #iphone,128,13,1,13,2,5,0,0,2,0,0,8.923077,0.039062,1.0,"['fingerprint', 'pregnancy', 'test', 'android', 'aps', 'beautiful', 'cute', 'health', 'igers', 'iphoneonly', 'iphonesia', 'iphone']",fingerprint pregnancy test android aps beautiful cute health igers iphoneonly iphonesia iphone
1,1,2,0.0,Finally a transparant silicon case ^^ Thanks to my uncle :) #yay #Sony #Xperia #S #sonyexperias… http://instagram.com/p/YGEt5JC6JM/,"['finaly', 'transparant', 'silicon', 'case', 'thanks', 'uncle', 'yay', 'sony', 'xperia', 'sonyexperias']",finaly transparant silicon case thanks uncle yay sony xperia sonyexperias,"['#yay', '#Sony', '#Xperia', '#S', '#sonyexperias…']",#yay #Sony #Xperia #S #sonyexperias…,131,17,1,17,5,12,0,0,3,0,0,6.764706,0.091603,1.0,"['finaly', 'transparant', 'silicon', 'case', 'thanks', 'uncle', 'hapyfaceorsmiley', 'yay', 'sony', 'xperia', 'sonyexperias']",finaly transparant silicon case thanks uncle hapyfaceorsmiley yay sony xperia sonyexperias
7918,7918,7919,0.0,"Finally got my #smart #pocket #wifi stay connected anytime,anywhere! #ipad and #samsung #s3 #gadget # http://instagr.am/p/U-53G_vJU8/","['finaly', 'got', 'smart', 'pocket', 'wifi', 'stay', 'conected', 'anytimeanywhere', 'ipad', 'samsung', 'gadget']",finaly got smart pocket wifi stay conected anytimeanywhere ipad samsung gadget,"['#smart', '#pocket', '#wifi', '#ipad', '#samsung', '#s3', '#gadget', '#']",#smart #pocket #wifi #ipad #samsung #s3 #gadget #,133,16,1,16,1,5,1,0,3,0,0,7.375,0.037594,1.0,"['finaly', 'got', 'smart', 'pocket', 'wifi', 'stay', 'conected', 'anytimeanywhere', 'ipad', 'samsung', 'gadget']",finaly got smart pocket wifi stay conected anytimeanywhere ipad samsung gadget
7919,7919,7920,0.0,Apple Barcelona!!! #Apple #Store #BCN #Barcelona #travel #iphone #selfie #fly #fun #cabincrew… http://instagram.com/p/wBApVzpCl3/,"['aple', 'barcelona', 'aple', 'store', 'bcn', 'barcelona', 'travel', 'iphone', 'selfie', 'fly', 'fun', 'cabincrew']",aple barcelona aple store bcn barcelona travel iphone selfie fly fun cabincrew,"['#Apple', '#Store', '#BCN', '#Barcelona', '#travel', '#iphone', '#selfie', '#fly', '#fun', '#cabincrew…']",#Apple #Store #BCN #Barcelona #travel #iphone #selfie #fly #fun #cabincrew…,129,13,1,13,5,12,3,0,2,0,0,9.0,0.093023,1.0,"['aple', 'barcelona', 'aple', 'store', 'bcn', 'barcelona', 'travel', 'iphone', 'selfie', 'fly', 'fun', 'cabincrew']",aple barcelona aple store bcn barcelona travel iphone selfie fly fun cabincrew


# Positive and Negative Tweets

# Frequency Distribution

# Word Cloud

# Treemap