In [1]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import Table, Column, Integer, String, Float, DateTime, MetaData

In [2]:
# Open the SQLite database that holds the tweets and keep a single connection
# around for the pandas reads and writes in the cells below.
database_url = "sqlite:///../../data/data.sqlite"
engine = create_engine(database_url, echo=False)
conn = engine.connect()

# Empty MetaData collection; the `sentiment` table is registered on it later.
meta = MetaData()

# Reflect the tables that already exist in the database into automapped classes.
Base = automap_base()
Base.prepare(engine, reflect=True)

In [3]:
# Pull every row of the TWEETS table into a DataFrame and preview it.
tweets_query = 'SELECT * FROM TWEETS'
tweets_text_data = pd.read_sql(sql=tweets_query, con=conn)
tweets_text_data.head()

Unnamed: 0,source,text,created_at,retweet_count,favorite_count,id_str
0,Twitter for Android,"""@flicka__: @realDonaldTrump for president""",2015-01-01 06:54:59.000000,18,58,550545703932796928
1,Twitter for Android,The Mar-a-Lago Club was amazing tonight. Every...,2015-01-01 07:02:39.000000,27,77,550547634218614784
2,Twitter for Android,"""@archangeljf12: ;,@realDonaldTrump for Presi...",2015-01-01 07:04:33.000000,24,56,550548111161294848
3,Twitter for Android,"""@TalentlessCook: @realDonaldTrump You're only...",2015-01-01 07:04:45.000000,17,56,550548164177309696
4,Twitter for Android,"""@yankeejayman: @realDonaldTrump @flicka__ Do ...",2015-01-01 07:05:27.000000,11,38,550548339708940288


In [4]:
import pickle
# Path of the pickled model file; it is opened and unpickled in a later cell.
filename = 'finalized_model.sav'


In [5]:
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import RegexpTokenizer
# Tokenizer that keeps only runs of ASCII letters and digits, so punctuation and
# other symbols are dropped from our data (digits are kept by this pattern).
token = RegexpTokenizer(r'[a-zA-Z0-9]+')
# Bag-of-words counts over unigrams, lowercased, English stop words removed,
# capped at 30495 features.
# NOTE(review): the vocabulary is fitted on these tweets here. If the pickled model
# in `filename` was trained with a vectorizer fitted on other text, column i of this
# matrix will not mean the same word the model saw during training, and the
# predictions would not be meaningful. Presumably the fitted vectorizer should be
# persisted with the model and only `.transform()` called here -- TODO confirm.
cv = CountVectorizer(lowercase=True,stop_words='english',ngram_range = (1,1),tokenizer = token.tokenize,max_features=30495)
text_counts= cv.fit_transform(tweets_text_data['text'])
text_counts

<23645x30495 sparse matrix of type '<class 'numpy.int64'>'
	with 299049 stored elements in Compressed Sparse Row format>

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Fit a TF-IDF vectorizer with default settings on the tweet text and show the
# resulting sparse matrix.
# NOTE(review): `text_tf` is not referenced by any later cell visible in this
# notebook; the prediction cell uses `text_counts` instead -- confirm whether this
# cell is still needed.
tf=TfidfVectorizer()
text_tf= tf.fit_transform(tweets_text_data['text'])
text_tf

<23645x30845 sparse matrix of type '<class 'numpy.float64'>'
	with 472198 stored elements in Compressed Sparse Row format>

In [7]:
# Load the pickled classifier and predict one label per tweet.
# SECURITY: unpickling executes arbitrary code from the file, so `filename` must
# only ever point at a model file from a trusted source.
# The context manager closes the file handle even if unpickling fails; the
# previous `pickle.load(open(...))` form left it open.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# `text_counts` is the bag-of-words matrix built in an earlier cell.
result = loaded_model.predict(text_counts)
print(len(result))

23645


In [8]:
# Row count of the tweets frame, shown for comparison with the number of
# predictions printed by the previous cell.
len(tweets_text_data)

23645

In [9]:
# Store the raw predictions next to each tweet, then swap the numeric class codes
# for readable labels (any other value is left as it is).
label_names = {0: "Negative", 4: "Positive"}
tweets_text_data['sentiment'] = result
tweets_text_data['sentiment'] = tweets_text_data['sentiment'].replace(label_names)

In [10]:
# Show full cell contents instead of truncating long tweet text. `None` is the
# supported "no limit" value; the old `-1` sentinel was deprecated in pandas 1.0
# and is no longer accepted by later releases.
pd.set_option('display.max_colwidth', None)
# Preview the first 20 rows, which now include the `sentiment` column.
tweets_text_data.head(20)

Unnamed: 0,source,text,created_at,retweet_count,favorite_count,id_str,sentiment
0,Twitter for Android,"""@flicka__: @realDonaldTrump for president""",2015-01-01 06:54:59.000000,18,58,550545703932796928,Negative
1,Twitter for Android,"The Mar-a-Lago Club was amazing tonight. Everybody was there, the biggest and the hottest. Palm Beach is so lucky to have best club in world",2015-01-01 07:02:39.000000,27,77,550547634218614784,Negative
2,Twitter for Android,"""@archangeljf12: ;,@realDonaldTrump for President of the United States! @SenTedCruz Vice President ;A #WinningTicket2016 #TrumpCruz2016""",2015-01-01 07:04:33.000000,24,56,550548111161294848,Positive
3,Twitter for Android,"""@TalentlessCook: @realDonaldTrump You're only a year away from #2016 ...pick a running mate. Happy New Year""",2015-01-01 07:04:45.000000,17,56,550548164177309696,Negative
4,Twitter for Android,"""@yankeejayman: @realDonaldTrump @flicka__ Do u ever sleep?"" Not much!",2015-01-01 07:05:27.000000,11,38,550548339708940288,Negative
5,Twitter for Android,"""@JoshHammer77: @realDonaldTrump happy new year sir can't wait for #CelebrityApprentice""",2015-01-01 07:06:13.000000,9,30,550548532600766464,Positive
6,Twitter for Android,"""@SparkleAWin: @realDonaldTrump So excited for a New Year &amp; new Celeb Apprentice! Can't wait to see The Trumps back in The Boardroom!""",2015-01-01 07:08:39.000000,9,33,550549145027870720,Positive
7,Twitter for Android,"""@Danwolff22: @realDonaldTrump I believe in you to bring this country back to what it once was! MR. Trump for president!"" Thanks Dan!",2015-01-01 07:09:11.000000,15,42,550549278679379968,Positive
8,Twitter for Android,"""@tlowery19: @realDonaldTrump for president please! üò©""",2015-01-01 07:09:22.000000,10,54,550549323562639360,Positive
9,Twitter for Android,"""@jmcafee23: @realDonaldTrump please run for president this country needs you!""",2015-01-01 07:09:30.000000,17,58,550549358819934208,Negative


In [11]:
# Build the frame that gets written to the database by removing every column
# listed here from the tweets frame.
dropped_columns = ["source", "text", "created_at", "retweet_count", "favorite_count"]
new_df = tweets_text_data.drop(dropped_columns, axis=1)

In [12]:
# Preview the reduced frame before it is written to the database.
new_df.head()

Unnamed: 0,id_str,sentiment
0,550545703932796928,Negative
1,550547634218614784,Negative
2,550548111161294848,Positive
3,550548164177309696,Negative
4,550548339708940288,Negative


In [13]:
# Schema of the `sentiment` table: the tweet id is the primary key and the
# label is stored as a string.
sentiment_columns = [
    Column('id_str', Integer, primary_key=True),
    Column('sentiment', String),
]
sentiment = Table('sentiment', meta, *sentiment_columns)

In [14]:
# Drop and recreate every table registered on `meta`, so the append in the
# following cell starts from an empty table and the notebook can be re-run.
# The engine is passed explicitly: the argument-less `drop_all()` / `create_all()`
# calls depend on bound metadata, which SQLAlchemy 2.0 removed. The explicit form
# works on both old and new releases.
meta.bind = engine  # kept so any code that still reads `meta.bind` keeps working
meta.drop_all(engine)
meta.create_all(engine)

In [15]:
# Append the id/label rows to the `sentiment` table over the open connection;
# the DataFrame index is not written.
new_df.to_sql(
    name='sentiment',
    con=conn,
    if_exists='append',
    index=False,
    index_label="id_str",
)

In [16]:
# Export the full tweets frame, including the sentiment labels, to CSV without
# the DataFrame index.
tweets_text_data.to_csv(
    path_or_buf="SentimentData.csv",
    index=False,
)