##### Instructions:

Most of the imported libraries can be just pip installed, but for fasttext, do the following:

Install Microsoft Build Tools from here: https://visualstudio.microsoft.com/visual-cpp-build-tools/

Then run the following commands in a conda command prompt window:

git clone https://github.com/facebookresearch/fastText.git
cd fastText
pip install .

Let me know if there are other issues!

In [None]:
import pandas as pd
import numpy as np
import fasttext
import matplotlib.pyplot as plt
import nltk
from string import punctuation
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize
from nltk import WordPunctTokenizer

import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')
en_stop = set(nltk.corpus.stopwords.words('english'))
from bs4 import BeautifulSoup
import re
import emoji
import csv
import itertools

%matplotlib inline
import GetOldTweets3 as got
from datetime import datetime, timedelta
import time
import tweepy
from pymongo import MongoClient
import dns

#### Load model, you will need my model file

In [None]:
# Load the trained fastText classifier used by every prediction cell below.
# The path is relative, so "model_fasttext.bin" must sit in the working
# directory; the file is not produced anywhere in this notebook.
model = fasttext.load_model("model_fasttext.bin")

In [None]:
def connect_mongo(uri=None):
    """Connect to MongoDB and return the ``tsentimeter`` database handle.

    Args:
        uri: Optional MongoDB connection string. When omitted, the
            ``MONGODB_URI`` environment variable is used; if that is unset as
            well, the legacy built-in connection string is used so existing
            callers keep working.

    Returns:
        The ``tsentimeter`` database object of the connected client.
    """
    import os  # local import: the notebook's import cell does not import os

    # SECURITY NOTE(review): this fallback embeds a username and password in
    # the notebook. It is kept only for backward compatibility; rotate the
    # credential, supply the URI through MONGODB_URI (or the ``uri`` argument),
    # and then delete this literal.
    legacy_uri = "mongodb+srv://ee461l-blog:trapdungeon@cluster0-1mz2k.mongodb.net/test?retryWrites=true&w=majority"

    connection_uri = uri or os.environ.get("MONGODB_URI") or legacy_uri
    client = MongoClient(connection_uri)
    return client['tsentimeter']

# Open the connection once and keep the database handle for the cells below.
db = connect_mongo()
db  # bare expression so the notebook displays the handle

In [None]:
# Handle to the "tweets" collection; the insert, delete and update cells
# further down all operate on it.
tweets_collection = db.tweets
tweets_collection  # bare expression so the notebook displays the handle

In [None]:
def _clean_tweet_text(tweet):
    """Normalise raw tweet text into the form handed to the classifier.

    The steps and their order are unchanged from the original inline code.

    NOTE(review): because lower-casing and punctuation stripping run before
    the dictionary lookups, contraction keys containing capitals (e.g. "I'm")
    and many smiley keys containing ':', '-' or '=' can never match. Changing
    the order would change the text the model sees, so confirm how the model
    was trained before touching it.
    """
    # NOTE(review): no parser is named, so BeautifulSoup picks one itself and
    # warns about it; left as-is to avoid changing the extracted text.
    tweet = BeautifulSoup(tweet).get_text()
    tweet = tweet.replace('\n', ' ').replace('\x92', "'")

    # Raw strings keep the regex escapes (\w, \S, \. ...) away from Python's
    # own string-escape handling, which warns about them in plain literals.
    # Order: @mentions/#hashtags, then URLs, then basic punctuation. The
    # split/join after each pass collapses whitespace runs.
    for pattern in (
        r"(@[A-Za-z0-9]+)|(#[A-Za-z0-9]+)",
        r"(\w+:\/\/\S+)",
        r"[\.\,\!\?\:\;\-\=]",
    ):
        tweet = ' '.join(re.sub(pattern, " ", tweet).split())
    tweet = tweet.lower()

    # Expand contractions word by word after normalising curly apostrophes.
    tweet = tweet.replace("’", "'")
    contractions = load_dict_contractions()
    tweet = " ".join(contractions.get(word, word) for word in tweet.split())

    # Squeeze any run of one repeated character down to at most two.
    tweet = ''.join(''.join(run)[:2] for _, run in itertools.groupby(tweet))

    # Replace whole-word emoticons with their sentiment word.
    smileys = load_dict_smileys()
    tweet = " ".join(smileys.get(word, word) for word in tweet.split())

    # Turn emoji into their text names, then drop the ':' delimiters.
    tweet = emoji.demojize(tweet)
    tweet = tweet.replace(":", " ")
    return ' '.join(tweet.split())


def get_prob_of_positive_sentiment_from_tweet(tweet):
    """Return the model's probability that ``tweet`` is positive.

    Args:
        tweet: Raw tweet text (may contain HTML entities, mentions, URLs,
            emoticons and emoji).

    Returns:
        A number in [0, 1]. When the top predicted label is
        ``__label__NEGATIVE`` the result is one minus the model's score for
        that label; for any other label it is the score itself.
    """
    labels, scores = model.predict(_clean_tweet_text(tweet))

    # Clamp into [0, 1] as the original code did (presumably to absorb small
    # floating-point overshoot), without writing back into the model's array.
    confidence = min(max(scores[0], 0), 1)

    if labels[0] == "__label__NEGATIVE":
        return 1 - confidence
    return confidence




#### Specify search parameters and create all variables

In [None]:
# Search parameters read by the scraping cell below.
text_query = 'Amazon' # keyword to search for (the API can also search by user; keyword search is enough for now)
since_date = '2020-01-01' # first day to search, formatted YYYY-MM-DD
until_date = '2020-02-20' # end of the search range, formatted YYYY-MM-DD (exclusive: this day itself is not included)
count = 500 # maximum tweets per request; the scraping loop issues one request per day, so this is effectively a per-day cap

#### Getting all the tweets, will take a while if over long time period

In [None]:
# Fetch tweets one day at a time from since_date (inclusive) up to until_date
# (exclusive), pausing after every batch of days to stay under the rate limit.
date_format = '%Y-%m-%d'
days_per_batch = 8    # consecutive daily requests issued before pausing
pause_seconds = 120   # 2 minute delay since twitter has a rate limit on number of requests from this API

# The 'date'/'text' placeholder columns stay empty; the fetched rows land in
# positional columns 0-4, which the next cell renames.
df_tweets = pd.DataFrame(columns = ['date', 'text'])
day_cursor = datetime.strptime(since_date, date_format)
end_day = datetime.strptime(until_date, date_format)
count_days = 0

# Compare parsed dates with "<" instead of date strings with "!=": the string
# test never becomes false when until_date is earlier than since_date or is
# not zero-padded the same way, which made the loop run forever.
while day_cursor < end_day:
    following_day = day_cursor + timedelta(days=1)
    since_date = day_cursor.strftime(date_format)
    next_date = following_day.strftime(date_format)
    print (next_date)

    tweetCriteria = (
        got.manager.TweetCriteria()
        .setQuerySearch(text_query)
        .setSince(since_date)
        .setUntil(next_date)
        .setMaxTweets(count)
        .setTopTweets(True)
    )

    # list that contains all tweets
    tweets = got.manager.TweetManager.getTweets(tweetCriteria)
    # list of chosen tweet data
    text_tweets = [[tweet.date, tweet.text, tweet.id, tweet.retweets, tweet.favorites] for tweet in tweets]
    df_tweets = pd.concat([df_tweets, pd.DataFrame(text_tweets)], ignore_index=True)

    # Advance the cursor; since_date keeps tracking it, as in the original loop.
    day_cursor = following_day
    since_date = next_date
    count_days += 1

    # Pause between batches, but not after the final day.
    if count_days == days_per_batch and day_cursor < end_day:
        count_days = 0
        print("sleep")
        time.sleep(pause_seconds)
        print("wake up")
    

In [None]:
# Discard the two placeholder columns that were created empty alongside the
# scraped data, then give the positional columns their real names.
df_tweets_f = (
    df_tweets
    .drop(['date', 'text'], axis = 1)
    .rename(columns = {0:'date', 1:'text', 2:'id', 3:'retweets', 4:'favorites'})
)

# Render every tweet's datetime as a string for storage.
date_string = [
    record.date.strftime('%a %b %d %H:%M:%S %z %Y')
    for record in df_tweets_f.itertuples(index=False)
]
df_tweets_f['date_string'] = pd.DataFrame(date_string)
df_tweets_f

#### Cleaning up all the tweets and getting the labels as well as a df for evaluation of tweets

In [None]:
# Build the lookup tables once; the original rebuilt both for every tweet.
CONTRACTIONS = load_dict_contractions()
SMILEY = load_dict_smileys()

# Raw strings keep the regex escapes (\w, \S, \. ...) away from Python's own
# string-escape handling, which warns about them in plain literals.
cleanup_patterns = (
    re.compile(r"(@[A-Za-z0-9]+)|(#[A-Za-z0-9]+)"),  # @mentions and #hashtags
    re.compile(r"(\w+:\/\/\S+)"),                     # URLs
    re.compile(r"[\.\,\!\?\:\;\-\=]"),                # basic punctuation
)

clean_tweets = []
for record in df_tweets_f.itertuples(index=False):
    # Strip markup and normalise line breaks / the \x92 apostrophe.
    tweet = BeautifulSoup(record.text).get_text()
    tweet = tweet.replace('\n', ' ').replace('\x92', "'")

    # Apply the patterns in order; split/join collapses whitespace each time.
    for pattern in cleanup_patterns:
        tweet = ' '.join(pattern.sub(" ", tweet).split())
    tweet = tweet.lower()

    # Expand contractions word by word after normalising curly apostrophes.
    tweet = tweet.replace("’", "'")
    tweet = " ".join(CONTRACTIONS.get(word, word) for word in tweet.split())

    # Squeeze any run of one repeated character down to at most two.
    tweet = ''.join(''.join(run)[:2] for _, run in itertools.groupby(tweet))

    # Replace whole-word emoticons with their sentiment word.
    tweet = " ".join(SMILEY.get(word, word) for word in tweet.split())

    # Turn emoji into their text names, then drop the ':' delimiters.
    tweet = emoji.demojize(tweet)
    tweet = tweet.replace(":", " ")
    clean_tweets.append(' '.join(tweet.split()))

clean_tweets = pd.DataFrame(clean_tweets)
df_tweets_f['processed_tweets'] = clean_tweets #all formatted tweets are now in df



In [None]:
# Classify every cleaned tweet and record, per row, the predicted label and
# the probability of the tweet being positive.
labels_for_df = []
proba_for_df = []

for record in df_tweets_f.itertuples(index=False):
    predicted, scores = model.predict(record.processed_tweets)

    # Keep the top score inside [0, 1].
    if scores[0] > 1:
        scores[0] = 1
    elif scores[0] < 0:
        scores[0] = 0

    # A NEGATIVE prediction is stored as the complementary positive probability.
    is_negative = predicted[0] == "__label__NEGATIVE"
    labels_for_df.append('negative' if is_negative else 'positive')
    proba_for_df.append(1-scores[0] if is_negative else scores[0])

df_tweets_f['sentiment'] = pd.DataFrame(labels_for_df)
df_tweets_f['pred_proba'] = pd.DataFrame(proba_for_df)

In [None]:
# Display the formatted date strings to check them before writing to the database.
df_tweets_f.date_string

In [None]:
# Build one MongoDB document per tweet. The id, the date and the sentiment
# probability are stored as strings, as before.
documents = [
    {
        'topic': text_query,
        'search_method': 'keyword',
        'date': str(record.date_string),
        'tweet_id': str(record.id),
        'tweet': record.text,
        'retweet_count': record.retweets,
        'favorite_count': record.favorites,
        'sentiment': str(record.pred_proba),
    }
    for record in df_tweets_f.itertuples(index=False)
]

# Send everything in one batched call instead of one round trip per tweet.
# insert_many rejects an empty list, so skip it when nothing was scraped.
if documents:
    tweets_collection.insert_many(documents)

print("Success")

In [None]:
# Destructive maintenance cell: deletes every stored document whose topic is
# exactly "Tesla". Run it only on purpose.
result = tweets_collection.delete_many({"topic": "Tesla"})
result  # bare expression so the notebook displays the delete result

In [None]:
# Filter matching the lower-case topic, and the corrected capitalised value.
# NOTE(review): presumably meant for the topic-rename cell that follows — confirm.
query = {"topic" : "quarantine"}
replacement_data = {"topic" : "Quarantine"}

In [None]:
# Rename the topic "quarantine" to "Quarantine" on every stored tweet.
# This cell previously held mongo-shell JavaScript (find().forEach(function...)),
# which is a syntax error in Python and also addressed doc.tweet.topic although
# the documents store "topic" at the top level.
update_result = tweets_collection.update_many(
    {"topic": "quarantine"},
    {"$set": {"topic": "Quarantine"}},
)
update_result.modified_count  # number of documents that were changed

In [None]:
def load_dict_smileys():
    """Return a dict mapping emoticon strings to a sentiment word.

    Every key maps to one of four words: "smiley", "sad", "playful" or
    "love". Keys are inserted group by group in the order listed below.
    """
    smiley_tokens = (
        ":‑)", ":-]", ":-3", ":->", "8-)", ":-}",
        ":)", ":]", ":3", ":>", "8)", ":}",
        ":o)", ":c)", ":^)", "=]", "=)", ":-))",
        ":‑D", "8‑D", "x‑D", "X‑D",
        ":D", "8D", "xD", "XD",
    )
    sad_tokens = (
        ":‑(", ":‑c", ":‑<", ":‑[",
        ":(", ":c", ":<", ":[",
        ":-||", ">:[", ":{", ":@", ">:(",
        ":'‑(", ":'(",
    )
    playful_tokens = (
        ":‑P", "X‑P", "x‑p", ":‑p", ":‑Þ", ":‑þ", ":‑b",
        ":P", "XP", "xp", ":p", ":Þ", ":þ", ":b",
    )
    love_tokens = ("<3",)

    groups = (
        ("smiley", smiley_tokens),
        ("sad", sad_tokens),
        ("playful", playful_tokens),
        ("love", love_tokens),
    )

    table = {}
    for meaning, tokens in groups:
        for token in tokens:
            table[token] = meaning
    return table

# self defined contractions
def load_dict_contractions():
    """Return a dict mapping contractions and slang to their expanded form.

    Keys are matched as whole words by the callers; values are the
    replacement text.
    """
    expansions = (
        ("ain't", "is not"),
        ("amn't", "am not"),
        ("aren't", "are not"),
        ("can't", "cannot"),
        ("'cause", "because"),
        ("couldn't", "could not"),
        ("couldn't've", "could not have"),
        ("could've", "could have"),
        ("daren't", "dare not"),
        ("daresn't", "dare not"),
        ("dasn't", "dare not"),
        ("didn't", "did not"),
        ("doesn't", "does not"),
        ("don't", "do not"),
        ("e'er", "ever"),
        ("em", "them"),
        ("everyone's", "everyone is"),
        ("finna", "fixing to"),
        ("gimme", "give me"),
        ("gonna", "going to"),
        ("gon't", "go not"),
        ("gotta", "got to"),
        ("hadn't", "had not"),
        ("hasn't", "has not"),
        ("haven't", "have not"),
        ("he'd", "he would"),
        ("he'll", "he will"),
        ("he's", "he is"),
        ("he've", "he have"),
        ("how'd", "how would"),
        ("how'll", "how will"),
        ("how're", "how are"),
        ("how's", "how is"),
        ("I'd", "I would"),
        ("I'll", "I will"),
        ("I'm", "I am"),
        ("I'm'a", "I am about to"),
        ("I'm'o", "I am going to"),
        ("isn't", "is not"),
        ("it'd", "it would"),
        ("it'll", "it will"),
        ("it's", "it is"),
        ("I've", "I have"),
        ("kinda", "kind of"),
        ("let's", "let us"),
        ("mayn't", "may not"),
        ("may've", "may have"),
        ("mightn't", "might not"),
        ("might've", "might have"),
        ("mustn't", "must not"),
        ("mustn't've", "must not have"),
        ("must've", "must have"),
        ("needn't", "need not"),
        ("ne'er", "never"),
        ("o'", "of"),
        ("o'er", "over"),
        ("ol'", "old"),
        ("oughtn't", "ought not"),
        ("shalln't", "shall not"),
        ("shan't", "shall not"),
        ("she'd", "she would"),
        ("she'll", "she will"),
        ("she's", "she is"),
        ("shouldn't", "should not"),
        ("shouldn't've", "should not have"),
        ("should've", "should have"),
        ("somebody's", "somebody is"),
        ("someone's", "someone is"),
        ("something's", "something is"),
        ("that'd", "that would"),
        ("that'll", "that will"),
        ("that're", "that are"),
        ("that's", "that is"),
        ("there'd", "there would"),
        ("there'll", "there will"),
        ("there're", "there are"),
        ("there's", "there is"),
        ("these're", "these are"),
        ("they'd", "they would"),
        ("they'll", "they will"),
        ("they're", "they are"),
        ("they've", "they have"),
        ("this's", "this is"),
        ("those're", "those are"),
        ("'tis", "it is"),
        ("'twas", "it was"),
        ("wanna", "want to"),
        ("wasn't", "was not"),
        ("we'd", "we would"),
        ("we'd've", "we would have"),
        ("we'll", "we will"),
        ("we're", "we are"),
        ("weren't", "were not"),
        ("we've", "we have"),
        ("what'd", "what did"),
        ("what'll", "what will"),
        ("what're", "what are"),
        ("what's", "what is"),
        ("what've", "what have"),
        ("when's", "when is"),
        ("where'd", "where did"),
        ("where're", "where are"),
        ("where's", "where is"),
        ("where've", "where have"),
        ("which's", "which is"),
        ("who'd", "who would"),
        ("who'd've", "who would have"),
        ("who'll", "who will"),
        ("who're", "who are"),
        ("who's", "who is"),
        ("who've", "who have"),
        ("why'd", "why did"),
        ("why're", "why are"),
        ("why's", "why is"),
        ("won't", "will not"),
        ("wouldn't", "would not"),
        ("would've", "would have"),
        ("y'all", "you all"),
        ("you'd", "you would"),
        ("you'll", "you will"),
        ("you're", "you are"),
        ("you've", "you have"),
        ("Whatcha", "What are you"),
        ("luv", "love"),
        ("sux", "sucks"),
    )
    return dict(expansions)