In [2]:
import pickle
import random
import nltk
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
import zipfile
import gensim
from tensorflow import keras
import tensorflow as tf
import json
import tweepy as tw

In [3]:
# Load the raw dataset. The pickle is decoded with encoding='latin1', which is
# what the original private `pickle._Unpickler` + `u.encoding` combination did;
# `pickle.load` exposes the same option through the public API.
# WARNING: unpickling can execute arbitrary code — only load 'raw.pickle' if it
# comes from a trusted source.
with open('raw.pickle', 'rb') as f:
    data = pickle.load(f, encoding='latin1')

In [4]:
# Show how many texts were loaded, then list the top-level keys of the dataset dict.
print(len(data['texts']))
data.keys()

7480


dict_keys(['info', 'texts', 'val_ind', 'test_ind', 'train_ind'])

In [5]:
# Preview the first five 'info' records; each holds a 'label' array.
data['info'][:5]

[{'label': array([1., 0., 0., 0., 0., 0., 0.])},
 {'label': array([0., 1., 0., 0., 0., 0., 0.])},
 {'label': array([0., 0., 1., 0., 0., 0., 0.])},
 {'label': array([0., 0., 0., 1., 0., 0., 0.])},
 {'label': array([0., 0., 0., 0., 1., 0., 0.])}]

In [6]:
# Human-readable emotion names, one per class.
# NOTE(review): presumably index i names the emotion at one-hot position i of
# data['info'][k]['label'] — confirm against the dataset documentation.
labels_text = ['joy', 'fear', 'anger', 'sadness', 'disgust', 'shame', 'guilt']

In [7]:
# Collapse each 'label' array to the index of its largest entry (the class id).
labels = [np.argmax(el['label']) for el in data['info']]
labels[:5]

[0, 1, 2, 3, 4]

In [8]:
# Raw text samples, kept under a shorter name; preview the first five.
texts = data['texts']
texts[:5]

['During the period of falling in love, each time that we met and especially when we had not met for a long time.',
 'When I was involved in a traffic accident.',
 'When I was driving home after  several days of hard work, there was a motorist ahead of me who was driving at 50 km/hour and refused, despite his low speeed to let me overtake.',
 'When I lost the person who meant the most to me. ',
 "The time I knocked a deer down - the sight of the animal's injuries and helplessness.  The realization that the animal was so badly hurt that it had to be put down, and when the animal screamed at the moment of death."]

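Pretrained word2vec embeddings are taken from the NLPL word-embedding repository (http://vectors.nlpl.eu/repository/); the downloaded archive is loaded below as `18.zip`.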

In [9]:
%%time

# Read the text-format word2vec model straight out of the zip archive.
# Both the archive and the member stream are opened as context managers so the
# underlying file handle is released as soon as loading finishes (the member
# stream was previously never closed).
with zipfile.ZipFile("18.zip", "r") as archive, archive.open("model.txt") as stream:
    word2vec_model = gensim.models.KeyedVectors.load_word2vec_format(
        stream,
        binary=False,
        unicode_errors='replace',  # substitute undecodable bytes instead of failing
    )

CPU times: user 1min 27s, sys: 623 ms, total: 1min 28s
Wall time: 1min 28s


In [10]:
# Peek at the first ten vocabulary entries of the loaded model.
# NOTE(review): `index2word` is the gensim 3.x attribute name; gensim 4 is
# believed to expose this as `index_to_key` — confirm against the installed version.
word2vec_model.index2word[:10]

['said',
 'also',
 'would',
 'one',
 'first',
 'two',
 'year',
 "n't",
 'percent',
 'people']

In [11]:
def vectorize_texts(texts, model=None):
    """Turn each text into one vector by averaging its word embeddings.

    Every text is split into ``\\w+`` tokens, lower-cased, and looked up in
    the embedding model; tokens missing from the model are skipped. A text
    with no known tokens (including an empty text) is represented by an
    all-zero vector.

    Parameters
    ----------
    texts : iterable of str
        Texts to vectorize.
    model : mapping-like, optional
        Object supporting ``model[word]`` (raising ``KeyError`` for unknown
        words) and exposing ``vector_size``. Defaults to the notebook-level
        ``word2vec_model``.

    Returns
    -------
    list of numpy.ndarray
        One 1-D vector of length ``model.vector_size`` per input text, in
        input order.
    """
    if model is None:
        # Resolved at call time so the notebook-level model can be (re)loaded
        # after this function has been defined.
        model = word2vec_model

    tokenizer = nltk.RegexpTokenizer(r'\w+')
    # Take the dimensionality from the model instead of hard-coding 300, so the
    # fallback vector always matches the shape of the averaged vectors.
    dim = model.vector_size

    vectors = []
    for text in texts:
        word_vectors = []
        for token in tokenizer.tokenize(text):
            try:
                word_vectors.append(model[token.lower()])
            except KeyError:
                # Out-of-vocabulary token: contributes nothing to the mean.
                continue
        if word_vectors:
            vectors.append(np.mean(word_vectors, axis=0))
        else:
            # Float zeros (not ints) so every returned vector has a float dtype.
            vectors.append(np.zeros(dim, dtype=np.float32))
    return vectors

# One averaged embedding vector per dataset text, in the same order as `texts`.
vectors = vectorize_texts(texts)

In [12]:
# Persist the feature vectors and their class labels as two separate pickles.
for path, payload in (('vectors.pkl', vectors), ('labels.pkl', labels)):
    with open(path, 'wb') as f:
        pickle.dump(payload, f)

# Get Twitter Data

In [13]:
# Read the Twitter API credentials from a local JSON file rather than embedding
# them in the notebook. The cells below read the keys CONSUMER_KEY,
# CONSUMER_SECRET, ACCESS_KEY and ACCESS_SECRET from this dict.
with open('twitter_credentials.json', 'r') as secret_info:
    twitter_cred = json.load(secret_info)

In [14]:
# Build an authenticated tweepy client from the loaded credentials.
auth = tw.OAuthHandler(twitter_cred["CONSUMER_KEY"], twitter_cred["CONSUMER_SECRET"])
auth.set_access_token(twitter_cred["ACCESS_KEY"], twitter_cred["ACCESS_SECRET"])
# NOTE(review): wait_on_rate_limit=True is expected to make tweepy wait when the
# rate limit is hit instead of raising — confirm for the installed tweepy version.
api = tw.API(auth, wait_on_rate_limit=True)

In [15]:
def get_tweets_by_search(search_term, count=100, lang="en"):
    """Return the text of tweets matching a Twitter search query.

    Parameters
    ----------
    search_term : str
        Query string passed to the search endpoint as ``q``.
    count : int, optional
        Maximum number of tweets to fetch (default 100). The search may yield
        fewer results than requested, so callers should not assume the
        returned list has exactly ``count`` items.
    lang : str, optional
        Language filter passed to the search endpoint (default ``"en"``).

    Returns
    -------
    list of str
        The ``text`` attribute of each returned tweet, in cursor order.
        NOTE(review): several texts in the notebook outputs end with an
        ellipsis and a t.co link, which suggests ``text`` is truncated in the
        default tweet mode — confirm whether full text is needed.
    """
    # tweepy 4 renamed API.search to API.search_tweets; use whichever the
    # installed client provides so the notebook works with both.
    search = getattr(api, "search_tweets", None)
    if search is None:
        search = api.search

    cursor = tw.Cursor(search, q=search_term, lang=lang)
    return [tweet.text for tweet in cursor.items(count)]

In [16]:
# Keyword proxy for the 'joy' class (label 0): tweets are not hand-labelled,
# the search query itself stands in for the emotion.
joy_tweets = get_tweets_by_search('"wish you" OR excited -filter:retweets')
joy_tweets[:5]

['Last time a guest was wearing edm I got excited, until she was rude af and made me want to rage quit \U0001f97a https://t.co/as1PvN8R0n',
 'IM lecture marathon with IMD students ako starting tomorrow as make up classes because of holidays and earthquakes.… https://t.co/umIm2Ut6kC',
 "I am so excited to be working with @deadbirds_net on my book trailers. I can't wait to see the finished versions!!… https://t.co/priucWTkNh",
 '@Mrs_Steed19 a lot of my stuff is gender neutral too like grey and white! I’m so excited for you!',
 'wish you were here']

In [17]:
# Keyword proxy for the 'fear' class (label 1).
fear_tweets = get_tweets_by_search('terrified OR scared -filter:retweets')
fear_tweets[:5]

['IM SO TERRIFIED OF HOW SECRETIVE THEYRE BEING ABT S4 AS IF THERE’S HARDLY EVEN ANY MINOR THINGS THEY CAN SPOIL LIKE… https://t.co/0LBGmuRLpr',
 'OMG THE DEMS MUST BE TERRIFIED! Extrapolated Results from FOX News Poll Show 70% of Americans (Reps and Inds) Are A… https://t.co/bZm0Ytszln',
 'yesterday i orderd food from zomato for my husband and his frien, who were at my oher flat..That area is still deve… https://t.co/tCyKcRvrUB',
 'OMG THE DEMS MUST BE TERRIFIED!  Extrapolated Results from FOX News Poll Show 70% of Americans (Reps and Inds) Are… https://t.co/bOjrsaGndN',
 'NO WAIT MARTY IM SCARED I MIGHT NOT COME BACK']

In [18]:
# Keyword proxy for the 'anger' class (label 2).
# NOTE(review): this query is topical ("taxes trump") and contains no explicit
# anger term — confirm it is an acceptable stand-in for the emotion.
anger_tweets = get_tweets_by_search('taxes trump -filter:retweets')
anger_tweets[:5]

['if trump is sooo good the. why am i struggling to make it? huh? i legit work 40 hours a week and over half goes on… https://t.co/AO04YlIOYE',
 '@marklevinshow @realDonaldTrump And trump lied to voters to get elected in 2016 by PROMISING that Mex would pay for… https://t.co/j9mIDjTmf7',
 '@Meb7777i @eugenegu @IvankaTrump @SenWarren And... There are jobs out there &amp; thanks to Trump &amp; Ivanka, you can get… https://t.co/hmem7OPV4W',
 '#BREAKING!\n\n@realDonaldTrump says #Republicans should release their own taxes in impeachment probe | \n https://t.co/6s1FMsuNFw #BreakingNews',
 '@SteveScalise @realDonaldTrump I can’t wait till Trump’s taxes come out😱😱😱that will be a movie in itself😱😱impeachme… https://t.co/TEkbv4kNiU']

In [19]:
# Keyword proxy for the 'sadness' class (label 3).
sadness_tweets = get_tweets_by_search('depressed cry OR "not fine" -filter:retweets')
sadness_tweets[:5]

["Idk y'all but I already wrote my Christmas letter, have a look \n\n“Dear Santa,\n\nThis year I just want to lay down in… https://t.co/O0J7RmlVAN",
 'I woke up from a nightmare about my weight &amp; appearance and I’m. Really depressed now why does a stupid as dream make me want to cry',
 'I’m actually so depressed all I do is cry and drink lucazade',
 'This may sound mental but does anyone else just want to watch a tragedy like romantic tragedy to enjoy? like have a… https://t.co/SURS6SCzQM',
 'If you girl can’t make you bawl, if you can’t cry around her, if she tell people that you bawl around her to make y… https://t.co/7INpfvTMEw']

In [20]:
# Keyword proxy for the 'disgust' class (label 4).
disgust_tweets = get_tweets_by_search('disgust OR disgusting -filter:retweets')
disgust_tweets[:5]

['Disgusting!! https://t.co/rK21orvVfH',
 "@ushapadhee1996 You can't solve a problem by dodging it. Hope is idealistic not pragmatic. All have to face the pro… https://t.co/IPzMhOFlMZ",
 "I'm gonna start muting all accounts posting that vile tramp dancing around. I'm beyond sick of seeing her disgusting ass.",
 '@PostOffice parcel to be delivered was left in general waste bin which was full of rubbish! So disgusting!',
 '@RAwamleh so disgusting']

In [21]:
# Keyword proxy for the 'shame' class (label 5).
# NOTE(review): the query uses the misspellings "embarassing"/"embarassed", so
# only tweets with that spelling can match on these terms — confirm whether the
# correct spelling ("embarrassing") was intended or should be added.
shame_tweets = get_tweets_by_search('embarassing OR "feel embarassed" -filter:retweets')
shame_tweets[:5]

['@DarthBlount47 OC is a problem. Facts. Rudolph needs to not hold on to ball so long and make better reads/throws. B… https://t.co/yTzrcD4eD8',
 'everytime i make an adult phone call i always end up embarassing myself.',
 'theyre so embarassing i love them more than anything https://t.co/kP99dAnErd',
 "don't even get near me if you feel embarassed to let people know that we're close enough to joke around 😌",
 'sit down gramps. this is more embarassing for you than you think it is  https://t.co/4lJcNdsmwP']

In [22]:
# Keyword proxy for the 'guilt' class (label 6).
guilt_tweets = get_tweets_by_search('"self blame" OR "feel guilty" -filter:retweets')
guilt_tweets[:5]

['I fucking hate that I feel guilty about talking loudly and excitedly about things or taking up space. I deserve to… https://t.co/SxmK88DTtX',
 '@p_nthabeleng_ @asandamaswazi 😂😂😂😂 why do I feel guilty 🙈🙈🏃🏃🏃🏃',
 '@pc_irrelevant Do I feel guilty and would I do it again. https://t.co/OIBQLtpyNy',
 "@law83692622 @cheryl_english I'm cross eyed driving home headaches then most evenings feel guilty as I tell Dad how… https://t.co/2ZBcT2k7P9",
 'Never feel guilty about doing what is best for you \n#MotivationMonday #wellness']

In [23]:
# Flatten the per-emotion batches into one list, keeping the class order
# joy, fear, anger, sadness, disgust, shame, guilt.
tweets = [
    tweet
    for batch in (
        joy_tweets,
        fear_tweets,
        anger_tweets,
        sadness_tweets,
        disgust_tweets,
        shame_tweets,
        guilt_tweets,
    )
    for tweet in batch
]
len(tweets)

700

In [86]:
# Build one class label per tweet from the ACTUAL size of each per-emotion
# batch. A search can return fewer tweets than requested, so assuming exactly
# 100 per class would silently shift every later label onto the wrong tweets.
# The batch order here must match the order used to build `tweets`
# (joy, fear, anger, sadness, disgust, shame, guilt -> labels 0..6).
tweet_groups = [
    joy_tweets,
    fear_tweets,
    anger_tweets,
    sadness_tweets,
    disgust_tweets,
    shame_tweets,
    guilt_tweets,
]
labels_tweets = [
    label
    for label, group in enumerate(tweet_groups)
    for _ in group
]
# Fail loudly if labels and tweets ever get out of step.
assert len(labels_tweets) == len(tweets), "labels_tweets and tweets are misaligned"
len(labels_tweets)

700

In [87]:
# Embed the collected tweets with the same averaging scheme used for the dataset texts.
tweets_vectors = vectorize_texts(tweets)

In [88]:
# with open('tweets_vectors.pkl', 'wb') as f:
#     pickle.dump(tweets_vectors, f)
#     
# with open('tweets_labels.pkl', 'wb') as f:
#     pickle.dump(labels_tweets, f)