In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Load the pre-processed tweets that the TF-IDF analysis below runs on.
processed_csv = "data/processed_tweet.csv"
df = pd.read_csv(processed_csv)

# Calculate TF-IDF

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [10]:
# Replace missing tweet text with the placeholder string "None" so that every
# row handed to the vectoriser is a string.
df["text"] = df["text"].fillna("None")

In [11]:
# Fit TF-IDF over the tweet text. English stop words are removed; terms that
# appear in more than 90% of tweets (max_df) or in fewer than 10 tweets
# (min_df) are dropped from the vocabulary.
vectoriser = TfidfVectorizer(stop_words='english', min_df=10, max_df=0.9)
text = df["text"]
matrix = vectoriser.fit_transform(text)

In [12]:
# Dimensions of the TF-IDF matrix: (number of tweets, vocabulary size).
matrix.shape

(6444, 1046)

# Most Important Word

Use TF-IDF to find the most important word in each tweet, i.e. the vocabulary term with the highest TF-IDF score in that tweet.

In [34]:
# For every tweet, pick the vocabulary term with the highest TF-IDF score.
important_words = []
feature_names = vectoriser.get_feature_names_out()

for sentence_idx in range(matrix.shape[0]):
    # Densify one row at a time; ravel() gives a 1-D view without an extra copy.
    tfidf_scores = matrix[sentence_idx].toarray().ravel()
    # A row with no non-zero score has no term left in the vocabulary, so
    # argmax would arbitrarily return column 0. Record a missing value instead.
    # `.any()` is a single vectorised check, replacing the per-element Python
    # `sum(... == np.zeros(...))` comparison.
    if not tfidf_scores.any():
        important_words.append(np.nan)
        continue
    max_idx = np.argmax(tfidf_scores)  # first column holding the top score
    important_words.append(feature_names[max_idx])

df["most_important_word"] = important_words

In [36]:
# Preview the processed tweets together with the new most_important_word column.
df.head()

Unnamed: 0,id,handle,text,is_retweet,original_author,time,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,is_quote_status,...,place_country_code,place_country,place_contained_within,place_attributes,place_bounding_box,source_url,truncated,entities,extended_entities,most_important_word
0,780925634159796224,HillaryClinton,question election put plans action make life b...,False,,2016-09-28T00:22:34,,,,False,...,,,,,,https://studio.twitter.com,False,{'media': [{'display_url': 'pic.twitter.com/Xr...,{'media': [{'display_url': 'pic.twitter.com/Xr...,question
1,780916180899037184,HillaryClinton,last night donald trump said paying taxes smar...,True,timkaine,2016-09-27T23:45:00,,,,False,...,,,,,,http://twitter.com,False,{'media': [{'display_url': 'pic.twitter.com/t0...,{'media': [{'display_url': 'pic.twitter.com/t0...,paying
2,780911564857761793,HillaryClinton,couldnt proud hillaryclinton vision command la...,True,POTUS,2016-09-27T23:26:40,,,,False,...,,,,,,https://about.twitter.com/products/tweetdeck,False,"{'user_mentions': [{'id_str': '1536791610', 'n...",,showed
3,780907038650068994,HillaryClinton,stand together theres nothing cant make sure y...,False,,2016-09-27T23:08:41,,,,False,...,,,,,,https://studio.twitter.com,False,{'media': [{'display_url': 'pic.twitter.com/Q3...,{'media': [{'display_url': 'pic.twitter.com/Q3...,ready
4,780897419462602752,HillaryClinton,candidates asked theyd confront racial injusti...,False,,2016-09-27T22:30:27,,,,False,...,,,,,,https://about.twitter.com/products/tweetdeck,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,answer


In [37]:
# Attach the per-tweet keyword to the original (unprocessed) tweets.
tweets = pd.read_csv("data/tweets.csv")

# The column assignment below aligns rows by index label, so it is only correct
# when both files contain the same tweets in the same order. Verify that
# explicitly rather than silently attaching keywords to the wrong tweets.
assert len(tweets) == len(df), (
    "data/tweets.csv and data/processed_tweet.csv have different row counts"
)
assert (tweets["id"].to_numpy() == df["id"].to_numpy()).all(), (
    "tweet ids in data/tweets.csv and data/processed_tweet.csv are not row-aligned"
)

tweets["most_important_word"] = df["most_important_word"]

In [38]:
# Preview the original tweets together with the attached most_important_word column.
tweets.head()

Unnamed: 0,id,handle,text,is_retweet,original_author,time,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,is_quote_status,...,place_country_code,place_country,place_contained_within,place_attributes,place_bounding_box,source_url,truncated,entities,extended_entities,most_important_word
0,780925634159796224,HillaryClinton,The question in this election: Who can put the...,False,,2016-09-28T00:22:34,,,,False,...,,,,,,https://studio.twitter.com,False,{'media': [{'display_url': 'pic.twitter.com/Xr...,{'media': [{'display_url': 'pic.twitter.com/Xr...,question
1,780916180899037184,HillaryClinton,"Last night, Donald Trump said not paying taxes...",True,timkaine,2016-09-27T23:45:00,,,,False,...,,,,,,http://twitter.com,False,{'media': [{'display_url': 'pic.twitter.com/t0...,{'media': [{'display_url': 'pic.twitter.com/t0...,paying
2,780911564857761793,HillaryClinton,Couldn't be more proud of @HillaryClinton. Her...,True,POTUS,2016-09-27T23:26:40,,,,False,...,,,,,,https://about.twitter.com/products/tweetdeck,False,"{'user_mentions': [{'id_str': '1536791610', 'n...",,showed
3,780907038650068994,HillaryClinton,"If we stand together, there's nothing we can't...",False,,2016-09-27T23:08:41,,,,False,...,,,,,,https://studio.twitter.com,False,{'media': [{'display_url': 'pic.twitter.com/Q3...,{'media': [{'display_url': 'pic.twitter.com/Q3...,ready
4,780897419462602752,HillaryClinton,Both candidates were asked about how they'd co...,False,,2016-09-27T22:30:27,,,,False,...,,,,,,https://about.twitter.com/products/tweetdeck,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,answer


In [40]:
# Save the original tweets plus the keyword column; the row index is not written.
output_csv = "data/tweets_wmost_important_word.csv"
tweets.to_csv(output_csv, index=False)

# Building a Visualisation

In [41]:
# Processed tweets whose top TF-IDF term is "2nd".
df.loc[df["most_important_word"].eq("2nd")]

Unnamed: 0,id,handle,text,is_retweet,original_author,time,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,is_quote_status,...,place_country_code,place_country,place_contained_within,place_attributes,place_bounding_box,source_url,truncated,entities,extended_entities,most_important_word
411,777022559367770112,realDonaldTrump,crooked hillary wants take 2nd amendment right...,False,,2016-09-17T05:53:09,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
1504,763385288295055360,realDonaldTrump,dbongino cut mic bongino refuses bullied lemon...,False,,2016-08-10T14:43:30,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [{'id_str': '232901331', 'na...",,2nd
3105,747874856130183168,realDonaldTrump,check 2nd video lying crooked hillary online w...,False,,2016-06-28T19:30:35,,,,False,...,,,,,,http://twitter.com/download/iphone,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
3533,742066996767031296,realDonaldTrump,reporting orlando killer shouted allah hu akba...,False,,2016-06-12T18:52:13,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
5302,709687052384731136,realDonaldTrump,bring jobs back america fix military take care...,False,,2016-03-15T10:25:53,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
5436,706149793697902592,realDonaldTrump,would anyone kentucky listen failed presidenti...,False,,2016-03-05T16:10:05,,,,False,...,,,,,,http://twitter.com/download/iphone,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
5995,694600151894212608,realDonaldTrump,morningjoe mikebarnicle realdonaldtrump finish...,False,,2016-02-02T19:15:55,,,,False,...,,,,,,http://twitter.com,False,"{'user_mentions': [{'id_str': '254117355', 'na...",,2nd
6001,694551687080644612,realDonaldTrump,experience iowa great one started experts sayi...,False,,2016-02-02T16:03:21,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd


In [42]:
# The same selection on the original tweets, to read the unprocessed text.
is_2nd = tweets["most_important_word"].eq("2nd")
tweets[is_2nd]

Unnamed: 0,id,handle,text,is_retweet,original_author,time,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,is_quote_status,...,place_country_code,place_country,place_contained_within,place_attributes,place_bounding_box,source_url,truncated,entities,extended_entities,most_important_word
411,777022559367770112,realDonaldTrump,Crooked Hillary wants to take your 2nd Amendme...,False,,2016-09-17T05:53:09,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
1504,763385288295055360,realDonaldTrump,"""@dbongino: ‘Now cut off my mic!’ Bongino refu...",False,,2016-08-10T14:43:30,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [{'id_str': '232901331', 'na...",,2nd
3105,747874856130183168,realDonaldTrump,Check it out - 2nd video on Lying Crooked Hill...,False,,2016-06-28T19:30:35,,,,False,...,,,,,,http://twitter.com/download/iphone,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
3533,742066996767031296,realDonaldTrump,"Reporting that Orlando killer shouted ""Allah h...",False,,2016-06-12T18:52:13,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
5302,709687052384731136,realDonaldTrump,"I will bring our jobs back to America, fix our...",False,,2016-03-15T10:25:53,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
5436,706149793697902592,realDonaldTrump,Why would anyone in Kentucky listen to failed ...,False,,2016-03-05T16:10:05,,,,False,...,,,,,,http://twitter.com/download/iphone,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
5995,694600151894212608,realDonaldTrump,.@Morning_Joe @mikebarnicle on @realDonaldTru...,False,,2016-02-02T19:15:55,,,,False,...,,,,,,http://twitter.com,False,"{'user_mentions': [{'id_str': '254117355', 'na...",,2nd
6001,694551687080644612,realDonaldTrump,My experience in Iowa was a great one. I start...,False,,2016-02-02T16:03:21,,,,False,...,,,,,,http://twitter.com/download/android,False,"{'user_mentions': [], 'symbols': [], 'urls': [...",,2nd
