# Monitoring changes in related words over time.

### This notebook shows how the words most closely related to a chosen word change from one time window to the next.

In [1]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords
from sklearn.decomposition import PCA

In [2]:
# Column layout of the header-less tweet export, listed in file order.
columns = [
    'tweet_id', 'timestamp', 'tweet_text', 'user_id',
    'tweet_coords', 'tweet_coords_list', 'tweet_long', 'tweet_lat', 'location',
    'enc_url', 'tweet_lang', 'hashtags',
]

# Load the tweets and index them by their parsed timestamp so that later cells
# can select rows by time range.
tweet_full = pd.read_csv(
    r'./tweetCoords.csv',
    names=columns,
    header=None,                 # the file carries no header row; names come from `columns`
    index_col='timestamp',
    parse_dates=['timestamp'],   # same column as position 1 in `columns`
    # NOTE(review): this flag is deprecated in pandas >= 2.0 - confirm against
    # the pandas version this notebook is pinned to before removing it.
    infer_datetime_format=True,
)

In [3]:
# Tweet-aware tokenizer configured to strip @handles, lower-case tokens and
# shorten exaggerated character runs (going by the option names).
tweet_tokenizer = TweetTokenizer(
    preserve_case=False,
    strip_handles=True,
    reduce_len=True,
)

# English stop words; tokens matching these are discarded when tweets are tokenized.
tweet_stops = stopwords.words('english')

What's the word we're comparing similarity to?

In [4]:
# Term whose nearest neighbours are tracked across windows. Keep it lower-case:
# the tokenizer is created with preserve_case=False, so every token is lower-cased.
search_term = 'storm'

From here on, we iterate over successive time windows.

In [5]:
# Analysis range: consecutive windows of length `date_delta`, the first starting
# at `tweet_date` and the last starting at `end_date`.
date_delta = pd.Timedelta("24HR")
tweet_date = pd.to_datetime("2017-09-08 00:00:00")
end_date = pd.to_datetime("2017-09-15 00:00:00")

# Empty accumulator for the per-window similarity results.
related_words = pd.DataFrame()

In [6]:
# For every window, train a fresh Word2Vec model on that window's tweets and
# record the terms most similar to `search_term`.
#
# The result, `related_words`, holds two columns per window (term, similarity),
# laid out left to right in chronological order.

# Build the stop-word lookup once: set membership is O(1), whereas the original
# list was scanned for every token of every tweet.
stop_set = set(tweet_stops)

# Collect one frame per window and concatenate once after the loop. This avoids
# re-copying the growing frame on every iteration, and makes the cell safe to
# re-run (it no longer appends to whatever `related_words` already contained).
daily_frames = []

for tweet_day in pd.date_range(start=tweet_date, end=end_date, freq=date_delta):
    print(tweet_day) # this line is just here for diagnostic purposes.

    # Label slicing on a DatetimeIndex includes BOTH endpoints, so stop one
    # nanosecond short of the next window's start. Otherwise a tweet stamped
    # exactly on a boundary would be counted in two consecutive windows.
    window_end = tweet_day + date_delta - pd.Timedelta(1, unit="ns")
    tweet_text = tweet_full.loc[tweet_day:window_end, "tweet_text"]

    # Tokenize each tweet and drop stop words.
    tweets_tokens = tweet_text.apply(
        lambda x: [word for word in tweet_tokenizer.tokenize(x) if word not in stop_set]
    )

    # A new model per window, so similarities reflect only that window's tweets.
    vector_model = Word2Vec(tweets_tokens, min_count=15, sg=1, window=4)

    # 2-D PCA projection of the window's word vectors.
    # NOTE(review): `result` is not used by any cell visible here; it is kept in
    # case a later cell reads it - drop these three lines if nothing does.
    word_matrix = vector_model.wv[vector_model.wv.vocab]
    pca = PCA(n_components=2)
    result = pca.fit_transform(word_matrix)

    # most_similar yields (term, similarity) pairs -> a two-column frame.
    daily_frames.append(
        pd.DataFrame.from_records(vector_model.wv.most_similar(search_term))
    )

# pd.concat rejects an empty list, so fall back to an empty frame when the
# date range produced no windows.
related_words = pd.concat(daily_frames, axis=1) if daily_frames else pd.DataFrame()

2017-09-08 00:00:00
2017-09-09 00:00:00
2017-09-10 00:00:00
2017-09-11 00:00:00
2017-09-12 00:00:00
2017-09-13 00:00:00
2017-09-14 00:00:00
2017-09-15 00:00:00


In [7]:
# Show the full table: each window contributes a pair of columns
# (related term, similarity score), in chronological order.
related_words

Unnamed: 0,0,1,0.1,1.1,0.2,1.2,0.3,1.3,0.4,1.4,0.5,1.5,0.6,1.6,0.7,1.7
0,calm,0.94635,surge,0.907672,surge,0.892879,surge,0.943319,clean,0.98632,trees,0.974065,thanks,0.996102,made,0.990625
1,#hurricaneirma,0.940089,calm,0.887879,tropical,0.867548,tropical,0.919578,little,0.98565,office,0.967436,away,0.993515,damage,0.989353
2,prep,0.927381,strong,0.866568,hurricane,0.856402,hurricane,0.909756,survived,0.984759,church,0.960387,came,0.993113,since,0.988853
3,surge,0.919667,emergency,0.859832,media,0.856101,#jax,0.901502,beautiful,0.980893,welcome,0.959782,electricity,0.992815,helping,0.985541
4,strong,0.911769,moving,0.855116,#mfl,0.844237,emergency,0.880233,damage,0.979647,beautiful,0.95878,prayers,0.990924,thursday,0.984412
5,path,0.91077,far,0.854192,#jax,0.827318,media,0.873712,house,0.977327,survived,0.957494,yet,0.990533,clean,0.983728
6,state,0.910412,riding,0.852145,calm,0.825644,public,0.87035,mess,0.977325,group,0.956413,cable,0.990223,live,0.981898
7,preparing,0.910144,beginning,0.851486,radio,0.822751,flood,0.857686,lucky,0.977225,#floridastrong,0.949781,hit,0.98997,party,0.981427
8,waiting,0.908537,morning,0.850153,emergency,0.811255,mngr,0.857315,neighborhood,0.976879,blessed,0.949285,luck,0.989429,supplies,0.980859
9,evacuating,0.908052,ahead,0.842962,riding,0.80463,survived,0.854135,yesterday,0.976446,#afterirma,0.946417,running,0.989321,soon,0.980405


In [8]:
# Keep every second column starting from the first, i.e. only the related
# terms for each window, leaving out the similarity scores.
related_words.iloc[:, ::2]

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7
0,calm,surge,surge,surge,clean,trees,thanks,made
1,#hurricaneirma,calm,tropical,tropical,little,office,away,damage
2,prep,strong,hurricane,hurricane,survived,church,came,since
3,surge,emergency,media,#jax,beautiful,welcome,electricity,helping
4,strong,moving,#mfl,emergency,damage,beautiful,prayers,thursday
5,path,far,#jax,media,house,survived,yet,clean
6,state,riding,calm,public,mess,group,cable,live
7,preparing,beginning,radio,flood,lucky,#floridastrong,hit,party
8,waiting,morning,emergency,mngr,neighborhood,blessed,luck,supplies
9,evacuating,ahead,riding,survived,yesterday,#afterirma,running,soon
