# Monitoring changes in related words over time.

### This notebook will show how words related to a particular word will change over time deltas

In [1]:
import pandas as pd
import numpy as np
from gensim.models import Word2Vec
from nltk.tokenize import TweetTokenizer
from nltk.corpus import stopwords
from sklearn.decomposition import PCA

In [2]:
columns = ['tweet_id','timestamp','tweet_text','user_id',
           'tweet_coords','tweet_coords_list','tweet_long','tweet_lat','location',
           'enc_url','tweet_lang','hashtags']
tweet_full = pd.read_csv(r'./tweetCoords.csv',
                         header=None,
                         names=columns,
                         parse_dates=[1],
                         infer_datetime_format=True,
                         index_col='timestamp')

In [18]:
tweet_stops = stopwords.words('english')
tweet_tokenizer = TweetTokenizer(strip_handles=True,preserve_case=False,reduce_len=True)

What's the word we're comparing similarity to?

In [43]:
search_term = "storm"

Starting here, begin the iteration over times.

In [44]:
related_words = pd.DataFrame()
tweet_date = pd.to_datetime("2017-09-08 00:00:00")
date_delta = pd.Timedelta("24HR")
end_date = pd.to_datetime("2017-09-17 00:00:00")

In [45]:
for tweet_day in pd.date_range(start = tweet_date, end = end_date, freq = date_delta):
    print(tweet_day)
    tweet_text = tweet_full.loc[tweet_day:tweet_day + date_delta,"tweet_text"]
    tweets_tokens = tweet_text.apply(lambda x: [word for word in tweet_tokenizer.tokenize(x) if word not in tweet_stops])
    vector_model = Word2Vec(tweets_tokens, min_count=15, sg=1, window=4)
    word_matrix = vector_model.wv[vector_model.wv.vocab]
    pca = PCA(n_components=2)
    result = pca.fit_transform(word_matrix)
    related_words = pd.concat([related_words,pd.DataFrame.from_records(vector_model.wv.most_similar(search_term))],axis=1)

2017-09-08 00:00:00
2017-09-09 00:00:00
2017-09-10 00:00:00
2017-09-11 00:00:00
2017-09-12 00:00:00
2017-09-13 00:00:00
2017-09-14 00:00:00
2017-09-15 00:00:00
2017-09-16 00:00:00
2017-09-17 00:00:00


In [46]:
related_words

Unnamed: 0,0,1,0.1,1.1,0.2,1.2,0.3,1.3,0.4,1.4,0.5,1.5,0.6,1.6,0.7,1.7,0.8,1.8,0.9,1.9
0,calm,0.954908,calm,0.920498,tropical,0.866808,surge,0.955742,clean,0.99499,office,0.975337,away,0.9958,made,0.987971,neighbors,0.996556,late,0.991846
1,#hurricaneirma,0.921577,surge,0.884942,media,0.850365,hurricane,0.919426,lucky,0.992368,keys,0.969065,thanks,0.994555,helping,0.987702,clean,0.996381,end,0.989417
2,boarded,0.917284,strong,0.863493,surge,0.847497,#jax,0.902006,damage,0.990733,pool,0.967907,soon,0.994055,damage,0.984422,across,0.99569,safe,0.988237
3,preparation,0.905267,outer,0.858256,#mfl,0.844983,tropical,0.897893,home,0.988792,trees,0.967625,yet,0.993666,thursday,0.983915,break,0.995505,kinda,0.988082
4,surge,0.902888,riding,0.847123,force,0.833662,media,0.892259,neighborhood,0.986158,open,0.965232,came,0.992739,dinner,0.983858,saw,0.995398,business,0.987676
5,#hurricaneirma2017,0.902633,starting,0.846909,emergency,0.828541,lakeland,0.875331,little,0.985786,church,0.959722,luck,0.992561,lot,0.98355,trucks,0.99519,thanks,0.987513
6,prep,0.89973,coast,0.846373,#jax,0.827453,calm,0.865086,hit,0.985764,beautiful,0.958411,went,0.991967,ride,0.983541,beat,0.995166,lose,0.986465
7,strong,0.896382,bands,0.842958,#mlb,0.825323,state,0.857775,yesterday,0.985668,aftermath,0.956715,making,0.99192,luck,0.983358,minute,0.995115,prayers,0.9864
8,closed,0.896284,emergency,0.840474,radio,0.824388,emergency,0.847628,service,0.984426,group,0.954851,blessed,0.991856,party,0.982899,care,0.99448,little,0.985489
9,path,0.894863,saturday,0.840021,station,0.82434,garden,0.844885,beautiful,0.983243,coffee,0.954272,prayers,0.991517,fall,0.98241,tickets,0.99429,meet,0.985481
