# Exploring Non-linear Dynamics on Twitter Conversation #

#### Used the ```tweepy``` package to collect tweets on the Germany vs Mexico game on the 17th June, 2018. ####

### Research Questions ####
Given two adversarial groups of people in discussion on a topic of high personal importance, what dynamics can be observed when emotional perturbations are exogenously imposed on the population.

#### hypothesis 1 ####
We expect to see stickiness in expressed emotions following the occurrence of a high impact event, where the emotional effect lingers on after the event due to transience. 

#### hypothesis 2 ####
We expect that this transience is culture dependent and will change according to the characteristics of the individuals.

#### hypothesis 3 ####
We expect to observe a duality in the activity regimes of users.

<img src="polarityseries.jpeg">

In [1]:
def delay_coordinate_embedding(data_file,tau,m):
    time_series_data = np.array([])
    for line in data_file:
        time_series_data = np.append(time_series_data,float(line))
    projection = np.array([])
    for i in np.arange((m-1)*tau,time_series_data.shape[0],1):
        delay_coordinate = np.array([])
        for j in np.arange(0,m*tau,tau):
            delay_coordinate = np.append(delay_coordinate,time_series_data[i-j])
        if(i == (m-1)*tau):
            projection = delay_coordinate
        projection = np.column_stack((projection,delay_coordinate))
    return projection

* Used TISEAN to produce a surrogate timeseries of polarity scores.
* False nearest neighbors gave best dimension as 5
* mutual information gave best tau as 100

In [3]:
import numpy as np 
df = open("../twitractors/surrogate_polarity_germex.csv",'r')
embedding = delay_coordinate_embedding(df,5,100)
plt.plot(embedding[0,:],embedding[1,:])
plt.show()

NameError: name 'plt' is not defined

In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

def plotLanguageHistiogram(lang_dict, fig_path):
    
    tableau20 = [(31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),   
                (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),   
                (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),   
                (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),   
                (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]
    
    for i in range(len(tableau20)):   
        r, g, b = tableau20[i]   
        tableau20[i] = (r / 255., g / 255., b / 255.)

    plt.figure(figsize=(0.5*len(lang_dict.keys()), 4))
    ax = plt.subplot(111)
    ax.get_xaxis().tick_bottom()   
    ax.get_yaxis().tick_left()
    ax.yaxis.set_tick_params(width=1,length=3)
    ax.xaxis.set_tick_params(width=1,length=3)
    ax.spines["top"].set_visible(False)   
    ax.spines["bottom"].set_visible(False)   
    ax.spines["right"].set_visible(False)   
    ax.spines["left"].set_visible(False)  
    plt.ylim(-100, 230000)

    plt.bar(range(len(lang_dict)), lang_dict.values(), align='center')
    plt.xticks(range(len(lang_dict)), list(lang_dict.keys()), fontsize=12)


    #fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
    #fmt = '%.0f' # Format you want the ticks, e.g. '40%'

    #yticks = mtick.FormatStrFormatter(fmt)
    #ax.yaxis.set_major_formatter(yticks)

    ax.set_xlabel('language', fontsize=15)
    ax.set_ylabel('# tweets', fontsize=15)

    plt.savefig(fig_path, bbox_inches="tight")

In [None]:
from langdetect import detect
import pandas as pd
parse_dates = ['time']
all_data = pd.read_csv('text_location_timeseries.csv', parse_dates = parse_dates, low_memory = False)

texts = all_data['text']

dectected_languages = []
for text in texts:
    try:
        language = detect(text)
    except:
        language = '?'
    dectected_languages.append(language)
# get frequency dictionaries for languages

from collections import Counter

specified_languages_dict = Counter(all_data['lang'])
detected_languages_dict = Counter(dectected_languages)
# exclude languages from language frequency dictionary that appear less often than cutoff

def filteringLangDict(lang_dict, cutoff):
    filtered_dict = {}
    for key in lang_dict.keys():
        if lang_dict[key] > cutoff:
            filtered_dict[key] = lang_dict[key]
    return filtered_dict
fig_path_detected_c = 'language_detection_detected_cutoff.jpeg'
fig_path_specified_c = 'language_detection_twitter_cutoff.jpeg'

test = plotLanguageHistiogram(filteringLangDict(detected_languages_dict,1000),fig_path_detected_c)
test = plotLanguageHistiogram(filteringLangDict(specified_languages_dict,1000),fig_path_specified_c)
