In [23]:
%pylab inline
%matplotlib inline

import os
import sys
import time
import pandas as pd
import csv
import random
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

sys.path.append('/Users/Bya/git/predictEPL/config/')
sys.path.append('/Users/Bya/git/predictEPL/utils/')
sys.path.append('/Users/Bya/git/predictEPL/MyFunctions/')

Populating the interactive namespace from numpy and matplotlib


In [27]:
from dataIO import GetFilenames, csv_dic_df
from converter import toSeconds

from useful_methods import EmolexDic
from useful_methods import PreprocessingTweet
from useful_methods import TweetEmolex

from paths import READ_PATH_EXTRACTED_CSV

from convert_raw_data import FolderFiles

In [2]:
dic_emolex, dic_emolex_stemmed = EmolexDic()

All Words: 14182


In [18]:
def create_minute_dics_emolex(df, side, category, cumulative = False):
    minutes = range(120) 
    dic_minutes = {}

    for minute in minutes:    
        try:
            if cumulative:
                dic_minutes[minute] = sum(df[(df['side'] == side) & \
                                                    (df['ith_minute'] < minute + 1)][category])
            else:
                dic_minutes[minute] = sum(df[(df['side'] == side) & \
                                                    (df['ith_minute'] > minute) & \
                                                    (df['ith_minute'] < minute + 1)][category])
        except:
            dic_minutes[minute] = 0
        
    return (dic_minutes)


def plot_emolex(home_team, list_dic_minutes, colors, categorys, i_plot, limit=True):

    ax = plt.subplot(3,2,i_plot)
    if limit:
        ax.set_xlim([0, 60])
    else:
        ax.set_xlim([0, 120])

    for i in range(len(list_dic_minutes)):
        tweets = tuple(list(list_dic_minutes[i].values()))
        plot1 = plt.plot(tweets, label=categorys[i], alpha=0.5, color=colors[i])

    
    plt.xlabel('Minutes')
    plt.ylabel('Emotion Signals')
    plt.title('Emotion-Lexicon  (Home Team: %s)'  % home_team)
    plt.legend()
    plt.tight_layout()


def plot_three(df, home_team, away_team):
    side = ['home', 'away', 'both']
    categorys_neg = ['anger', 'fear', 'disgust', 'sadness', 'surprise']
    colors_neg = ['red', 'darkorange', 'darksalmon', 'palegreen', 'darkgreen']
    categorys_pos = ['trust', 'joy', 'anticipation']
    colors_pos = ['skyblue', 'purple', 'blue']
    categorys_sent = ['positive', 'negative']
    colors_sent = ['green', 'red']

    plt.figure(figsize=(15, 20))

    list_dic_minutes = []
    for category in categorys_neg:
        list_dic_minutes.append(create_minute_dics_emolex(df, side[0], category))
    plot_emolex(home_team, list_dic_minutes, colors_neg, categorys_neg, 1, False)

    list_dic_minutes = []
    for category in categorys_pos:
        list_dic_minutes.append(create_minute_dics_emolex(df, side[0], category))
    plot_emolex(home_team, list_dic_minutes, colors_pos, categorys_pos, 3, False)


    list_dic_minutes = []
    for category in categorys_sent:
        list_dic_minutes.append(create_minute_dics_emolex(df, side[0], category))
    plot_emolex(home_team, list_dic_minutes, colors_sent, categorys_sent, 5, False)

    
    list_dic_minutes = []
    for category in categorys_neg:
        list_dic_minutes.append(create_minute_dics_emolex(df, side[1], category))
    plot_emolex(away_team, list_dic_minutes, colors_neg, categorys_neg, 2, False)

    list_dic_minutes = []
    for category in categorys_pos:
        list_dic_minutes.append(create_minute_dics_emolex(df, side[1], category))
    plot_emolex(away_team, list_dic_minutes, colors_pos, categorys_pos, 4, False)


    list_dic_minutes = []
    for category in categorys_sent:
        list_dic_minutes.append(create_minute_dics_emolex(df, side[1], category))
    plot_emolex(away_team, list_dic_minutes, colors_sent, categorys_sent, 6, False)


    plt.show()

In [20]:
# read csv data as dataframe
def create_emolex_df(filePath):
    home_team = filePath.split("_")[0][5::]
    away_team = filePath.split("_")[2][0:-4]

    df = csv_dic_df(filePath)

    # adding minute column
    df['ith_minute'] = [float(ith_minute) for ith_minute in df['ith_minute']]

    # adding Emotion-Lexicon columns
    df['anger'] = 0
    df['fear'] = 0
    df['disgust'] = 0
    df['sadness'] = 0
    df['surprise'] = 0
    df['trust'] = 0
    df['joy'] = 0
    df['anticipation'] = 0
    df['positive'] = 0
    df['negative'] = 0


    # adding emotion-lexicon values to dataframe
    for i in range(len(df)):
        tweet = df.loc[i]['text']

        words, words_stemmed, good_tweet = PreprocessingTweet(tweet)
        dic_counted = TweetEmolex(words, words_stemmed, good_tweet)

        df.loc[i, 'anger'] = dic_counted['anger']
        df.loc[i, 'anticipation'] = dic_counted['anticipation']
        df.loc[i, 'disgust'] = dic_counted['disgust']
        df.loc[i, 'fear'] = dic_counted['fear']
        df.loc[i, 'joy'] = dic_counted['joy']
        df.loc[i, 'negative'] = dic_counted['negative']
        df.loc[i, 'positive'] = dic_counted['positive']
        df.loc[i, 'sadness'] = dic_counted['sadness']
        df.loc[i, 'surprise'] = dic_counted['surprise']
        df.loc[i, 'trust'] = dic_counted['trust']
    
    return df

In [21]:
def calculate_df_emolex(df, side):
    dic = {
        'anger': [sum(df[df['side'] == side]['anger'])],
        'fear': [sum(df[df['side'] == side]['fear'])],
        'disgust': [sum(df[df['side'] == side]['disgust'])],
        'sadness': [sum(df[df['side'] == side]['sadness'])],
        'surprise': [sum(df[df['side'] == side]['surprise'])],
        'trust': [sum(df[df['side'] == side]['trust'])],
        'joy': [sum(df[df['side'] == side]['joy'])],
        'anticipation': [sum(df[df['side'] == side]['anticipation'])]
    }
    
    
    dic_sent = {
        'positive': [sum(df[df['side'] == side]['positive'])],
        'negative': [sum(df[df['side'] == side]['negative'])]
    }
    
    
    emolex_sum = float(sum(list(dic.values())))
    emolex_sum_sent = float(sum(list(dic_sent.values())))

    for key in dic.keys():
        dic[key].append(dic[key][0] / emolex_sum)
    
    for key in dic_sent.keys():
        dic_sent[key].append(dic_sent[key][0] / emolex_sum_sent)
    
    return dic, dic_sent

In [44]:
FolderFiles('GW16/SingleGames/', READ_PATH_EXTRACTED_CSV, ends='.csv')

['Bournemouth_vs_United.csv',
 'City_vs_Swansea.csv',
 'Crystal_vs_Southampton.csv',
 'Liverpool_vs_WestBromwich.csv',
 'Norwich_vs_Everton.csv',
 'Sunderland_vs_Watford.csv',
 'Tottenham_vs_Newcastle.csv',
 'Villa_vs_Arsenal.csv',
 'WestHam_vs_Stoke.csv']

In [37]:
READ_PATH_EXTRACTED_CSV

'/Users/Bya/Dropbox/Research/datas/EPL/ExtractedCsvData/'

In [48]:
game_key = 1
for filename in filenames[0:1]:
    home_team = filename.split("_")[0][5::]
    away_team = filename.split("_")[2][0:-4]
    
    start = time.time()

    # create df
    os.chdir(READ_PATH_EXTRACTED_CSV + GW + '/SingleGames/')
    df = create_emolex_df(filename)

    # plots
    plot_three(df, home_team, away_team)

    # time
    print("\n[Time]: %s" % (time.time() - start))

    # games
    print("================================")
    print("\n[Game]: %s vs %s" % (home_team, away_team))

    # calculated dics
    dic_home, dic_sent_home = calculate_df_emolex(df, 'home')
    dic_away, dic_sent_away = calculate_df_emolex(df, 'away')

    print("\n\n\n******[Emotions]******")
    print("---------------------------------------------------")
    print("    CATEGORY \t\t HOME TEAM \t AWAY TEAM")
    for key in dic_home.keys():
        print("---------------------------------------------------")
        print("%12s \t\t %.0f (%.2f) \t %.0f (%.2f)" % (key, dic_home[key][0], dic_home[key][1], 
                                     dic_away[key][0], dic_away[key][1]))

    print("\n\n\n******[Sentiment]******")
    print("---------------------------------------------------")
    print("    CATEGORY \t\t HOME TEAM \t AWAY TEAM")
    for key in dic_sent_home.keys():
        print("---------------------------------------------------")
        print("%12s \t\t %.0f (%.2f) \t %.0f (%.2f)" % (key, dic_sent_home[key][0], dic_sent_home[key][1], 
                                     dic_sent_away[key][0], dic_sent_away[key][1]))

KeyError: 'ith_minute'