In [1]:
%pylab inline
%matplotlib inline

import os
import sys
from time import time
import pandas as pd

# Local Imports
path = str(os.path.expanduser('~')) + '/git/predictEPL/config'
sys.path.append(path)
import paths

sys.path.append(paths.UTILS)
import emolex
import useful_methods

Populating the interactive namespace from numpy and matplotlib


### Data Loading & Definitions

In [2]:
# Load the per-game information table from the game-info directory.
os.chdir(paths.READ_PATH_GAME_INFO)
dfGameInfos = useful_methods.csv_dic_df('game_infos.csv')


# The columns below are converted value-by-value with int() so that the
# game week and the half-time / full-time scores can be compared as numbers.
for _int_column in ('GW',
                    'score_ht_home',
                    'score_ht_away',
                    'score_ft_home',
                    'score_ft_away'):
    dfGameInfos[_int_column] = [int(value) for value in dfGameInfos[_int_column]]


# Emotion lexicon (soccer variant), passed to emolex.CreateEmolexDF later on.
dic_emolex_soccer = emolex.EmolexSoccerDic()


# Settings read by the counting functions defined further down.
time_limit = 60   # upper bound on ith_minute when selecting tweets
retweet = True    # forwarded to useful_methods.SingleGameDf
start_time = 1    # inclusive lower bound of the summing window
end_time = 60     # inclusive upper bound of the summing window

[Emolex Dic's All Words]: 14136


### Emolex Count Functions

In [11]:
# Summing counted emolex
def EmolexSumList(dfEmolex, start=1, end=60):
    """Sum every emolex category over the rows inside a minute window.

    Parameters
    ----------
    dfEmolex : pandas.DataFrame
        Must contain an ``ith_minute`` column whose values are accepted by
        ``int()``, plus one column for each category returned below.
    start, end : int
        Inclusive lower / upper bound on ``ith_minute``.

    Returns
    -------
    dict
        Maps each of 'anger', 'fear', 'disgust', 'sadness', 'surprise',
        'trust', 'joy', 'anticipation', 'positive' and 'negative' to the sum
        of that column over the selected rows.

    Notes
    -----
    The input frame is left untouched: the integer conversion of
    ``ith_minute`` happens on a separate Series instead of being written
    back into the caller's DataFrame.
    """
    categories = ('anger', 'fear', 'disgust', 'sadness', 'surprise',
                  'trust', 'joy', 'anticipation', 'positive', 'negative')

    # Time interval: convert on a copy so the caller's column is not mutated.
    minutes = dfEmolex['ith_minute'].map(int)
    in_window = dfEmolex[(minutes >= start) & (minutes <= end)]

    # Sum emolex counts, keeping the original key order.
    return {category: in_window[category].sum() for category in categories}


# Count Home, Away Emolex
def CountGameEmolex(week, team_home, team_away):
    """Count emolex words for the home and away side of a single game.

    The game is loaded with ``useful_methods.SingleGameDf`` (filtering on,
    retweets controlled by the notebook-level ``retweet`` flag). Rows are
    limited to ``ith_minute <= time_limit``, counted per side with
    ``emolex.CreateEmolexDF`` and then summed by ``EmolexSumList`` over
    ``start_time``..``end_time``.

    Returns
    -------
    tuple
        ``(home_sums, away_sums)``, two dicts as produced by
        ``EmolexSumList``; ``(None, None)`` when ``SingleGameDf`` returns
        ``None``.
    """
    dfGame = useful_methods.SingleGameDf(week, team_home, team_away, filtering=True, retweet=retweet)
    if dfGame is None:
        return (None, None)

    # Minutes are converted with int() so they can be compared to time_limit.
    dfGame.ith_minute = [int(minute) for minute in dfGame.ith_minute]

    # Count emolex words per side, home first, within the time limit.
    side_frames = [
        emolex.CreateEmolexDF(
            dfGame[(dfGame.side == side) & (dfGame.ith_minute <= time_limit)],
            dic_emolex_soccer,
        )
        for side in ('home', 'away')
    ]

    # Sum the counted words over the configured minute window.
    side_sums = [
        EmolexSumList(side_frame, start=start_time, end=end_time)
        for side_frame in side_frames
    ]

    return (side_sums[0], side_sums[1])

In [12]:
# Add Emolex Counted Columns
def CreateDfEmolexCounted(counted_game_emolex, game_infos=None):
    """Attach per-game emolex counts to a copy of the game-info table.

    Parameters
    ----------
    counted_game_emolex : sequence
        One ``(home, away)`` pair per game, in the same order as the rows of
        the game-info table. Each element is a dict with the keys 'anger',
        'fear', 'disgust', 'sadness', 'surprise', 'trust', 'joy',
        'anticipation', 'positive' and 'negative', or a falsy value (e.g.
        ``None``) when the game has no data.
    game_infos : pandas.DataFrame, optional
        Table to extend. Defaults to the notebook-level ``dfGameInfos`` so
        existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        A copy of the table with the added columns ``pos_home``,
        ``pos_away``, ``neg_home``, ``neg_away`` (positive / negative sums)
        and ``emolex_home``, ``emolex_away`` (8-element lists ordered anger,
        fear, disgust, sadness, surprise, trust, joy, anticipation).
        Games without data get 0 in the four scalar columns and the
        placeholder list ``[0]`` in the two vector columns.

    Raises
    ------
    ValueError
        If the number of counted games differs from the number of rows.
    """
    if game_infos is None:
        game_infos = dfGameInfos

    emotions = ('anger', 'fear', 'disgust', 'sadness',
                'surprise', 'trust', 'joy', 'anticipation')

    pos_home, neg_home, emolex_home = [], [], []
    pos_away, neg_away, emolex_away = [], [], []

    for game_emolex in counted_game_emolex:
        home_emolex = game_emolex[0]
        away_emolex = game_emolex[1]

        if home_emolex and away_emolex:
            pos_home.append(home_emolex['positive'])
            pos_away.append(away_emolex['positive'])
            neg_home.append(home_emolex['negative'])
            neg_away.append(away_emolex['negative'])

            emolex_home.append([home_emolex[emotion] for emotion in emotions])
            emolex_away.append([away_emolex[emotion] for emotion in emotions])
        else:
            # Missing game: zero scalars and the single-element placeholder
            # list that the original implementation wrote.
            pos_home.append(0)
            pos_away.append(0)
            neg_home.append(0)
            neg_away.append(0)

            emolex_home.append([0])
            emolex_away.append([0])

    # The columns are attached positionally, so a length mismatch would
    # silently mean "wrong game"; fail with an explicit message instead.
    if len(pos_home) != len(game_infos):
        raise ValueError(
            "counted_game_emolex has %d entries but the game-info table has %d rows"
            % (len(pos_home), len(game_infos))
        )

    df = game_infos.copy()
    df['pos_home'] = pos_home
    df['pos_away'] = pos_away
    df['neg_home'] = neg_home
    df['neg_away'] = neg_away

    df['emolex_home'] = emolex_home
    df['emolex_away'] = emolex_away

    return df

### Count Emolex for All Games

In [4]:
# Count emolex for every game, walking dfGameInfos in row order.
#
# CreateDfEmolexCounted attaches these results to dfGameInfos positionally,
# so entry i must belong to row i. Iterating the rows directly guarantees
# that without assuming exactly 10 games per week, a 0..N-1 index, or any
# particular iteration order of set(dfGameInfos.GW). It also avoids
# re-filtering the whole frame twice per game.
counted_game_emolex = []

for week, team_home, team_away in zip(dfGameInfos['GW'],
                                      dfGameInfos['home_team'],
                                      dfGameInfos['away_team']):
    print("\n\n------------------")
    print(week, team_home, team_away)
    counted_game_emolex.append(CountGameEmolex(week, team_home, team_away))



------------------
1 United Tottenham
[Not Game Exists]: Check your inputs


------------------
1 Everton Watford
[Not Game Exists]: Check your inputs


------------------
1 Leicester Sunderland
[Not Game Exists]: Check your inputs


------------------
1 Norwich Crystal
[Not Game Exists]: Check your inputs


------------------
1 Bournemouth Villa
[Not Game Exists]: Check your inputs


------------------
1 Chelsea Swansea
[Not Game Exists]: Check your inputs


------------------
1 Arsenal WestHam
[Not Game Exists]: Check your inputs


------------------
1 Newcastle Southampton
[Not Game Exists]: Check your inputs


------------------
1 Stoke Liverpool
[Not Game Exists]: Check your inputs


------------------
1 WestBromwich City
[Not Game Exists]: Check your inputs


------------------
2 Villa United
[Not Game Exists]: Check your inputs


------------------
2 Southampton Everton
[Not Game Exists]: Check your inputs


------------------
2 Tottenham Stoke
[Not Game Exists]: Check your in

### Create DF and Save

In [14]:
# Create DF
df_all_games = CreateDfEmolexCounted(counted_game_emolex)

# Keep only rows whose pos_home is non-zero (games without data were filled
# with 0 by CreateDfEmolexCounted); copy so df is independent of the full table.
has_home_positive = df_all_games['pos_home'] != 0
df = df_all_games[has_home_positive].copy()
df

Unnamed: 0,GW,away_team,date,home_team,score_ft_away,score_ft_home,score_ht_away,score_ht_home,time,pos_home,pos_away,neg_home,neg_away,emolex_home,emolex_away
40,5,Chelsea,Sat. 12 Sep.,Everton,1,3,1,2,11:45,1009,3901,647,3699,"[405.0, 287.0, 192.0, 322.0, 291.0, 484.0, 530...","[1732.0, 1670.0, 1436.0, 1569.0, 1079.0, 2217...."
41,5,Bournemouth,Sat. 12 Sep.,Norwich,1,3,0,1,14:00,331,112,100,53,"[67.0, 64.0, 22.0, 54.0, 122.0, 194.0, 122.0, ...","[36.0, 36.0, 25.0, 22.0, 27.0, 50.0, 37.0, 37.0]"
42,5,Swansea,Sat. 12 Sep.,Watford,0,1,0,0,14:00,98,78,55,36,"[38.0, 22.0, 18.0, 20.0, 57.0, 66.0, 52.0, 58.0]","[24.0, 23.0, 10.0, 20.0, 22.0, 51.0, 33.0, 46.0]"
43,5,Southampton,Sat. 12 Sep.,WestBromwich,0,0,0,0,14:00,116,154,116,62,"[56.0, 30.0, 38.0, 32.0, 43.0, 80.0, 48.0, 94.0]","[23.0, 28.0, 17.0, 23.0, 75.0, 65.0, 68.0, 67.0]"
44,5,Stoke,Sat. 12 Sep.,Arsenal,0,2,0,1,14:00,2529,255,938,113,"[603.0, 445.0, 421.0, 432.0, 1039.0, 1681.0, 1...","[82.0, 59.0, 40.0, 47.0, 75.0, 130.0, 107.0, 8..."
45,5,City,Sat. 12 Sep.,Crystal,1,0,0,0,14:00,299,975,225,1708,"[146.0, 113.0, 60.0, 80.0, 100.0, 190.0, 145.0...","[1029.0, 1358.0, 538.0, 805.0, 441.0, 378.0, 3..."
46,5,Liverpool,Sat. 12 Sep.,United,1,3,0,0,16:30,11807,4881,9999,4464,"[5228.0, 3026.0, 2403.0, 2447.0, 2705.0, 7167....","[1743.0, 1955.0, 1644.0, 1838.0, 1207.0, 2875...."
47,5,Tottenham,Sun. 13 Sep.,Sunderland,1,0,0,0,12:30,498,1117,330,815,"[158.0, 152.0, 87.0, 120.0, 205.0, 329.0, 218....","[379.0, 365.0, 248.0, 315.0, 297.0, 511.0, 421..."
48,5,Villa,Sun. 13 Sep.,Leicester,2,3,1,0,15:00,276,1550,163,480,"[70.0, 75.0, 74.0, 65.0, 99.0, 122.0, 99.0, 10...","[195.0, 233.0, 203.0, 191.0, 384.0, 725.0, 881..."
49,5,Newcastle,Mon. 14 Sep.,WestHam,0,2,0,1,19:00,655,1560,202,1563,"[96.0, 92.0, 64.0, 68.0, 147.0, 401.0, 297.0, ...","[589.0, 697.0, 473.0, 643.0, 418.0, 919.0, 574..."


In [15]:
# Save as CSV
# Target directory and base name are named once so that the printed message
# is built from the same pieces that are handed to DFtoCSV.
output_dir = paths.DATA_HOME + "EPL/"
output_name = 'all_game_emolex_counted'

useful_methods.DFtoCSV(df, output_dir, output_name, index=False)
print("[Saved in]: %s" % (output_dir + output_name + '.csv'))

[Saved in]: /Users/Bya/Dropbox/Research/datas/EPL/all_game_emolex_counted.csv
