In [1]:
%pylab inline
%matplotlib inline

import os
import sys
from time import time
import pandas as pd

# Local Imports
# `sys` is imported explicitly: it was previously only available implicitly
# (presumably through the %pylab star import), which breaks without that magic.
path = str(os.path.expanduser('~')) + '/git/predictEPL/config'
if path not in sys.path:  # keep re-runs of this cell from stacking duplicates
    sys.path.append(path)
import paths

if paths.UTILS not in sys.path:
    sys.path.append(paths.UTILS)
import emolex
import useful_methods

Populating the interactive namespace from numpy and matplotlib


### Data Loading & Definitions

In [2]:
# Game Infos: switch into the game-info directory and read the table
os.chdir(paths.READ_PATH_GAME_INFO)
dfGameInfos = useful_methods.csv_dic_df('game_infos.csv')


# Convert number strings to integers (game week plus half/full-time scores)
for int_column in ('GW',
                   'score_ht_home', 'score_ht_away',
                   'score_ft_home', 'score_ft_away'):
    dfGameInfos[int_column] = [int(raw_value) for raw_value in dfGameInfos[int_column]]


# Read Emotion-Lexicon-Soccer as Dictionary
dic_emolex_soccer = emolex.EmolexSoccerDic()


# Limitations
# TIME_LIMIT: upper bound on ith_minute applied in CountGameEmolex
TIME_LIMIT = 60
# START_TIME / END_TIME: inclusive ith_minute window handed to EmolexSumList
START_TIME = 1
END_TIME = 60
# Forwarded to useful_methods.SingleGameDf as `retweet` and `filtering`
RETWEET_STATUS = False
FILTER_STATUS = True

[Emolex Dic's All Words]: 14136


### Emolex Count Functions

In [3]:
# Summing counted emolex
def EmolexSumList(dfEmolex, start=1, end=60):
    """Sum every emolex count column over an inclusive minute window.

    Parameters
    ----------
    dfEmolex : DataFrame
        Must contain an ``ith_minute`` column (values castable to int) and one
        column per emotion/polarity listed below.
    start, end : int
        Inclusive bounds on ``ith_minute``; rows outside are ignored.

    Returns
    -------
    dict
        Maps each of 'anger', 'fear', 'disgust', 'sadness', 'surprise',
        'trust', 'joy', 'anticipation', 'positive', 'negative' to the sum of
        that column over the selected rows.

    The input frame is left untouched: the minute cast is done on a local
    Series instead of being written back into ``dfEmolex``.
    """
    emolex_columns = ('anger', 'fear', 'disgust', 'sadness', 'surprise',
                      'trust', 'joy', 'anticipation', 'positive', 'negative')

    # Time Interval (cast locally so the caller's column keeps its dtype)
    minutes = dfEmolex['ith_minute'].astype(int)
    in_window = dfEmolex[(minutes >= start) & (minutes <= end)]

    # Sum Emolex Count
    return {column: in_window[column].sum() for column in emolex_columns}


# Count Home, Away Emolex
def CountGameEmolex(week, team_home, team_away):
    """Count emolex hits for the home and away side of a single game.

    The game's frame is loaded with ``useful_methods.SingleGameDf`` using the
    module-level FILTER_STATUS / RETWEET_STATUS settings. For each side, rows
    with ``ith_minute <= TIME_LIMIT`` are turned into an emolex frame via
    ``emolex.CreateEmolexDF`` and summed over START_TIME..END_TIME with
    ``EmolexSumList``.

    Returns ``(home_sums, away_sums)``, or ``(None, None)`` when
    ``SingleGameDf`` returns ``None`` for the requested game.
    """
    dfGame = useful_methods.SingleGameDf(
        week, team_home, team_away,
        filtering=FILTER_STATUS, retweet=RETWEET_STATUS)
    if dfGame is None:
        return (None, None)

    # Minutes must be integers before they can be compared with TIME_LIMIT
    dfGame.ith_minute = [int(minute) for minute in dfGame.ith_minute]
    within_limit = dfGame.ith_minute <= TIME_LIMIT

    # Build both per-side frames first, then sum them (home before away)
    side_frames = [
        emolex.CreateEmolexDF(dfGame[(dfGame.side == side) & within_limit],
                              dic_emolex_soccer)
        for side in ('home', 'away')
    ]
    return tuple(
        EmolexSumList(frame, start=START_TIME, end=END_TIME)
        for frame in side_frames
    )

In [4]:
# Add Emolex Counted Columns
def CreateDfEmolexCounted(counted_game_emolex, df_game_infos=None):
    """Attach per-game emolex counts to a copy of the game-info table.

    Parameters
    ----------
    counted_game_emolex : sequence
        One ``(home_dict, away_dict)`` pair per game, in the same order as the
        rows of the game-info table. Each dict is keyed by emotion/polarity
        name (as produced by ``EmolexSumList``); a pair whose home or away
        entry is falsy (e.g. ``(None, None)``) marks a game without counts.
    df_game_infos : DataFrame, optional
        Table to extend. Defaults to the module-level ``dfGameInfos`` so
        existing one-argument calls keep working.

    Returns
    -------
    DataFrame
        Copy of the table with four extra columns:
        ``pn_home`` / ``pn_away`` hold ``[positive, negative]``;
        ``emolex_home`` / ``emolex_away`` hold the eight emotion counts in the
        order anger, fear, disgust, sadness, surprise, trust, joy,
        anticipation. Games without counts get ``0`` in the ``pn_*`` columns
        and ``[0]`` in the ``emolex_*`` columns.

    Raises
    ------
    ValueError
        If the number of pairs differs from the number of table rows.
    """
    polarity_keys = ('positive', 'negative')
    emotion_keys = ('anger', 'fear', 'disgust', 'sadness',
                    'surprise', 'trust', 'joy', 'anticipation')

    if df_game_infos is None:
        df_game_infos = dfGameInfos

    counted_game_emolex = list(counted_game_emolex)
    if len(counted_game_emolex) != len(df_game_infos):
        # Results are attached positionally, so a size mismatch would
        # otherwise surface as an opaque pandas assignment error.
        raise ValueError(
            "Got %d counted games for %d game-info rows"
            % (len(counted_game_emolex), len(df_game_infos)))

    pn_home, pn_away = [], []
    emolex_home, emolex_away = [], []

    for game_emolex in counted_game_emolex:
        home_emolex = game_emolex[0]
        away_emolex = game_emolex[1]

        if home_emolex and away_emolex:
            pn_home.append([home_emolex[key] for key in polarity_keys])
            pn_away.append([away_emolex[key] for key in polarity_keys])
            emolex_home.append([home_emolex[key] for key in emotion_keys])
            emolex_away.append([away_emolex[key] for key in emotion_keys])
        else:
            # Placeholder values; callers filter these rows with `pn_home != 0`
            pn_home.append(0)
            pn_away.append(0)
            emolex_home.append([0])
            emolex_away.append([0])

    df = df_game_infos.copy()
    df['pn_home'] = pn_home
    df['pn_away'] = pn_away

    df['emolex_home'] = emolex_home
    df['emolex_away'] = emolex_away

    return df

### Count Emolex for All Games

In [5]:
taken_time = time()

counted_game_emolex = []

# Walk the games in dfGameInfos row order so counted_game_emolex[i] always
# belongs to row i: CreateDfEmolexCounted attaches the results positionally.
# Iterating rows directly avoids assuming 10 games per week, a 0..N-1 index,
# or any particular iteration order of a set of week numbers.
for week, team_home, team_away in zip(dfGameInfos['GW'],
                                      dfGameInfos['home_team'],
                                      dfGameInfos['away_team']):
    print("\n\n------------------")
    print(week, team_home, team_away)
    counted_game_emolex.append(CountGameEmolex(week, team_home, team_away))

print("[Done:] %.2f" % (time() - taken_time))



------------------
1 United Tottenham
[Not Game Exists]: Check your inputs


------------------
1 Everton Watford
[Not Game Exists]: Check your inputs


------------------
1 Leicester Sunderland
[Not Game Exists]: Check your inputs


------------------
1 Norwich Crystal
[Not Game Exists]: Check your inputs


------------------
1 Bournemouth Villa
[Not Game Exists]: Check your inputs


------------------
1 Chelsea Swansea
[Not Game Exists]: Check your inputs


------------------
1 Arsenal WestHam
[Not Game Exists]: Check your inputs


------------------
1 Newcastle Southampton
[Not Game Exists]: Check your inputs


------------------
1 Stoke Liverpool
[Not Game Exists]: Check your inputs


------------------
1 WestBromwich City
[Not Game Exists]: Check your inputs


------------------
2 Villa United
[Not Game Exists]: Check your inputs


------------------
2 Southampton Everton
[Not Game Exists]: Check your inputs


------------------
2 Tottenham Stoke
[Not Game Exists]: Check your in

### Create DF and Save

In [6]:
# Build the counted table, then drop games that carry the placeholder 0
# (i.e. games for which no emolex counts were produced)
df = CreateDfEmolexCounted(counted_game_emolex)
df = (
    df.loc[df['pn_home'] != 0]
    .copy()
    .reset_index(drop=True)
)
df

Unnamed: 0,GW,away_team,date,home_team,score_ft_away,score_ft_home,score_ht_away,score_ht_home,time,pn_home,pn_away,emolex_home,emolex_away
0,5,Chelsea,Sat. 12 Sep.,Everton,1,3,1,2,11:45,"[871.0, 549.0]","[2525.0, 2564.0]","[344.0, 244.0, 170.0, 239.0, 254.0, 425.0, 485...","[1238.0, 1128.0, 983.0, 1093.0, 749.0, 1405.0,..."
1,5,Bournemouth,Sat. 12 Sep.,Norwich,1,3,0,1,14:00,"[206.0, 57.0]","[78.0, 34.0]","[32.0, 27.0, 18.0, 21.0, 72.0, 115.0, 95.0, 95.0]","[21.0, 21.0, 15.0, 15.0, 17.0, 37.0, 25.0, 27.0]"
2,5,Swansea,Sat. 12 Sep.,Watford,0,1,0,0,14:00,"[64.0, 35.0]","[41.0, 21.0]","[24.0, 15.0, 11.0, 14.0, 33.0, 40.0, 34.0, 35.0]","[15.0, 13.0, 9.0, 11.0, 15.0, 29.0, 20.0, 22.0]"
3,5,Southampton,Sat. 12 Sep.,WestBromwich,0,0,0,0,14:00,"[62.0, 89.0]","[96.0, 46.0]","[38.0, 30.0, 35.0, 32.0, 29.0, 35.0, 28.0, 41.0]","[18.0, 17.0, 14.0, 17.0, 39.0, 48.0, 51.0, 45.0]"
4,5,Stoke,Sat. 12 Sep.,Arsenal,0,2,0,1,14:00,"[1600.0, 736.0]","[137.0, 79.0]","[459.0, 348.0, 381.0, 341.0, 813.0, 1009.0, 87...","[49.0, 47.0, 28.0, 39.0, 49.0, 79.0, 61.0, 60.0]"
5,5,City,Sat. 12 Sep.,Crystal,1,0,0,0,14:00,"[205.0, 138.0]","[470.0, 590.0]","[81.0, 79.0, 36.0, 51.0, 59.0, 115.0, 91.0, 96.0]","[324.0, 375.0, 162.0, 330.0, 202.0, 270.0, 212..."
6,5,Liverpool,Sat. 12 Sep.,United,1,3,0,0,16:30,"[4447.0, 4497.0]","[2667.0, 3225.0]","[1760.0, 1544.0, 1346.0, 1516.0, 1235.0, 2628....","[1348.0, 1341.0, 1282.0, 1368.0, 728.0, 1431.0..."
7,5,Tottenham,Sun. 13 Sep.,Sunderland,1,0,0,0,12:30,"[381.0, 243.0]","[704.0, 654.0]","[127.0, 126.0, 86.0, 100.0, 163.0, 211.0, 161....","[298.0, 266.0, 225.0, 263.0, 242.0, 362.0, 312..."
8,5,Villa,Sun. 13 Sep.,Leicester,2,3,1,0,15:00,"[219.0, 143.0]","[922.0, 334.0]","[62.0, 65.0, 68.0, 59.0, 62.0, 92.0, 81.0, 84.0]","[138.0, 156.0, 127.0, 138.0, 271.0, 417.0, 498..."
9,5,Newcastle,Mon. 14 Sep.,WestHam,0,2,0,1,19:00,"[418.0, 128.0]","[1064.0, 1062.0]","[62.0, 61.0, 45.0, 59.0, 101.0, 235.0, 208.0, ...","[444.0, 419.0, 370.0, 387.0, 267.0, 530.0, 376..."


In [7]:
# Save as CSV (destination is built once and reused for the log line)
save_dir = paths.DATA_HOME + "EPL/"
save_name = 'all_game_emolex_counted_nonretweet'
useful_methods.DFtoCSV(df, save_dir, save_name, index=False)
print("[Saved in]: %s" % (save_dir + save_name + '.csv'))

[Saved in]: /Users/Bya/Dropbox/Research/datas/EPL/all_game_emolex_counted_nonretweet.csv
