In [1]:
%pylab inline
%matplotlib inline

import os
from time import time
import pandas as pd

# Local Imports
path = str(os.path.expanduser('~')) + '/git/predictEPL/config'
sys.path.append(path)
import paths

sys.path.append(paths.UTILS)
import emolex
import useful_methods

Populating the interactive namespace from numpy and matplotlib


### Data Loading & Definitions

In [3]:
# Gameweek to process
WEEK_NUM = 23

# Limits and flags read by the counting code further down
TIME_LIMIT = 60
RETWEET_STATUS = False
FILTER_STATUS = True
START_TIME = 1
END_TIME = 60


# Load the game table and keep only the rows of the selected gameweek.
# GW is compared as a string here; it is cast to int just below.
os.chdir(paths.READ_PATH_GAME_INFO)
dfGameInfos = useful_methods.csv_dic_df('game_infos.csv')
dfGameInfos = useful_methods.DropNanGames(dfGameInfos)
is_selected_week = dfGameInfos.GW == str(WEEK_NUM)
dfGameInfos = dfGameInfos[is_selected_week].copy().reset_index(drop=True)


# Cast the numeric string columns to integers, one column at a time
for int_column in ('GW',
                   'score_ht_home',
                   'score_ht_away',
                   'score_ft_home',
                   'score_ft_away'):
    dfGameInfos[int_column] = [int(value) for value in dfGameInfos[int_column]]


# Emotion-Lexicon-Soccer, loaded as a dictionary
dic_emolex_soccer = emolex.EmolexSoccerDic()


dfGameInfos

[Emolex Dic's All Words]: 14136


Unnamed: 0,GW,away_team,date,home_team,score_ft_away,score_ft_home,score_ht_away,score_ht_home,time
0,23,Liverpool,Sat. 23 Jan.,Norwich,5,4,1,2,12:45
1,23,Villa,Sat. 23 Jan.,WestBromwich,0,0,0,0,15:00
2,23,Bournemouth,Sat. 23 Jan.,Sunderland,1,1,1,1,15:00
3,23,Southampton,Sat. 23 Jan.,United,1,0,0,0,15:00
4,23,Stoke,Sat. 23 Jan.,Leicester,0,3,0,1,15:00
5,23,Tottenham,Sat. 23 Jan.,Crystal,3,1,0,1,15:00
6,23,Newcastle,Sat. 23 Jan.,Watford,1,2,0,0,15:00
7,23,City,Sat. 23 Jan.,WestHam,2,2,1,1,17:30
8,23,Swansea,Sun. 24 Jan.,Everton,2,1,2,1,13:30
9,23,Chelsea,Sun. 24 Jan.,Arsenal,1,0,1,0,16:00


### Emolex Count Functions

In [4]:
# Summing counted emolex
def EmolexSumList(dfEmolex, start=1, end=60):
    """Sum each emotion-count column over an inclusive minute window.

    Parameters
    ----------
    dfEmolex : pandas.DataFrame
        Must contain an ``ith_minute`` column whose values ``int()`` accepts,
        plus one column for each of: anger, fear, disgust, sadness, surprise,
        trust, joy, anticipation, positive, negative.
    start, end : int
        Window bounds; rows with ``start <= ith_minute <= end`` are summed.

    Returns
    -------
    dict
        Maps each of the ten column names to the sum of that column over the
        selected rows (the result of ``Series.sum()``).

    Notes
    -----
    The input frame is left untouched: the minute values are converted on a
    separate Series instead of being written back into ``dfEmolex``.
    """
    categories = ('anger', 'fear', 'disgust', 'sadness', 'surprise',
                  'trust', 'joy', 'anticipation', 'positive', 'negative')

    # Convert to int without assigning back, so the caller's column keeps its
    # original values and dtype.
    minutes = dfEmolex['ith_minute'].map(int)

    # Series.between is inclusive on both ends by default.
    in_window = minutes.between(start, end)
    window = dfEmolex[in_window]

    return {category: window[category].sum() for category in categories}


# Count Home, Away Emolex
def CountGameEmolex(week, team_home, team_away):
    """Count emotion-lexicon totals for the home and away side of one game.

    The game frame is fetched with ``useful_methods.SingleGameDf`` using the
    notebook-level FILTER_STATUS / RETWEET_STATUS flags. Rows are split on the
    ``side`` column ('home' / 'away'), limited to ``ith_minute <= TIME_LIMIT``,
    turned into emolex frames with ``emolex.CreateEmolexDF`` and finally summed
    by ``EmolexSumList`` over START_TIME..END_TIME.

    Returns
    -------
    tuple
        ``(home_totals, away_totals)`` as dicts, or ``(None, None)`` when
        ``SingleGameDf`` returns None.
    """
    dfGame = useful_methods.SingleGameDf(
        week,
        team_home,
        team_away,
        filtering=FILTER_STATUS,
        retweet=RETWEET_STATUS,
    )
    if dfGame is None:
        return (None, None)

    # Minute values must be integers before they can be compared to the limit
    dfGame.ith_minute = list(map(int, dfGame.ith_minute))
    within_limit = dfGame.ith_minute <= TIME_LIMIT

    # Build both emolex frames first (home, then away), then sum them
    emolex_frames = [
        emolex.CreateEmolexDF(dfGame[(dfGame.side == side) & within_limit], dic_emolex_soccer)
        for side in ('home', 'away')
    ]
    totals = [
        EmolexSumList(side_frame, start=START_TIME, end=END_TIME)
        for side_frame in emolex_frames
    ]

    return (totals[0], totals[1])

In [5]:
# Add Emolex Counted Columns
def CreateDfEmolexCounted(counted_game_emolex, game_infos=None):
    """Attach per-game emolex counts to a copy of the game-info table.

    Parameters
    ----------
    counted_game_emolex : sequence
        One ``(home, away)`` pair per game, in the same order as the rows of
        the game-info table. Each element is a dict of totals keyed by the ten
        emolex category names, or a falsy value (e.g. None) when the game
        could not be counted.
    game_infos : pandas.DataFrame, optional
        Table to extend. Defaults to the notebook-level ``dfGameInfos`` so
        existing calls keep working; pass a frame explicitly to reuse the
        function on another table.

    Returns
    -------
    pandas.DataFrame
        A copy of the table with four extra columns:
        ``pn_home`` / ``pn_away`` hold ``[positive, negative]`` and
        ``emolex_home`` / ``emolex_away`` hold
        ``[anger, fear, disgust, sadness, surprise, trust, joy, anticipation]``.
        For a game without counts the pn columns hold the integer ``0`` and
        the emolex columns hold ``[0]``; the notebook later drops rows where
        ``pn_home`` equals 0, so these sentinels are kept as they are.
    """
    polarity_keys = ('positive', 'negative')
    emotion_keys = ('anger', 'fear', 'disgust', 'sadness',
                    'surprise', 'trust', 'joy', 'anticipation')

    pn_home, pn_away = [], []
    emolex_home, emolex_away = [], []

    for game_emolex in counted_game_emolex:
        home_emolex = game_emolex[0]
        away_emolex = game_emolex[1]

        if home_emolex and away_emolex:
            pn_home.append([home_emolex[key] for key in polarity_keys])
            pn_away.append([away_emolex[key] for key in polarity_keys])

            emolex_home.append([home_emolex[key] for key in emotion_keys])
            emolex_away.append([away_emolex[key] for key in emotion_keys])
        else:
            # Sentinels marking a game that has no counts
            pn_home.append(0)
            pn_away.append(0)

            emolex_home.append([0])
            emolex_away.append([0])

    # Fall back to the notebook global only when no table was supplied
    base = dfGameInfos if game_infos is None else game_infos

    df = base.copy()
    df['pn_home'] = pn_home
    df['pn_away'] = pn_away

    df['emolex_home'] = emolex_home
    df['emolex_away'] = emolex_away

    return df

### Count Emolex for All Games

In [8]:
# Remember when counting started so the total duration can be reported
taken_time = time()

counted_game_emolex = []

for ith_row in range(len(dfGameInfos)):
    # Fetch the row once, then pick the three fields the counter needs
    game = dfGameInfos.iloc[ith_row]
    week, team_home, team_away = game['GW'], game['home_team'], game['away_team']

    print("\n\n------------------")
    print(week, team_home, team_away)

    game_emolex = CountGameEmolex(week, team_home, team_away)
    counted_game_emolex.append(game_emolex)

elapsed = time() - taken_time
print("[Done:] %.2f" % elapsed)



------------------
23 Norwich Liverpool


------------------
23 WestBromwich Villa


------------------
23 Sunderland Bournemouth


------------------
23 United Southampton


------------------
23 Leicester Stoke


------------------
23 Crystal Tottenham


------------------
23 Watford Newcastle


------------------
23 WestHam City


------------------
23 Everton Swansea


------------------
23 Arsenal Chelsea
[Done:] 48.27


In [11]:
# Inspect the first game's entry: the (home, away) pair returned by CountGameEmolex
counted_game_emolex[0]

({'anger': 109.0,
  'anticipation': 204.0,
  'disgust': 81.0,
  'fear': 94.0,
  'joy': 243.0,
  'negative': 228.0,
  'positive': 464.0,
  'sadness': 90.0,
  'surprise': 134.0,
  'trust': 252.0},
 {'anger': 2028.0,
  'anticipation': 1469.0,
  'disgust': 1773.0,
  'fear': 1672.0,
  'joy': 1255.0,
  'negative': 4474.0,
  'positive': 3746.0,
  'sadness': 1653.0,
  'surprise': 1071.0,
  'trust': 1728.0})

### Create DF and Save

In [10]:
# Build the counted table, then drop the games whose pn_home is the 0 sentinel
emolex_counted = CreateDfEmolexCounted(counted_game_emolex)
has_counts = emolex_counted.pn_home != 0
df = emolex_counted[has_counts].copy().reset_index(drop=True)
df

Unnamed: 0,GW,away_team,date,home_team,score_ft_away,score_ft_home,score_ht_away,score_ht_home,time,pn_home,pn_away,emolex_home,emolex_away
0,23,Liverpool,Sat. 23 Jan.,Norwich,5,4,1,2,12:45,"[464.0, 228.0]","[3746.0, 4474.0]","[109.0, 94.0, 81.0, 90.0, 134.0, 252.0, 243.0,...","[2028.0, 1672.0, 1773.0, 1653.0, 1071.0, 1728...."
1,23,Villa,Sat. 23 Jan.,WestBromwich,0,0,0,0,15:00,"[89.0, 93.0]","[282.0, 299.0]","[42.0, 41.0, 43.0, 55.0, 27.0, 39.0, 43.0, 37.0]","[153.0, 111.0, 122.0, 117.0, 107.0, 139.0, 117..."
2,23,Bournemouth,Sat. 23 Jan.,Sunderland,1,1,1,1,15:00,"[297.0, 282.0]","[124.0, 76.0]","[135.0, 128.0, 128.0, 133.0, 71.0, 160.0, 102....","[43.0, 38.0, 24.0, 26.0, 41.0, 82.0, 64.0, 62.0]"
3,23,Southampton,Sat. 23 Jan.,United,1,0,0,0,15:00,"[2374.0, 3231.0]","[144.0, 91.0]","[1339.0, 1050.0, 1117.0, 1121.0, 800.0, 1179.0...","[48.0, 36.0, 32.0, 33.0, 51.0, 84.0, 69.0, 91.0]"
4,23,Stoke,Sat. 23 Jan.,Leicester,0,3,0,1,15:00,"[133.0, 50.0]","[60.0, 42.0]","[18.0, 16.0, 22.0, 17.0, 50.0, 79.0, 62.0, 83.0]","[18.0, 28.0, 13.0, 23.0, 20.0, 26.0, 30.0, 63.0]"
5,23,Tottenham,Sat. 23 Jan.,Crystal,3,1,0,1,15:00,"[198.0, 112.0]","[421.0, 381.0]","[48.0, 54.0, 39.0, 38.0, 60.0, 104.0, 95.0, 93.0]","[188.0, 165.0, 122.0, 171.0, 153.0, 239.0, 214..."
6,23,Newcastle,Sat. 23 Jan.,Watford,1,2,0,0,15:00,"[64.0, 43.0]","[470.0, 337.0]","[23.0, 19.0, 9.0, 18.0, 29.0, 26.0, 27.0, 26.0]","[138.0, 148.0, 108.0, 134.0, 142.0, 277.0, 221..."
7,23,City,Sat. 23 Jan.,WestHam,2,2,1,1,17:30,"[405.0, 244.0]","[624.0, 427.0]","[123.0, 140.0, 76.0, 97.0, 119.0, 237.0, 192.0...","[220.0, 204.0, 146.0, 167.0, 175.0, 362.0, 313..."
8,23,Swansea,Sun. 24 Jan.,Everton,2,1,2,1,13:30,"[653.0, 1050.0]","[177.0, 86.0]","[546.0, 316.0, 298.0, 334.0, 156.0, 277.0, 221...","[39.0, 39.0, 22.0, 28.0, 44.0, 81.0, 74.0, 69.0]"
9,23,Chelsea,Sun. 24 Jan.,Arsenal,1,0,1,0,16:00,"[2074.0, 2362.0]","[1624.0, 997.0]","[1020.0, 839.0, 789.0, 1009.0, 728.0, 1119.0, ...","[464.0, 445.0, 337.0, 416.0, 489.0, 891.0, 854..."


In [None]:
# Save as CSV
# The directory and file name are built once and shared by the save call and
# the log line. The message used to report
# 'GW23_game_emolex_counted_nonretweet.csv', which is not the name handed to
# DFtoCSV. The gameweek now comes from WEEK_NUM instead of a hard-coded 23.
save_dir = paths.DATA_HOME + "EPL/"
file_name = 'GW%s_emolex_counted_nonretweet' % WEEK_NUM
useful_methods.DFtoCSV(df, save_dir, file_name, index=False)
# NOTE(review): assumes DFtoCSV appends the '.csv' extension itself; confirm.
print("[Saved in]: %s" % (save_dir + file_name + '.csv'))