In [1]:
import os
import sys
import pickle
from time import time
import pandas as pd

# Local Imports
path = str(os.path.expanduser('~')) + '/git/predictEPL/config'
sys.path.append(path)
import paths

sys.path.append(paths.UTILS)
import emolex
import useful_methods

In [2]:
# *******************************************************
# *******************************************************

# Limitations
# Run-wide settings read by the cells below.
# Upper bound on a tweet's `ith_minute`: TweetPNscore keeps rows with ith_minute <= TIME_LIMIT.
TIME_LIMIT = 60
# Forwarded as the `retweet` argument of useful_methods.SingleGameDf.
RETWEET_STATUS = False
# Filtering switch; presumably meant for SingleGameDf's `filtering` argument -- TODO confirm it is passed through.
FILTER_STATUS = True
# NOTE(review): START_TIME / END_TIME are not referenced by any cell visible in this notebook -- confirm they are still needed.
START_TIME = 1
END_TIME = 60

In [3]:
# *******************************************************
# *******************************************************

# Game Infos
# Load the fixture table; the working directory is switched first because the
# CSV is addressed by bare file name.
os.chdir(paths.READ_PATH_GAME_INFO)
dfGameInfos = useful_methods.csv_dic_df('game_infos.csv')

# The numeric fields arrive as strings: cast each one to int, column by column.
NUMERIC_COLUMNS = (
    'GW',
    'score_ht_home',
    'score_ht_away',
    'score_ft_home',
    'score_ft_away',
)
for column in NUMERIC_COLUMNS:
    dfGameInfos[column] = [int(value) for value in dfGameInfos[column]]

In [4]:
# *******************************************************
# *******************************************************

# Read SVM(hash emolex) detecter
# NOTE: unpickling executes arbitrary code from the file -- only load detector
# pickles that were produced by this project.
with open(paths.DETECTER_HOME + 'dtr_hash_svn_2016-01-24_05:14:42.pkl', 'rb') as f:
    # Public API instead of the private pickle._Unpickler class; `encoding`
    # controls how 8-bit strings pickled by Python 2 are decoded.
    detector = pickle.load(f, encoding='utf-8')

In [5]:
def AddScorePosNegColumn(df, detecter):
    """Return a copy of ``df`` with ``score_pos`` and ``score_neg`` columns added.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``text`` column. It is not modified.
    detecter : object
        Classifier exposing ``predict_proba(list_of_texts)``. The first row of
        each result is unpacked as ``(neg, pos)``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with its index reset to ``0..n-1`` and the two score
        columns appended. An empty ``df`` yields empty score columns.
    """
    df = df.copy().reset_index(drop=True)
    score_pos = []
    score_neg = []

    for text in df.text:
        # Use the classifier that was passed in; the previous version ignored
        # the argument and silently depended on a notebook-global `detector`.
        neg, pos = detecter.predict_proba([text])[0]
        score_pos.append(pos)
        score_neg.append(neg)

    df['score_pos'] = score_pos
    df['score_neg'] = score_neg

    return df

In [6]:
# Sum the detector's positive/negative scores of home and away tweets for one game
def TweetPNscore(week, team_home, team_away):
    """Aggregate per-tweet positive/negative scores for a single fixture.

    Loads the game's tweets through ``useful_methods.SingleGameDf``, keeps the
    rows whose ``ith_minute`` is at most ``TIME_LIMIT``, scores each remaining
    tweet with the module-level ``detector`` and sums the scores per side.

    Parameters
    ----------
    week : int
        Game week, passed to ``SingleGameDf``.
    team_home, team_away : str
        Team names, passed to ``SingleGameDf``.

    Returns
    -------
    tuple or None
        ``(home_pos, home_neg, away_pos, away_neg)`` score sums, or ``None``
        when ``SingleGameDf`` returns ``None`` for the fixture.
    """
    # Honour the notebook-level FILTER_STATUS switch instead of a hard-coded
    # `filtering=True`, so the configuration cell actually controls loading.
    dfGame = useful_methods.SingleGameDf(
        week, team_home, team_away,
        filtering=FILTER_STATUS, retweet=RETWEET_STATUS)
    if dfGame is None:
        print(None, None)
        return None

    # `ith_minute` must be numeric before it can be compared with TIME_LIMIT.
    dfGame.ith_minute = [int(ith_minute) for ith_minute in list(dfGame.ith_minute)]

    # Score only the tweets inside the time window, separately per side.
    in_window = dfGame.ith_minute <= TIME_LIMIT
    dfHome = AddScorePosNegColumn(dfGame[(dfGame.side == 'home') & in_window], detector)
    dfAway = AddScorePosNegColumn(dfGame[(dfGame.side == 'away') & in_window], detector)

    return (
        dfHome.score_pos.sum(), dfHome.score_neg.sum(),
        dfAway.score_pos.sum(), dfAway.score_neg.sum())

In [7]:
taken_time = time()

# One entry per row of dfGameInfos, in row order: either a 4-tuple of score
# sums or None when the game could not be loaded.
counted_game_emolex = []

# Walk the fixtures in DataFrame row order. The results are attached back to
# dfGameInfos positionally, so iterating the rows directly keeps them aligned
# without relying on set ordering, on every week having exactly 10 games, or
# on the index being (week - 1) * 10 .. week * 10 - 1.
fixtures = zip(dfGameInfos['GW'], dfGameInfos['home_team'], dfGameInfos['away_team'])
for week, team_home, team_away in fixtures:
    print("\n\n------------------")
    print(week, team_home, team_away)
    pn_score = TweetPNscore(week, team_home, team_away)
    counted_game_emolex.append(pn_score)
    print(pn_score)

print("[Done:] %.2f" % (time() - taken_time))



------------------
1 United Tottenham
[Not Game Exists]: Check your inputs
None None
None


------------------
1 Everton Watford
[Not Game Exists]: Check your inputs
None None
None


------------------
1 Leicester Sunderland
[Not Game Exists]: Check your inputs
None None
None


------------------
1 Norwich Crystal
[Not Game Exists]: Check your inputs
None None
None


------------------
1 Bournemouth Villa
[Not Game Exists]: Check your inputs
None None
None


------------------
1 Chelsea Swansea
[Not Game Exists]: Check your inputs
None None
None


------------------
1 Arsenal WestHam
[Not Game Exists]: Check your inputs
None None
None


------------------
1 Newcastle Southampton
[Not Game Exists]: Check your inputs
None None
None


------------------
1 Stoke Liverpool
[Not Game Exists]: Check your inputs
None None
None


------------------
1 WestBromwich City
[Not Game Exists]: Check your inputs
None None
None


------------------
2 Villa United
[Not Game Exists]: Check your inputs
N

In [8]:
# Split the per-game score tuples into four parallel lists, one per tuple
# position (home pos, home neg, away pos, away neg). Games that produced no
# scores (None) are recorded as -1 in every list.
pn_home_pos, pn_home_neg, pn_away_pos, pn_away_neg = (
    [-1 if game_scores is None else game_scores[position]
     for game_scores in counted_game_emolex]
    for position in range(4)
)

# Attach the lists as new columns; they line up with the rows by position.
dfGameInfos['pn_home_pos'] = pn_home_pos
dfGameInfos['pn_home_neg'] = pn_home_neg
dfGameInfos['pn_away_pos'] = pn_away_pos
dfGameInfos['pn_away_neg'] = pn_away_neg

dfGameInfos

Unnamed: 0,GW,away_team,date,home_team,score_ft_away,score_ft_home,score_ht_away,score_ht_home,time,pn_home_pos,pn_home_neg,pn_away_pos,pn_away_neg
0,1,Tottenham,Sat. 8 Aug.,United,0,1,0,1,11:45,-1.000000,-1.000000,-1.000000,-1.000000
1,1,Watford,Sat. 8 Aug.,Everton,2,2,1,0,14:00,-1.000000,-1.000000,-1.000000,-1.000000
2,1,Sunderland,Sat. 8 Aug.,Leicester,2,4,0,3,14:00,-1.000000,-1.000000,-1.000000,-1.000000
3,1,Crystal,Sat. 8 Aug.,Norwich,3,1,1,0,14:00,-1.000000,-1.000000,-1.000000,-1.000000
4,1,Villa,Sat. 8 Aug.,Bournemouth,1,0,0,0,14:00,-1.000000,-1.000000,-1.000000,-1.000000
5,1,Swansea,Sat. 8 Aug.,Chelsea,2,2,1,2,16:30,-1.000000,-1.000000,-1.000000,-1.000000
6,1,WestHam,Sun. 9 Aug.,Arsenal,2,0,1,0,12:30,-1.000000,-1.000000,-1.000000,-1.000000
7,1,Southampton,Sun. 9 Aug.,Newcastle,2,2,1,1,12:30,-1.000000,-1.000000,-1.000000,-1.000000
8,1,Liverpool,Sun. 9 Aug.,Stoke,1,0,0,0,15:00,-1.000000,-1.000000,-1.000000,-1.000000
9,1,City,Mon. 10 Aug.,WestBromwich,3,0,2,0,19:00,-1.000000,-1.000000,-1.000000,-1.000000


In [9]:
# Save as CSV
# Build the destination once so the write and the log message cannot drift apart.
output_dir = paths.DATA_HOME + "EPL/"
output_name = 'all_game_hash_review_pn_score'
useful_methods.DFtoCSV(dfGameInfos, output_dir, output_name, index=False)
print("[Saved in]: %s" % (output_dir + output_name + '.csv'))

[Saved in]: /Users/Bya/Dropbox/Research/datas/EPL/all_game_hash_review_pn_score.csv
