class DataCollector:
    def __init__(self):
        self.LOCAL_TZ = 'Asia/Almaty'
        self.SERVER_TZ = 'UTC'
        self.DATA_PATH='data/'
        self.ELO_DATA_PATH='data/elo/'
        self.PREREQUISITES_PATH='prerequisites/'
        self.COL_CAT=[]
        self.COL_NUM=[]
        self.COL_LBL=[]
        self.COL_INF=[]
    
    def _load_prerequisites(self,name):
        with open(os.path.join(self.PREREQUISITES_PATH, name),'rb') as f:
            encoder = pickle.load(f)
        return encoder
    
    def _save_prerequisite(self, name, data):
        os.makedirs(self.PREREQUISITES_PATH, mode=0o777, exist_ok=True)
        with open(os.path.join(self.PREREQUISITES_PATH, name), mode='wb') as f:
            pickle.dump(data, f) 

    def _ff(self, columns):
        if len(self.INCLUDE)>0:
            return [x for x in columns if x in self.INCLUDE]
        else:
            return [x for x in columns if x not in self.EXCLUDE]
    
    def _encode(self, enctype, features, outs, df):
        if (len(self.INCLUDE)>0 and outs[0] in self.INCLUDE) or outs[0] in self.EXCLUDE:
            return df
        name='_'.join(features)
        if self.LOAD:
            encoder=self._load_prerequisites(f'{enctype}_{features[0]}')
        else:
            if enctype=='sc':
                encoder = MinMaxScaler()
            elif enctype=='le':
                encoder = LabelEncoder()
            elif enctype=='ohe':
                encoder = OneHotEncoder()
            if len(features)==1:
                encoder.fit(df[features].values)
            else:
                encoder.fit(pd.concat([pd.DataFrame(df[features[0]].unique(), columns=[name]),pd.DataFrame(df[features[1]].unique(), columns=[name])])[name])
            self._save_prerequisite(f'{enctype}_{name}', encoder)
        if  enctype=='ohe':
            return encoder.transform(df[features].values).toarray()
        if len(features)==1:
            df[outs[0]] = encoder.transform(df[features].values)
        else:
            df[outs[0]] = encoder.transform(df[features[0]])
            df[outs[1]] = encoder.transform(df[features[1]])
        return df

    def _encode_teams(self, df):
        teams_name=self.ELO_DATA_PATH+'teams.csv'
        teams_saved=pd.read_csv(teams_name, index_col=None)
        teams=df[['team']].dropna().drop_duplicates()
        teams_new=teams[~teams.team.isin(teams_saved.team)]
        print(teams_new)
        if not teams_new.empty:
            print('New teams!')
            id=teams_saved.id.max()+1
            #id=0
            teams_list=[]
            for row in teams_new.itertuples():
                if len(row.team)>1:
                    teams_list.append({'team':row.team, 'id':id})
                    id+=1
                    #break
            teams_saved=pd.concat([teams_saved,pd.DataFrame(teams_list)])
            teams_saved.id=teams_saved.id.astype(int)
            teams_saved.to_csv(teams_name, index=False)
        df=df.merge(teams_saved, on='team', how='left')
        return df
    
    def _add_elo(self, df_src,df_elo):
        df_teams=pd.read_csv(self.DATA_PATH+'teams.csv', index_col=None)
        df_elo_merged=df_elo.merge(df_teams[['id','tid']], on='id', how='left').drop_duplicates()
        df_elo_merged=df_elo_merged.dropna()
        df_src['de']=df_src.ds.apply(lambda x: x.strftime('%Y-%m-%d'))
        df_elo_merged=df_elo_merged.rename(columns={'tid':'tid1', 'elo':'elo1'})
        df_src=df_src.merge(df_elo_merged[['tid1','de','elo1']], on=['tid1','de'], how='left')
        df_elo_merged=df_elo_merged.rename(columns={'tid1':'tid2', 'elo1':'elo2'})
        df_src=df_src.merge(df_elo_merged[['tid2','de','elo2']], on=['tid2','de'], how='left')
        return df_src

    def _provide_elo(self):
        df = pd.concat(map(pd.read_csv, glob.glob(os.path.join(self.DATA_PATH+'elo/', 'elo_*.csv'))))
        df=df[['Club', 'Country', 'Level', 'Elo', 'From', 'To']]
        df.columns=['team', 'country', 'level', 'elo', 'ds', 'de']
        df=self._encode_teams(df)
        return df

    def _provide_sofa(self):
        dp=SofaDataProvider(load=True)
        df=dp._load_data()
        return df.drop_duplicates(subset='mid', keep='last')

    def _provide_op(self):
        dp=OpDataProvider(load=True)
        df=dp._load_data()
        return df

    def _bind_sofa_op(self,df):
        df_op=self._provide_op()
        df_binds=pd.read_csv('data/binds_ss_op.csv', index_col=None)
        df_op=df_op.merge(df_binds[['op_mid','mid']], left_on='mid', right_on='op_mid')
        return df.merge(df_op[['mid_y','odds_away','odds_draw','odds_home','oddsprob_home','oddsprob_draw','oddsprob_away','drift_home','drift_away','drift_draw']], left_on='mid', right_on='mid_y', how='left')

    def _load_data(self):
        #df_op=self._provide_sofa()
        df_sofa=self._provide_sofa()
        df_elo=self._provide_elo()
        df_sofa=self._add_elo(df_sofa,df_elo)
        df_sofa=self._bind_sofa_op(df_sofa)
        return df_sofa
    
    def provide_data(self, double=True):
        df=self._load_data()
        df['psft']=df.sc1-df.sc2
        df['psht']=df.homeScoreHT-df.awayScoreHT
        df['w1']=np.where(df.winner=='home',1,0)
        df['wx']=np.where(df.winner=='draw',1,0)
        df['w2']=np.where(df.winner=='away',1,0)
        df_home=df.copy()
        df_home=df_home.rename(columns={'homeScoreHT':'ht1','awayScoreHT':'ht2','sc1':'ft1','sc2':'ft2','vote_home':'vote1','vote_draw':'votex','vote_away':'vote2','home_formation':'form1','away_formation':'form2','oddsprob_home':'oddsprob1','oddsprob_draw':'oddsprobx','oddsprob_away':'oddsprob2','drift_home':'drift1','drift_draw':'driftx','drift_away':'drift2'})
        if double:
            df_home['side']=1
            df_away=df.copy()
            df_away['side']=0
            df_away=df_away.rename(columns={'homeScoreHT':'ht2','awayScoreHT':'ht1','sc1':'ft2','sc2':'ft1','vote_home':'vote2','vote_draw':'votex','vote_away':'vote1',
                                            'home_formation':'form2','away_formation':'form1','w1':'w2','w2':'w1','elo1':'elo2','elo2':'elo1','t1':'t2','t2':'t1',
                                            'tid1':'tid2','tid2':'tid1','odds_away':'odds_home','odds_home':'odds_away','oddsprob1':'oddsprob2',
                                            'oddsprob2':'oddsprob1','drift1':'drift2','drift2':'drift1',
                                            'possession1':'possession2', 'shont1':'shont2', 'shofft1':'shofft2', 'corners1':'corners2', 
                                            'offsides1':'offsides2', 'fouls1':'fouls2', 'cards1':'cards2', 'gksaves1':'gksaves2',
                                            'possession2':'possession1', 'shont2':'shont1', 'shofft2':'shofft1', 'corners2':'corners1', 
                                            'offsides2':'offsides1', 'fouls2':'fouls1', 'cards2':'cards1', 'gksaves2':'gksaves1'})
            df_away['psft']=df_away['psft']*-1
            df_away['psht']=df_away['psht']*-1

            df_home=pd.concat([df_home,df_away], axis=0)

        return df_home.reset_index(drop=True)

In [17]:
import glob
import pandas as pd
import numpy as np
import seaborn as sns
import pickle
import pytz
from datetime import timezone,datetime,timedelta
from sklearn.preprocessing import LabelEncoder,OneHotEncoder,MinMaxScaler
import gc

import api.util
from api.data_collector import DataCollector
from api.sofa_dp import SofaDataProvider
from api.op_dp import OpDataProvider

from IPython.display import display
pd.options.display.max_columns = None
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
dp=DataCollector()
#df=dp.provide_data(double=False)
df=dp.provide_data()

Empty DataFrame
Columns: [team]
Index: []


In [12]:
#['side', 'country', 'country_id', 'liga', 'mid', 'round', 'ds', 'de', 'tid1', 'tid2', 't1', 't2', 'w1', 'wx', 'w2',  'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft', 
#       'winner', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2']
COL_CUR=['country_id', 'round', 'ds', 'de', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2','oddsprob1','oddsprobx','oddsprob2','drift1','drift2','driftx']
COL_PREV=['w1', 'wx', 'w2',  'ht1', 'ht2', 'ft1', 'ft2', 'psht', 'psft', 'graph1','graph2', 'possession1','possession2', 'shont1','shont2', 'shofft1','shofft2', 'corners1','corners2', 'offsides1','offsides2', 'fouls1','fouls2', 'cards1','cards2', 'gksaves1','gksaves2','precision1','precision2']
COL_CAT=['country_id','form1', 'form2', 'pop_r']
COL_BIN=['side']
COL_NUM=['w1', 'wx', 'w2',  'ht1', 'ht2', 'ft1', 'ft2', 'psht', 'psft', 'vote1', 'votex', 'vote2', 'elo1', 'elo2','oddsprob1','oddsprobx','oddsprob2','drift1','driftx','drift2', 'graph1','graph2', 'possession1','possession2', 'shont1','shont2', 'shofft1','shofft2', 'corners1','corners2', 'offsides1','offsides2', 'fouls1','fouls2', 'cards1','cards2', 'gksaves1','gksaves2','precision1','precision2']
COL_INF=['side','country', 'liga', 'mid', 'round', 'ds', 't1', 't2','tid1', 'tid2', 'w1', 'wx', 'w2',  'ft1', 'ft2','winner','odds_away','odds_draw','odds_home']

#df=pd.read_csv('data/matches.csv', index_col=None)
#df['elo1'].fillna((df['elo1'].mean()), inplace=True)
#df['elo2'].fillna((df['elo2'].mean()), inplace=True)


In [13]:
def calc_stat(df_, group_feature, name, cols):
    df_=df_.sort_values(by='ds')
    cols_out=['ds','mid','tid']
    df_['C']=df_.groupby([group_feature])['N'].apply(lambda x : x.shift().cumsum())#.cumsum()#-1
    #display(df_)
    for col in cols:
        print(col, end=', ')
        cols_out.append(f'{col}_{name}_avg')
        df_[f'{col}_{name}_sum']=df_.groupby([group_feature])[col].apply(lambda x : x.shift().cumsum())#.cumsum()#-df_teams[col]
        df_[f'{col}_{name}_avg']=df_[f'{col}_{name}_sum']/df_['C']
        df_.drop(columns=[f'{col}_{name}_sum'], inplace=True)
        #cols_out.append(f'{col}_{name}_sum')
    df_=df_.rename(columns={group_feature:'tid'})
    return df_[cols_out]

def calc_stat_n(df_, group_feature, name, cols, n):
    df_=df_.sort_values(by='ds')
    cols_out=['ds','mid','tid']
    df_['C']=df_.groupby([group_feature])['N'].apply(lambda x : x.shift().rolling(min_periods=1, window=n).sum())
    #display(df_)
    for col in cols:
        print(col, end=', ')
        cols_out.append(f'{col}_{name}_{n}')
        df_[f'{col}_{name}_sum']=df_.groupby([group_feature])[col].apply(lambda x : x.shift().rolling(min_periods=1, window=n).sum())
        df_[f'{col}_{name}_{n}']=df_[f'{col}_{name}_sum']/df_['C']
        df_.drop(columns=[f'{col}_{name}_sum'], inplace=True)
        #cols_out.append(f'{col}_{name}_sum')
    df_=df_.rename(columns={group_feature:'tid'})
    return df_[cols_out]

In [14]:
deep=3
df['N']=1
cols_cum=COL_NUM
df_all=calc_stat(df, 'tid1', 'tt',cols_cum).sort_values(by=['mid','tid'])
df_all3=calc_stat_n(df, 'tid1', 'tt',cols_cum, deep).sort_values(by=['mid','tid'])
colnames=[x.replace('_avg','_form') for x in df_all.columns[3:]]
res=df_all3[df_all3.columns[3:]].values-df_all[df_all.columns[3:]].values
df_form=pd.DataFrame(res,columns=colnames)
df_all=pd.concat([df_all,df_form], axis=1)

df_home=calc_stat(df[df['side']==1], 'tid1', 'ts', cols_cum).sort_values(by=['mid','tid'])
df_home3=calc_stat_n(df[df['side']==1], 'tid1', 'ts', cols_cum, deep).sort_values(by=['mid','tid'])
colnames=[x.replace('_avg','_form') for x in df_home.columns[3:]]
res=df_home3[df_home3.columns[3:]].values-df_home[df_home.columns[3:]].values
df_form=pd.DataFrame(res,columns=colnames)
df_home=pd.concat([df_home,df_form], axis=1)

df_away=calc_stat(df[df['side']==0], 'tid1', 'ts', cols_cum).reset_index(drop=True).sort_values(by=['mid','tid'])
df_away3=calc_stat_n(df[df['side']==0], 'tid1', 'ts', cols_cum, deep).sort_values(by=['mid','tid'])
colnames=[x.replace('_avg','_form') for x in df_away.columns[3:]]
res=df_away3[df_away3.columns[3:]].values-df_away[df_away.columns[3:]].values
df_form=pd.DataFrame(res,columns=colnames)
df_away=pd.concat([df_away,df_form], axis=1)

#df_home=calc_stat(df, 'tid1', 'th', cols_cum)
#df_away=calc_stat(df, 'tid2', 'ta', cols_cum)

df_all.to_csv('data/teams_stats_all.csv', index=False)
df_home.to_csv('data/teams_stats_home.csv', index=False)
df_away.to_csv('data/teams_stats_away.csv', index=False)
df_all3.to_csv('data/teams_stats_all3.csv', index=False)
df_home3.to_csv('data/teams_stats_home3.csv', index=False)
df_away3.to_csv('data/teams_stats_away3.csv', index=False)

w1
wx
w2
ht1
ht2
ft1
ft2
psht
psft
vote1
votex
vote2
elo1
elo2
oddsprob1
oddsprobx
oddsprob2
drift1
driftx
drift2
graph1
graph2
possession1
possession2
shont1
shont2
shofft1
shofft2
corners1
corners2
offsides1
offsides2
fouls1
fouls2
cards1
cards2
gksaves1
gksaves2
precision1
precision2
w1
wx
w2
ht1
ht2
ft1
ft2
psht
psft
vote1
votex
vote2
elo1
elo2
oddsprob1
oddsprobx
oddsprob2
drift1
driftx
drift2
graph1
graph2
possession1
possession2
shont1
shont2
shofft1
shofft2
corners1
corners2
offsides1
offsides2
fouls1
fouls2
cards1
cards2
gksaves1
gksaves2
precision1
precision2
w1
wx
w2
ht1
ht2
ft1
ft2
psht
psft
vote1
votex
vote2
elo1
elo2
oddsprob1
oddsprobx
oddsprob2
drift1
driftx
drift2
graph1
graph2
possession1
possession2
shont1
shont2
shofft1
shofft2
corners1
corners2
offsides1
offsides2
fouls1
fouls2
cards1
cards2
gksaves1
gksaves2
precision1
precision2
w1
wx
w2
ht1
ht2
ft1
ft2
psht
psft
vote1
votex
vote2
elo1
elo2
oddsprob1
oddsprobx
oddsprob2
drift1
driftx
drift2
graph1
graph2
possessi

df_all=pd.read_csv('data/teams_stats_all.csv', index_col=None)
df_home=pd.read_csv('data/teams_stats_home.csv', index_col=None)
df_away=pd.read_csv('data/teams_stats_away.csv', index_col=None)

df_all=pd.read_csv('data/teams_stats_all.csv', index_col=None)
df_home=pd.read_csv('data/teams_stats_home.csv', index_col=None)
df_away=pd.read_csv('data/teams_stats_away.csv', index_col=None)

df_all.columns

id=9270007
tid1=594
tid2=1499
display(df_all3[df_all3.mid==id])
display(df_home3[df_home3.mid==id])
display(df_away3[df_away3.mid==id])


### df_all
- `tt_avg` - Все матчи до текущего, среднее
- `tt_form` - Среднее по 3 последним, без `tt_avg`

### df_home
Статсы только тех игр, когда команда играла дома
- `ts_avg` - Все матчи до текущего, среднее
- `ts_form` - Среднее по 3 последним, без `ts_avg`

### df_away
Статсы только тех игр, когда команда играла не дома
- `ts_avg` - Все матчи до текущего, среднее
- `ts_form` - Среднее по 3 последним, без `ts_avg`

Все, что 1 относится к самой команде, а с 2 - к её оппонентам, объединив данные добавлено два префикса - `tar_` и `_opp` соответственно.

Нужно скопировать статсы по командам с учетом:
1. Таргетная команда и оппонент.
2. Дома или нет.

## Вычисление diffs
Что есть 1 и 2 для разных статсов? 

На примере w1 - wx - w2:
- tar_w1_tt_avg - среднее побед таргетной команды по всем играм
- tar_wx_tt_avg - среднее ничей таргетной команды по всем играм
- tar_w2_tt_avg - среднее проигрышей таргетной команды по всем играм
- opp_w1_tt_avg - среднее побед оппонента по всем играм
- opp_wx_tt_avg - среднее ничей оппонента по всем играм
- opp_w2_tt_avg - среднее проигрышей оппонента по всем играм

tar_w1_tt_form - разница между последними играми и всей статистикой, может быть как положительной, если в последние игры параметр больше, так и отрицательной

аналогично с ts, только учитывается статистика только по играм дома либо в гостях

## Перекрестные сравнения: забил - пропустил
- tar_ft1_tt_avg - среднее количество голов забила таргетная команда по всем играм (оказала давление - graph1)
- tar_ft2_tt_avg - среднее количество голов пропустила таргетная команда по всем играм (позволила давление - graph2)
- opp_ft1_tt_avg - среднее количество голов забила команда оппонента по всем играм
- opp_ft2_tt_avg - среднее количество голов пропустила команда оппонента по всем играм

tar_ft1_tt_avg и opp_ft2_tt_avg => `ft1_tt_avg`, tar_ft2_tt_avg и opp_ft1_tt_avg => `ft2_tt_avg`, а также ft1_tt_avg - ft2_tt_avg => `diff_ft_tt_avg`

tar_graph1_tt_avg и opp_graph2_tt_avg => `graph1_tt_avg`, tar_graph2_tt_avg и opp_graph1_tt_avg => `graph2_tt_avg`, а также graph1_tt_avg - graph2_tt_avg => `diff_graph_tt_avg`

tar_w1_tt_avg и opp_w2_tt_avg => `w1_tt_avg`, tar_w2_tt_avg и opp_w1_tt_avg => `w2_tt_avg`, tar_wx_tt_avg и opp_wx_tt_avg => `wx_tt_avg`, а также w1_tt_avg - w2_tt_avg => `diff_w_tt_avg`

tar_psft_tt_avg и opp_psft_tt_avg => `psft_tt_avg`

In [15]:
# Копируем данные всех игр, и домашних и нет, вперемешку. Это - таргетный датасет.
# Префикс для него - tar_, а суффикс - _tt_.
df_all_target=df_all.copy()
df_all_target.columns=np.hstack([df_all.columns[:3],['tar_'+x for x in df_all.columns[3:]]])

# Точно такой же датасет называем оппонентным.
# Префикс для него - opp_, а суффикс - _tt_.
df_all_opponent=df_all.copy()
df_all_opponent.columns=np.hstack([df_all.columns[:3],['opp_'+x for x in df_all.columns[3:]]])

# Для команд, игравших дома делаем датасет из домашнего (домашний таргетный). 
# Префикс для него - tar_, а суффикс - _ts_.
df_home_target=df_home.copy()
df_home_target.columns=np.hstack([df_home.columns[:3],['tar_'+x.replace('_th_','_ts_') for x in df_home.columns[3:]]])
# Для их противников делаем датасет из гостевого (гостевой оппонентный). 
# Префикс для него - opp_, а суффикс - _ts_.
df_away_opponent=df_away.copy()
df_away_opponent.columns=np.hstack([df_away.columns[:3],['opp_'+x.replace('_ta_','_ts_') for x in df_away.columns[3:]]])

# Для команд, игравших в гостях делаем датасет из гостевого (гостевой таргетный). 
# Префикс для него - tar_, а суффикс - _ts_.
df_away_target=df_away.copy()
df_away_target.columns=np.hstack([df_away.columns[:3],['tar_'+x.replace('_ta_','_ts_') for x in df_away.columns[3:]]])
# Для их противников делаем датасет из домашнего (домашний оппонентный). 
# Префикс для него - opp_, а суффикс - _ts_.
df_home_opponent=df_home.copy()
df_home_opponent.columns=np.hstack([df_home.columns[:3],['opp_'+x.replace('_th_','_ts_') for x in df_home.columns[3:]]])

# Итого есть 6 датасетов с шаблонными названиями переменных:
# tar_???_tt_, opp_???_tt_ - для статсов по всем играм
# tar_???_ts_, opp_???_ts_ - для статсов с учётом дома / в гостях для таргетных команд, игравших дома
# tar_???_ts_, opp_???_ts_ - для статсов с учётом дома / в гостях для таргетных команд, игравших в гостях 

# Делаем два датасета для таргетных домашних и гостевых команд.
# В данные берём лишь информационные поля (по которым потом проводить анализ) - COL_INF, 
# и текущие данные, доступные к сбору перед матчем - COL_CUR. 
df_1=df[df['side']==1][COL_INF+COL_CUR]
df_2=df[df['side']==0][COL_INF+COL_CUR]

# Заполняем данные для таргетных команд, играющих дома:
# Статсы по всем данным для первой команды (таргет)                     tar_???_tt_
df_1=df_1.merge(df_all_target[df_all_target.columns[1:]], left_on=['mid','tid1'], right_on=['mid','tid'], how='left')
# Статсы по всем данным для второй команды (оппонент)                   opp_???_tt_
df_1=df_1.merge(df_all_opponent[df_all_opponent.columns[1:]], left_on=['mid','tid2'], right_on=['mid','tid'], how='left')
# Статсы по всем играм дома для первой команды (таргет домашняя)        tar_???_ts_
df_1=df_1.merge(df_home_target[df_home_target.columns[1:]], left_on=['mid','tid1'], right_on=['mid','tid'], how='left')
# Статсы по всем играм в гостях для второй команды (оппонент гостевая)  opp_???_ts_
df_1=df_1.merge(df_away_opponent[df_away_opponent.columns[1:]], left_on=['mid','tid2'], right_on=['mid','tid'], how='left')

# То же для таргетных команд, играющих в гостях:
# Статсы по всем данным для первой команды (таргет)                     tar_???_tt_
df_2=df_2.merge(df_all_target[df_all_target.columns[1:]], left_on=['mid','tid1'], right_on=['mid','tid'], how='left')
# Статсы по всем данным для второй команды (оппонент)                   opp_???_tt_
df_2=df_2.merge(df_all_opponent[df_all_opponent.columns[1:]], left_on=['mid','tid2'], right_on=['mid','tid'], how='left')
# Статсы по всем играм в гостях для первой команды (таргет гостевая)    tar_???_ts_
df_2=df_2.merge(df_away_target[df_away_target.columns[1:]], left_on=['mid','tid1'], right_on=['mid','tid'], how='left')
# Статсы по всем играм дома для второй команды (оппонент домашняя)      opp_???_ts_
df_2=df_2.merge(df_home_opponent[df_home_opponent.columns[1:]], left_on=['mid','tid2'], right_on=['mid','tid'], how='left')

# Объединяем оба датасета и удаляем дубликаты колонок:
df_=pd.concat([df_1,df_2], axis=0)
df_.drop(columns=['tid_x','tid_y'], inplace=True)

# Очищаем память
del df_all_target,df_all_opponent,df_home_target,df_away_opponent,df_away_target,df_home_opponent,df_1,df_2
gc.collect()

0

# Diffs
COL_PREV=['w1', 'wx', 'w2',  'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft', 'graph1','graph2', 'possession1','possession2', 'shont1','shont2', 'shofft1','shofft2', 'corners1','corners2', 'offsides1','offsides2', 'fouls1','fouls2', 'cards1','cards2', 'gksaves1','gksaves2','precision1','precision2']

cols_tar=[x for x in df_.columns if 'tar_' in x]
cols_opp=[x for x in df_.columns if 'opp_' in x]
cols_diff=[x.replace('tar_','_diff_') for x in cols_tar]
df_.reset_index(drop=True, inplace=True)
df_=pd.concat([df_,pd.DataFrame(df_[cols_tar].values-df_[cols_opp].values, columns=cols_diff)], axis=1)
df_['diff_vote12']=df_['vote1']-df_['vote2']
df_['diff_elo']=df_['elo1']-df_['elo2']
df_['diff_op']=df_['oddsprob_home']-df_['oddsprob_away']
df_.to_csv('data/stats_generated1.csv', index=False)

In [16]:
# Diffs
def diff_numbers(df, feature):
    print(feature,', ', end='')
    for scope in ['tt','ts']:
        for typ in ['avg','form']:
            col1=f'tar_{feature}1_{scope}_{typ}'
            col2=f'opp_{feature}2_{scope}_{typ}'
            col_out1=f'{feature}1_{scope}_{typ}'
            df[col_out1]=(df[col1]+df[col2])/2
            col1=f'tar_{feature}2_{scope}_{typ}'
            col2=f'opp_{feature}1_{scope}_{typ}'
            col_out2=f'{feature}2_{scope}_{typ}'
            df[col_out2]=(df[col1]+df[col2])/2
            col_out=f'diff_{feature}_{scope}_{typ}'
            df[col_out]=df[col_out1]-df[col_out2]
    return df

def diff_non_numbers(df, feature):
    print(feature,', ', end='')
    for scope in ['tt','ts']:
        for typ in ['avg','form']:
            'tar_wx_tt_avg'
            col1=f'tar_{feature}_{scope}_{typ}'
            col2=f'opp_{feature}_{scope}_{typ}'
            col_out=f'{feature}_{scope}_{typ}'
            df[col_out]=(df[col1]+df[col2])/2
    return df

features=['w',  'ht', 'ft', 'graph', 'possession', 'shont', 'shofft', 'corners', 'offsides', 'fouls','cards', 'gksaves','precision']
features_non_numbers=['wx',  'psht', 'psft']

df=df_.copy()
for col in features:
    df=diff_numbers(df, col)

print('')
for col in features_non_numbers:
    df=diff_non_numbers(df, col)


w , ht , ft , graph , possession , shont , shofft , corners , offsides , fouls , cards , gksaves , precision , 
wx , psht , psft , 

In [17]:
df['diff_vote12']=df['vote1']-df_['vote2']
df['diff_elo']=df['elo1']-df_['elo2']
df['diff_op']=df['oddsprob1']-df_['oddsprob2']
df.to_csv('data/stats_generated.csv', index=False)

# Teams last stats

In [71]:
df_all=pd.read_csv('data/stats_generated.csv', index_col=None)
df_all=df_all.dropna(subset=['ft1','ft2'])
df_all['ds']=df_all['ds']=pd.to_datetime(df_all['ds'])
df_all=df_all.sort_values(by='ds')
df_last=df_all.drop_duplicates(subset=['side','tid1'], keep='last')
df_last.to_csv('data/stats_teams.csv', index=False)

# Today

In [76]:
from api.data_collector import DataCollector
dp=DataCollector(today=True)
#df=dp.provide_data()
df=dp.provide_today()
df_last=pd.read_csv('data/stats_teams.csv', index_col=None)

93
Empty DataFrame
Columns: [team]
Index: []


In [77]:
df

Unnamed: 0,ht2,country,country_id,ds,ht1,liga,mid,round,ft1,ft2,t1,t2,tid1,tid2,winner,formation_h,formation_a,form1,form2,vote1,votex,vote2,votes,y,pop_r,de,elo1,elo2,op_tid1,op_tid2,odds_away,odds_draw,odds_home,oddsprob1,oddsprobx,oddsprob2,drift1,drift2,driftx,psft,psht,w1,wx,w2,side,awayScoreHT,homeScoreHT,sc1,sc2,oddsprob_home,oddsprob_draw,oddsprob_away,drift_home,drift_away,drift_draw
0,,england,15,2021-01-31 12:00:00+00:00,,premier-league,8897050,21,,,chelsea,burnley,4,3,draw,,,,,0.762910,0.160065,0.077026,27770.0,2021,4.0,2021-01-31,1828.057861,1719.795166,256,135,9.68,5.20,1.35,0.713223,0.186128,0.100649,-0.068265,0.210811,0.081265,0,0,0,0,0,1,,,,,,,,,,
1,,england,15,2021-01-31 14:00:00+00:00,,premier-league,8897051,21,,,leicester city,leeds united,6,796,draw,4-2-3-1,4-1-4-1,14.0,12.0,0.689995,0.206004,0.104001,23019.0,2021,3.0,2021-01-31,1821.147705,1676.518799,239,1503,4.25,4.05,1.81,0.533874,0.238475,0.227651,-0.041660,0.058843,0.025857,0,0,0,0,0,1,,,,,,,,,,
2,,england,15,2021-01-31 16:30:00+00:00,,premier-league,8897027,21,,,west ham united,liverpool,120,181,draw,4-2-3-1,4-3-3,14.0,18.0,0.193081,0.186881,0.620038,28066.0,2021,4.0,2021-01-31,1757.818237,1944.577148,444,1183,1.78,4.04,4.41,0.218530,0.240785,0.540685,-0.091515,0.037856,-0.022932,0,0,0,0,0,1,,,,,,,,,,
3,,england,15,2021-01-31 19:15:00+00:00,,premier-league,8897039,21,,,brighton hove albion,tottenham,609,274,draw,3-5-2,4-2-3-1,9.0,14.0,0.119534,0.192975,0.687491,21609.0,2021,3.0,2021-01-31,1662.046997,1836.129150,984,680,2.28,3.40,3.30,0.291167,0.286855,0.421978,-0.078422,0.078682,-0.059461,0,0,0,0,0,1,,,,,,,,,,
4,,spain,42,2021-01-31 15:15:00+00:00,,laliga,8966464,21,,,cadiz,atletico madrid,940,134,draw,4-4-2,4-4-2,20.0,20.0,0.126301,0.173107,0.700591,22668.0,2021,3.0,2021-01-31,1598.655518,1927.645752,1588,261,,,,,,,,,,0,0,0,0,0,1,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153,,austria,2,2021-01-31 13:30:00+00:00,,bundesliga,8858506,15,,,wolfsberger ac,skn st polten,330,1033,draw,,,,,0.457300,0.347658,0.195041,1815.0,2021,0.0,2021-01-31,1550.491699,1419.237427,1395,368,,,,,,,,,,0,0,0,0,0,0,,,,,,,,,,
154,,turkey,45,2021-01-31 13:00:00+00:00,,tff-1-lig,8917288,19,,,adana demirspor,boluspor,153,410,draw,,,,,0.509294,0.267658,0.223048,807.0,2021,0.0,2021-01-31,,,6537,4729,,,,,,,,,,0,0,0,0,0,0,,,,,,,,,,
155,,belgium,4,2021-01-31 12:30:00+00:00,,pro-league,8833161,23,,,standard liege,club brugge,270,82,draw,,,,,0.081898,0.254644,0.663458,7375.0,2021,1.0,2021-01-31,1521.353760,1688.555908,562,1355,,,,,,,,,,0,0,0,0,0,0,,,,,,,,,,
156,,germany,20,2021-01-31 13:00:00+00:00,,3-liga,8897913,22,,,fsv zwickau,tsv 1860 munchen,1161,296,draw,,,,,0.052140,0.254796,0.693064,2033.0,2021,0.0,2021-01-31,,,1193,2159,1.84,3.37,4.47,,,,,,,0,0,0,0,0,0,,,,,0.505812,0.280372,0.213816,0.063097,-0.084783,-0.082277


In [79]:
df_last[(df_last['side']==1) & (df_last['tid1']==4)]

Unnamed: 0,side,country,liga,mid,round,ds,t1,t2,tid1,tid2,w1,wx,w2,ft1,ft2,winner,odds_away,odds_draw,odds_home,country_id,round.1,ds.1,de,form1,form2,vote1,votex,vote2,pop_r,elo1,elo2,oddsprob1,oddsprobx,oddsprob2,drift1,drift2,driftx,tar_w1_tt_avg,tar_wx_tt_avg,tar_w2_tt_avg,tar_ht1_tt_avg,tar_ht2_tt_avg,tar_ft1_tt_avg,tar_ft2_tt_avg,tar_psht_tt_avg,tar_psft_tt_avg,tar_vote1_tt_avg,tar_votex_tt_avg,tar_vote2_tt_avg,tar_elo1_tt_avg,tar_elo2_tt_avg,tar_oddsprob1_tt_avg,tar_oddsprobx_tt_avg,tar_oddsprob2_tt_avg,tar_drift1_tt_avg,tar_driftx_tt_avg,tar_drift2_tt_avg,tar_graph1_tt_avg,tar_graph2_tt_avg,tar_possession1_tt_avg,tar_possession2_tt_avg,tar_shont1_tt_avg,tar_shont2_tt_avg,tar_shofft1_tt_avg,tar_shofft2_tt_avg,tar_corners1_tt_avg,tar_corners2_tt_avg,tar_offsides1_tt_avg,tar_offsides2_tt_avg,tar_fouls1_tt_avg,tar_fouls2_tt_avg,tar_cards1_tt_avg,tar_cards2_tt_avg,tar_gksaves1_tt_avg,tar_gksaves2_tt_avg,tar_precision1_tt_avg,tar_precision2_tt_avg,tar_w1_tt_form,tar_wx_tt_form,tar_w2_tt_form,tar_ht1_tt_form,tar_ht2_tt_form,tar_ft1_tt_form,tar_ft2_tt_form,tar_psht_tt_form,tar_psft_tt_form,tar_vote1_tt_form,tar_votex_tt_form,tar_vote2_tt_form,tar_elo1_tt_form,tar_elo2_tt_form,tar_oddsprob1_tt_form,tar_oddsprobx_tt_form,tar_oddsprob2_tt_form,tar_drift1_tt_form,tar_driftx_tt_form,tar_drift2_tt_form,tar_graph1_tt_form,tar_graph2_tt_form,tar_possession1_tt_form,tar_possession2_tt_form,tar_shont1_tt_form,tar_shont2_tt_form,tar_shofft1_tt_form,tar_shofft2_tt_form,tar_corners1_tt_form,tar_corners2_tt_form,tar_offsides1_tt_form,tar_offsides2_tt_form,tar_fouls1_tt_form,tar_fouls2_tt_form,tar_cards1_tt_form,tar_cards2_tt_form,tar_gksaves1_tt_form,tar_gksaves2_tt_form,tar_precision1_tt_form,tar_precision2_tt_form,opp_w1_tt_avg,opp_wx_tt_avg,opp_w2_tt_avg,opp_ht1_tt_avg,opp_ht2_tt_avg,opp_ft1_tt_avg,opp_ft2_tt_avg,opp_psht_tt_avg,opp_psft_tt_avg,opp_vote1_tt_avg,opp_votex_tt_avg,opp_vote2_tt_avg,opp_elo1_tt_avg,opp_elo2_tt_avg,opp_oddsprob1_tt_avg,opp_oddsprobx_tt_avg,opp_oddsprob2_tt_avg,opp_drift1_tt_avg,opp_driftx_tt_avg,opp_drift2_tt_avg,opp_graph1_tt_avg,opp_graph2_tt_avg,opp_possession1_tt_avg,opp_possession2_tt_avg,opp_shont1_tt_avg,opp_shont2_tt_avg,opp_shofft1_tt_avg,opp_shofft2_tt_avg,opp_corners1_tt_avg,opp_corners2_tt_avg,opp_offsides1_tt_avg,opp_offsides2_tt_avg,opp_fouls1_tt_avg,opp_fouls2_tt_avg,opp_cards1_tt_avg,opp_cards2_tt_avg,opp_gksaves1_tt_avg,opp_gksaves2_tt_avg,opp_precision1_tt_avg,opp_precision2_tt_avg,opp_w1_tt_form,opp_wx_tt_form,opp_w2_tt_form,opp_ht1_tt_form,opp_ht2_tt_form,opp_ft1_tt_form,opp_ft2_tt_form,opp_psht_tt_form,opp_psft_tt_form,opp_vote1_tt_form,opp_votex_tt_form,opp_vote2_tt_form,opp_elo1_tt_form,opp_elo2_tt_form,opp_oddsprob1_tt_form,opp_oddsprobx_tt_form,opp_oddsprob2_tt_form,opp_drift1_tt_form,opp_driftx_tt_form,opp_drift2_tt_form,opp_graph1_tt_form,opp_graph2_tt_form,opp_possession1_tt_form,opp_possession2_tt_form,opp_shont1_tt_form,opp_shont2_tt_form,opp_shofft1_tt_form,opp_shofft2_tt_form,opp_corners1_tt_form,opp_corners2_tt_form,opp_offsides1_tt_form,opp_offsides2_tt_form,opp_fouls1_tt_form,opp_fouls2_tt_form,opp_cards1_tt_form,opp_cards2_tt_form,opp_gksaves1_tt_form,opp_gksaves2_tt_form,opp_precision1_tt_form,opp_precision2_tt_form,tar_w1_ts_avg,tar_wx_ts_avg,tar_w2_ts_avg,tar_ht1_ts_avg,tar_ht2_ts_avg,tar_ft1_ts_avg,tar_ft2_ts_avg,tar_psht_ts_avg,tar_psft_ts_avg,tar_vote1_ts_avg,tar_votex_ts_avg,tar_vote2_ts_avg,tar_elo1_ts_avg,tar_elo2_ts_avg,tar_oddsprob1_ts_avg,tar_oddsprobx_ts_avg,tar_oddsprob2_ts_avg,tar_drift1_ts_avg,tar_driftx_ts_avg,tar_drift2_ts_avg,tar_graph1_ts_avg,tar_graph2_ts_avg,tar_possession1_ts_avg,tar_possession2_ts_avg,tar_shont1_ts_avg,tar_shont2_ts_avg,tar_shofft1_ts_avg,tar_shofft2_ts_avg,tar_corners1_ts_avg,tar_corners2_ts_avg,tar_offsides1_ts_avg,tar_offsides2_ts_avg,tar_fouls1_ts_avg,tar_fouls2_ts_avg,tar_cards1_ts_avg,tar_cards2_ts_avg,tar_gksaves1_ts_avg,tar_gksaves2_ts_avg,tar_precision1_ts_avg,tar_precision2_ts_avg,tar_w1_ts_form,tar_wx_ts_form,tar_w2_ts_form,tar_ht1_ts_form,tar_ht2_ts_form,tar_ft1_ts_form,tar_ft2_ts_form,tar_psht_ts_form,tar_psft_ts_form,tar_vote1_ts_form,tar_votex_ts_form,tar_vote2_ts_form,tar_elo1_ts_form,tar_elo2_ts_form,tar_oddsprob1_ts_form,tar_oddsprobx_ts_form,tar_oddsprob2_ts_form,tar_drift1_ts_form,tar_driftx_ts_form,tar_drift2_ts_form,tar_graph1_ts_form,tar_graph2_ts_form,tar_possession1_ts_form,tar_possession2_ts_form,tar_shont1_ts_form,tar_shont2_ts_form,tar_shofft1_ts_form,tar_shofft2_ts_form,tar_corners1_ts_form,tar_corners2_ts_form,tar_offsides1_ts_form,tar_offsides2_ts_form,tar_fouls1_ts_form,tar_fouls2_ts_form,tar_cards1_ts_form,tar_cards2_ts_form,tar_gksaves1_ts_form,tar_gksaves2_ts_form,tar_precision1_ts_form,tar_precision2_ts_form,opp_w1_ts_avg,opp_wx_ts_avg,opp_w2_ts_avg,opp_ht1_ts_avg,opp_ht2_ts_avg,opp_ft1_ts_avg,opp_ft2_ts_avg,opp_psht_ts_avg,opp_psft_ts_avg,opp_vote1_ts_avg,opp_votex_ts_avg,opp_vote2_ts_avg,opp_elo1_ts_avg,opp_elo2_ts_avg,opp_oddsprob1_ts_avg,opp_oddsprobx_ts_avg,opp_oddsprob2_ts_avg,opp_drift1_ts_avg,opp_driftx_ts_avg,opp_drift2_ts_avg,opp_graph1_ts_avg,opp_graph2_ts_avg,opp_possession1_ts_avg,opp_possession2_ts_avg,opp_shont1_ts_avg,opp_shont2_ts_avg,opp_shofft1_ts_avg,opp_shofft2_ts_avg,opp_corners1_ts_avg,opp_corners2_ts_avg,opp_offsides1_ts_avg,opp_offsides2_ts_avg,opp_fouls1_ts_avg,opp_fouls2_ts_avg,opp_cards1_ts_avg,opp_cards2_ts_avg,opp_gksaves1_ts_avg,opp_gksaves2_ts_avg,opp_precision1_ts_avg,opp_precision2_ts_avg,opp_w1_ts_form,opp_wx_ts_form,opp_w2_ts_form,opp_ht1_ts_form,opp_ht2_ts_form,opp_ft1_ts_form,opp_ft2_ts_form,opp_psht_ts_form,opp_psft_ts_form,opp_vote1_ts_form,opp_votex_ts_form,opp_vote2_ts_form,opp_elo1_ts_form,opp_elo2_ts_form,opp_oddsprob1_ts_form,opp_oddsprobx_ts_form,opp_oddsprob2_ts_form,opp_drift1_ts_form,opp_driftx_ts_form,opp_drift2_ts_form,opp_graph1_ts_form,opp_graph2_ts_form,opp_possession1_ts_form,opp_possession2_ts_form,opp_shont1_ts_form,opp_shont2_ts_form,opp_shofft1_ts_form,opp_shofft2_ts_form,opp_corners1_ts_form,opp_corners2_ts_form,opp_offsides1_ts_form,opp_offsides2_ts_form,opp_fouls1_ts_form,opp_fouls2_ts_form,opp_cards1_ts_form,opp_cards2_ts_form,opp_gksaves1_ts_form,opp_gksaves2_ts_form,opp_precision1_ts_form,opp_precision2_ts_form,w1_tt_avg,w2_tt_avg,diff_w_tt_avg,w1_tt_form,w2_tt_form,diff_w_tt_form,w1_ts_avg,w2_ts_avg,diff_w_ts_avg,w1_ts_form,w2_ts_form,diff_w_ts_form,ht1_tt_avg,ht2_tt_avg,diff_ht_tt_avg,ht1_tt_form,ht2_tt_form,diff_ht_tt_form,ht1_ts_avg,ht2_ts_avg,diff_ht_ts_avg,ht1_ts_form,ht2_ts_form,diff_ht_ts_form,ft1_tt_avg,ft2_tt_avg,diff_ft_tt_avg,ft1_tt_form,ft2_tt_form,diff_ft_tt_form,ft1_ts_avg,ft2_ts_avg,diff_ft_ts_avg,ft1_ts_form,ft2_ts_form,diff_ft_ts_form,graph1_tt_avg,graph2_tt_avg,diff_graph_tt_avg,graph1_tt_form,graph2_tt_form,diff_graph_tt_form,graph1_ts_avg,graph2_ts_avg,diff_graph_ts_avg,graph1_ts_form,graph2_ts_form,diff_graph_ts_form,possession1_tt_avg,possession2_tt_avg,diff_possession_tt_avg,possession1_tt_form,possession2_tt_form,diff_possession_tt_form,possession1_ts_avg,possession2_ts_avg,diff_possession_ts_avg,possession1_ts_form,possession2_ts_form,diff_possession_ts_form,shont1_tt_avg,shont2_tt_avg,diff_shont_tt_avg,shont1_tt_form,shont2_tt_form,diff_shont_tt_form,shont1_ts_avg,shont2_ts_avg,diff_shont_ts_avg,shont1_ts_form,shont2_ts_form,diff_shont_ts_form,shofft1_tt_avg,shofft2_tt_avg,diff_shofft_tt_avg,shofft1_tt_form,shofft2_tt_form,diff_shofft_tt_form,shofft1_ts_avg,shofft2_ts_avg,diff_shofft_ts_avg,shofft1_ts_form,shofft2_ts_form,diff_shofft_ts_form,corners1_tt_avg,corners2_tt_avg,diff_corners_tt_avg,corners1_tt_form,corners2_tt_form,diff_corners_tt_form,corners1_ts_avg,corners2_ts_avg,diff_corners_ts_avg,corners1_ts_form,corners2_ts_form,diff_corners_ts_form,offsides1_tt_avg,offsides2_tt_avg,diff_offsides_tt_avg,offsides1_tt_form,offsides2_tt_form,diff_offsides_tt_form,offsides1_ts_avg,offsides2_ts_avg,diff_offsides_ts_avg,offsides1_ts_form,offsides2_ts_form,diff_offsides_ts_form,fouls1_tt_avg,fouls2_tt_avg,diff_fouls_tt_avg,fouls1_tt_form,fouls2_tt_form,diff_fouls_tt_form,fouls1_ts_avg,fouls2_ts_avg,diff_fouls_ts_avg,fouls1_ts_form,fouls2_ts_form,diff_fouls_ts_form,cards1_tt_avg,cards2_tt_avg,diff_cards_tt_avg,cards1_tt_form,cards2_tt_form,diff_cards_tt_form,cards1_ts_avg,cards2_ts_avg,diff_cards_ts_avg,cards1_ts_form,cards2_ts_form,diff_cards_ts_form,gksaves1_tt_avg,gksaves2_tt_avg,diff_gksaves_tt_avg,gksaves1_tt_form,gksaves2_tt_form,diff_gksaves_tt_form,gksaves1_ts_avg,gksaves2_ts_avg,diff_gksaves_ts_avg,gksaves1_ts_form,gksaves2_ts_form,diff_gksaves_ts_form,precision1_tt_avg,precision2_tt_avg,diff_precision_tt_avg,precision1_tt_form,precision2_tt_form,diff_precision_tt_form,precision1_ts_avg,precision2_ts_avg,diff_precision_ts_avg,precision1_ts_form,precision2_ts_form,diff_precision_ts_form,wx_tt_avg,wx_tt_form,wx_ts_avg,wx_ts_form,psht_tt_avg,psht_tt_form,psht_ts_avg,psht_ts_form,psft_tt_avg,psft_tt_form,psft_ts_avg,psft_ts_form,diff_vote12,diff_elo,diff_op
3131,1,england,fa-cup,9278377,10,2021-01-24 12:00:00+00:00,chelsea,luton town,4,523,1,0,0,3.0,1.0,home,,,,15,10,2021-01-24 12:00:00+00:00,2021-01-24,14,23,0.777233,0.14729,0.075477,4.0,,,,,,,,,0.56875,0.19375,0.2375,0.84375,0.475,1.809375,1.071875,0.36875,0.7375,0.637711,0.153223,0.209065,1615.504652,1473.781758,,,,,,,0.520605,0.45127,0.524594,0.387906,0.197625,0.127875,0.173276,0.12694,0.211058,0.154207,0.126953,0.10293,0.144922,0.16144,0.137216,0.180398,0.10253,0.15506,0.069683,0.073699,0.017094,0.025641,-0.042735,-0.188034,0.0,-0.598291,-0.196581,-0.188034,-0.401709,0.142783,0.045273,-0.188056,99.445946,96.333838,0.233744,0.108801,0.04207,-0.032167,-0.002899,0.052207,0.079482,0.023082,0.035385,0.015897,-0.000684,-0.026325,-0.075744,-0.039198,0.011506,-0.04931,-0.032051,-0.060897,0.081349,0.070055,0.083916,-0.055167,-0.025641,0.02849,-0.000768,-0.020154,0.308642,0.222222,0.469136,0.506173,0.703704,1.111111,1.592593,-0.197531,-0.481481,0.228608,0.27897,0.492422,1239.401789,1338.833638,,,,,,,0.523748,0.463906,0.429136,0.472099,0.112593,0.159012,0.120903,0.165177,0.17094,0.207502,0.121914,0.098765,0.18761,0.210758,0.13468,0.118967,0.131099,0.089359,0.08435,0.09117,-0.445693,0.074906,0.370787,-0.617978,-0.17603,-0.872659,0.11236,-0.441948,-0.985019,-0.050833,-0.00356,0.054393,45.99404,-524.523556,,,,,,,0.016975,0.012987,3.7e-05,0.041161,0.00779,-0.039101,-0.026217,-0.013431,-0.007058,-0.067128,-0.041901,0.180478,0.035915,-0.012841,0.042901,0.019748,-0.037453,0.047084,-0.033736,-0.033955,0.621951,0.219512,0.158537,0.939024,0.378049,1.987805,0.908537,0.560976,1.079268,0.665648,0.147763,0.186589,1576.096386,1450.743095,,,,,,,0.573199,0.396313,0.527866,0.398963,0.224878,0.119268,0.183558,0.120269,0.238274,0.155019,0.126143,0.102896,0.144926,0.171167,0.131375,0.186807,0.098432,0.179733,0.061417,0.07648,-0.324561,0.429825,-0.105263,0.140351,0.324561,-0.377193,0.368421,-0.184211,-0.745614,-0.159116,0.057086,0.102029,1220.276661,1288.538502,,,,,,,-0.032186,0.032186,-0.036491,0.062807,-0.015088,0.000351,0.019964,-0.062311,-0.009784,-0.026316,0.021382,0.057018,0.013158,-0.058584,0.008772,-0.022329,-0.005013,-0.002506,-0.002644,-0.030285,0.282051,0.102564,0.615385,0.358974,0.846154,0.820513,1.717949,-0.487179,-0.897436,0.107007,0.26801,0.624984,1306.206574,1390.033419,,,,,,,0.566929,0.433071,0.421538,0.450256,0.085128,0.148718,0.106985,0.167109,0.142012,0.205128,0.105769,0.102564,0.195055,0.20467,0.153846,0.097902,0.120879,0.06105,0.087783,0.077286,-0.264151,-0.311321,0.575472,-0.147799,0.264151,-0.075472,0.490566,-0.41195,-0.566038,-0.156876,0.001644,0.155233,,,,,,,,,0.013689,0.005179,0.06544,0.038333,-0.024277,-0.011321,0.014314,0.005205,0.023585,-0.031567,0.07842,-0.024568,0.074012,0.054638,0.114637,0.061464,-0.027254,0.004642,0.026713,0.023428,0.518943,0.273071,0.245872,0.19394,-0.244214,0.438154,0.618668,0.220294,0.398374,0.125455,-0.184707,0.310162,0.773727,0.490586,0.28314,-0.182032,-0.308989,0.126957,0.892589,0.368512,0.524078,0.202251,0.088381,0.11387,1.700984,1.091493,0.609491,-0.242966,-0.53462,0.291655,1.852877,0.864525,0.988352,0.056687,0.146475,-0.089788,0.492255,0.487509,0.004746,0.046235,0.020029,0.026206,0.503135,0.481621,0.021514,-0.013503,0.022937,-0.036441,0.498346,0.408521,0.089825,0.038273,0.007967,0.030305,0.489061,0.410251,0.07881,0.000921,0.064124,-0.063203,0.178319,0.120234,0.058085,-0.019892,-0.009267,-0.010625,0.186798,0.102198,0.0846,-0.013204,-0.011963,-0.001241,0.169226,0.123921,0.045305,-0.044588,-0.032708,-0.01188,0.175333,0.113627,0.061706,0.012584,-0.023999,0.036583,0.20928,0.162573,0.046707,-0.027811,-0.028184,0.000373,0.221701,0.148515,0.073186,-0.020676,-0.001365,-0.01931,0.112859,0.112422,0.000438,0.074213,-0.051399,0.125612,0.114354,0.104333,0.010021,-0.001593,0.067719,-0.069312,0.17784,0.174525,0.003315,0.034254,0.052985,-0.018731,0.174798,0.183111,-0.008313,0.033898,0.007714,0.026184,0.128092,0.157539,-0.029447,0.051832,-0.006133,0.057965,0.114638,0.170327,-0.055688,0.035118,0.046154,-0.011036,0.095944,0.143079,-0.047135,0.010721,-0.004482,0.015203,0.079741,0.150306,-0.070565,-0.000185,-0.01488,0.014695,0.080427,0.079025,0.001402,-0.017362,-0.026945,0.009583,0.069351,0.082131,-0.01278,0.010392,-0.001786,0.012178,0.207986,0.050274,0.161038,0.059252,0.08561,-0.314991,0.036898,-0.29808,0.128009,-0.693364,0.090916,-0.655826,0.701756,,


In [80]:
df_last[(df_last['side']==0) & (df_last['tid1']==3)]

Unnamed: 0,side,country,liga,mid,round,ds,t1,t2,tid1,tid2,w1,wx,w2,ft1,ft2,winner,odds_away,odds_draw,odds_home,country_id,round.1,ds.1,de,form1,form2,vote1,votex,vote2,pop_r,elo1,elo2,oddsprob1,oddsprobx,oddsprob2,drift1,drift2,driftx,tar_w1_tt_avg,tar_wx_tt_avg,tar_w2_tt_avg,tar_ht1_tt_avg,tar_ht2_tt_avg,tar_ft1_tt_avg,tar_ft2_tt_avg,tar_psht_tt_avg,tar_psft_tt_avg,tar_vote1_tt_avg,tar_votex_tt_avg,tar_vote2_tt_avg,tar_elo1_tt_avg,tar_elo2_tt_avg,tar_oddsprob1_tt_avg,tar_oddsprobx_tt_avg,tar_oddsprob2_tt_avg,tar_drift1_tt_avg,tar_driftx_tt_avg,tar_drift2_tt_avg,tar_graph1_tt_avg,tar_graph2_tt_avg,tar_possession1_tt_avg,tar_possession2_tt_avg,tar_shont1_tt_avg,tar_shont2_tt_avg,tar_shofft1_tt_avg,tar_shofft2_tt_avg,tar_corners1_tt_avg,tar_corners2_tt_avg,tar_offsides1_tt_avg,tar_offsides2_tt_avg,tar_fouls1_tt_avg,tar_fouls2_tt_avg,tar_cards1_tt_avg,tar_cards2_tt_avg,tar_gksaves1_tt_avg,tar_gksaves2_tt_avg,tar_precision1_tt_avg,tar_precision2_tt_avg,tar_w1_tt_form,tar_wx_tt_form,tar_w2_tt_form,tar_ht1_tt_form,tar_ht2_tt_form,tar_ft1_tt_form,tar_ft2_tt_form,tar_psht_tt_form,tar_psft_tt_form,tar_vote1_tt_form,tar_votex_tt_form,tar_vote2_tt_form,tar_elo1_tt_form,tar_elo2_tt_form,tar_oddsprob1_tt_form,tar_oddsprobx_tt_form,tar_oddsprob2_tt_form,tar_drift1_tt_form,tar_driftx_tt_form,tar_drift2_tt_form,tar_graph1_tt_form,tar_graph2_tt_form,tar_possession1_tt_form,tar_possession2_tt_form,tar_shont1_tt_form,tar_shont2_tt_form,tar_shofft1_tt_form,tar_shofft2_tt_form,tar_corners1_tt_form,tar_corners2_tt_form,tar_offsides1_tt_form,tar_offsides2_tt_form,tar_fouls1_tt_form,tar_fouls2_tt_form,tar_cards1_tt_form,tar_cards2_tt_form,tar_gksaves1_tt_form,tar_gksaves2_tt_form,tar_precision1_tt_form,tar_precision2_tt_form,opp_w1_tt_avg,opp_wx_tt_avg,opp_w2_tt_avg,opp_ht1_tt_avg,opp_ht2_tt_avg,opp_ft1_tt_avg,opp_ft2_tt_avg,opp_psht_tt_avg,opp_psft_tt_avg,opp_vote1_tt_avg,opp_votex_tt_avg,opp_vote2_tt_avg,opp_elo1_tt_avg,opp_elo2_tt_avg,opp_oddsprob1_tt_avg,opp_oddsprobx_tt_avg,opp_oddsprob2_tt_avg,opp_drift1_tt_avg,opp_driftx_tt_avg,opp_drift2_tt_avg,opp_graph1_tt_avg,opp_graph2_tt_avg,opp_possession1_tt_avg,opp_possession2_tt_avg,opp_shont1_tt_avg,opp_shont2_tt_avg,opp_shofft1_tt_avg,opp_shofft2_tt_avg,opp_corners1_tt_avg,opp_corners2_tt_avg,opp_offsides1_tt_avg,opp_offsides2_tt_avg,opp_fouls1_tt_avg,opp_fouls2_tt_avg,opp_cards1_tt_avg,opp_cards2_tt_avg,opp_gksaves1_tt_avg,opp_gksaves2_tt_avg,opp_precision1_tt_avg,opp_precision2_tt_avg,opp_w1_tt_form,opp_wx_tt_form,opp_w2_tt_form,opp_ht1_tt_form,opp_ht2_tt_form,opp_ft1_tt_form,opp_ft2_tt_form,opp_psht_tt_form,opp_psft_tt_form,opp_vote1_tt_form,opp_votex_tt_form,opp_vote2_tt_form,opp_elo1_tt_form,opp_elo2_tt_form,opp_oddsprob1_tt_form,opp_oddsprobx_tt_form,opp_oddsprob2_tt_form,opp_drift1_tt_form,opp_driftx_tt_form,opp_drift2_tt_form,opp_graph1_tt_form,opp_graph2_tt_form,opp_possession1_tt_form,opp_possession2_tt_form,opp_shont1_tt_form,opp_shont2_tt_form,opp_shofft1_tt_form,opp_shofft2_tt_form,opp_corners1_tt_form,opp_corners2_tt_form,opp_offsides1_tt_form,opp_offsides2_tt_form,opp_fouls1_tt_form,opp_fouls2_tt_form,opp_cards1_tt_form,opp_cards2_tt_form,opp_gksaves1_tt_form,opp_gksaves2_tt_form,opp_precision1_tt_form,opp_precision2_tt_form,tar_w1_ts_avg,tar_wx_ts_avg,tar_w2_ts_avg,tar_ht1_ts_avg,tar_ht2_ts_avg,tar_ft1_ts_avg,tar_ft2_ts_avg,tar_psht_ts_avg,tar_psft_ts_avg,tar_vote1_ts_avg,tar_votex_ts_avg,tar_vote2_ts_avg,tar_elo1_ts_avg,tar_elo2_ts_avg,tar_oddsprob1_ts_avg,tar_oddsprobx_ts_avg,tar_oddsprob2_ts_avg,tar_drift1_ts_avg,tar_driftx_ts_avg,tar_drift2_ts_avg,tar_graph1_ts_avg,tar_graph2_ts_avg,tar_possession1_ts_avg,tar_possession2_ts_avg,tar_shont1_ts_avg,tar_shont2_ts_avg,tar_shofft1_ts_avg,tar_shofft2_ts_avg,tar_corners1_ts_avg,tar_corners2_ts_avg,tar_offsides1_ts_avg,tar_offsides2_ts_avg,tar_fouls1_ts_avg,tar_fouls2_ts_avg,tar_cards1_ts_avg,tar_cards2_ts_avg,tar_gksaves1_ts_avg,tar_gksaves2_ts_avg,tar_precision1_ts_avg,tar_precision2_ts_avg,tar_w1_ts_form,tar_wx_ts_form,tar_w2_ts_form,tar_ht1_ts_form,tar_ht2_ts_form,tar_ft1_ts_form,tar_ft2_ts_form,tar_psht_ts_form,tar_psft_ts_form,tar_vote1_ts_form,tar_votex_ts_form,tar_vote2_ts_form,tar_elo1_ts_form,tar_elo2_ts_form,tar_oddsprob1_ts_form,tar_oddsprobx_ts_form,tar_oddsprob2_ts_form,tar_drift1_ts_form,tar_driftx_ts_form,tar_drift2_ts_form,tar_graph1_ts_form,tar_graph2_ts_form,tar_possession1_ts_form,tar_possession2_ts_form,tar_shont1_ts_form,tar_shont2_ts_form,tar_shofft1_ts_form,tar_shofft2_ts_form,tar_corners1_ts_form,tar_corners2_ts_form,tar_offsides1_ts_form,tar_offsides2_ts_form,tar_fouls1_ts_form,tar_fouls2_ts_form,tar_cards1_ts_form,tar_cards2_ts_form,tar_gksaves1_ts_form,tar_gksaves2_ts_form,tar_precision1_ts_form,tar_precision2_ts_form,opp_w1_ts_avg,opp_wx_ts_avg,opp_w2_ts_avg,opp_ht1_ts_avg,opp_ht2_ts_avg,opp_ft1_ts_avg,opp_ft2_ts_avg,opp_psht_ts_avg,opp_psft_ts_avg,opp_vote1_ts_avg,opp_votex_ts_avg,opp_vote2_ts_avg,opp_elo1_ts_avg,opp_elo2_ts_avg,opp_oddsprob1_ts_avg,opp_oddsprobx_ts_avg,opp_oddsprob2_ts_avg,opp_drift1_ts_avg,opp_driftx_ts_avg,opp_drift2_ts_avg,opp_graph1_ts_avg,opp_graph2_ts_avg,opp_possession1_ts_avg,opp_possession2_ts_avg,opp_shont1_ts_avg,opp_shont2_ts_avg,opp_shofft1_ts_avg,opp_shofft2_ts_avg,opp_corners1_ts_avg,opp_corners2_ts_avg,opp_offsides1_ts_avg,opp_offsides2_ts_avg,opp_fouls1_ts_avg,opp_fouls2_ts_avg,opp_cards1_ts_avg,opp_cards2_ts_avg,opp_gksaves1_ts_avg,opp_gksaves2_ts_avg,opp_precision1_ts_avg,opp_precision2_ts_avg,opp_w1_ts_form,opp_wx_ts_form,opp_w2_ts_form,opp_ht1_ts_form,opp_ht2_ts_form,opp_ft1_ts_form,opp_ft2_ts_form,opp_psht_ts_form,opp_psft_ts_form,opp_vote1_ts_form,opp_votex_ts_form,opp_vote2_ts_form,opp_elo1_ts_form,opp_elo2_ts_form,opp_oddsprob1_ts_form,opp_oddsprobx_ts_form,opp_oddsprob2_ts_form,opp_drift1_ts_form,opp_driftx_ts_form,opp_drift2_ts_form,opp_graph1_ts_form,opp_graph2_ts_form,opp_possession1_ts_form,opp_possession2_ts_form,opp_shont1_ts_form,opp_shont2_ts_form,opp_shofft1_ts_form,opp_shofft2_ts_form,opp_corners1_ts_form,opp_corners2_ts_form,opp_offsides1_ts_form,opp_offsides2_ts_form,opp_fouls1_ts_form,opp_fouls2_ts_form,opp_cards1_ts_form,opp_cards2_ts_form,opp_gksaves1_ts_form,opp_gksaves2_ts_form,opp_precision1_ts_form,opp_precision2_ts_form,w1_tt_avg,w2_tt_avg,diff_w_tt_avg,w1_tt_form,w2_tt_form,diff_w_tt_form,w1_ts_avg,w2_ts_avg,diff_w_ts_avg,w1_ts_form,w2_ts_form,diff_w_ts_form,ht1_tt_avg,ht2_tt_avg,diff_ht_tt_avg,ht1_tt_form,ht2_tt_form,diff_ht_tt_form,ht1_ts_avg,ht2_ts_avg,diff_ht_ts_avg,ht1_ts_form,ht2_ts_form,diff_ht_ts_form,ft1_tt_avg,ft2_tt_avg,diff_ft_tt_avg,ft1_tt_form,ft2_tt_form,diff_ft_tt_form,ft1_ts_avg,ft2_ts_avg,diff_ft_ts_avg,ft1_ts_form,ft2_ts_form,diff_ft_ts_form,graph1_tt_avg,graph2_tt_avg,diff_graph_tt_avg,graph1_tt_form,graph2_tt_form,diff_graph_tt_form,graph1_ts_avg,graph2_ts_avg,diff_graph_ts_avg,graph1_ts_form,graph2_ts_form,diff_graph_ts_form,possession1_tt_avg,possession2_tt_avg,diff_possession_tt_avg,possession1_tt_form,possession2_tt_form,diff_possession_tt_form,possession1_ts_avg,possession2_ts_avg,diff_possession_ts_avg,possession1_ts_form,possession2_ts_form,diff_possession_ts_form,shont1_tt_avg,shont2_tt_avg,diff_shont_tt_avg,shont1_tt_form,shont2_tt_form,diff_shont_tt_form,shont1_ts_avg,shont2_ts_avg,diff_shont_ts_avg,shont1_ts_form,shont2_ts_form,diff_shont_ts_form,shofft1_tt_avg,shofft2_tt_avg,diff_shofft_tt_avg,shofft1_tt_form,shofft2_tt_form,diff_shofft_tt_form,shofft1_ts_avg,shofft2_ts_avg,diff_shofft_ts_avg,shofft1_ts_form,shofft2_ts_form,diff_shofft_ts_form,corners1_tt_avg,corners2_tt_avg,diff_corners_tt_avg,corners1_tt_form,corners2_tt_form,diff_corners_tt_form,corners1_ts_avg,corners2_ts_avg,diff_corners_ts_avg,corners1_ts_form,corners2_ts_form,diff_corners_ts_form,offsides1_tt_avg,offsides2_tt_avg,diff_offsides_tt_avg,offsides1_tt_form,offsides2_tt_form,diff_offsides_tt_form,offsides1_ts_avg,offsides2_ts_avg,diff_offsides_ts_avg,offsides1_ts_form,offsides2_ts_form,diff_offsides_ts_form,fouls1_tt_avg,fouls2_tt_avg,diff_fouls_tt_avg,fouls1_tt_form,fouls2_tt_form,diff_fouls_tt_form,fouls1_ts_avg,fouls2_ts_avg,diff_fouls_ts_avg,fouls1_ts_form,fouls2_ts_form,diff_fouls_ts_form,cards1_tt_avg,cards2_tt_avg,diff_cards_tt_avg,cards1_tt_form,cards2_tt_form,diff_cards_tt_form,cards1_ts_avg,cards2_ts_avg,diff_cards_ts_avg,cards1_ts_form,cards2_ts_form,diff_cards_ts_form,gksaves1_tt_avg,gksaves2_tt_avg,diff_gksaves_tt_avg,gksaves1_tt_form,gksaves2_tt_form,diff_gksaves_tt_form,gksaves1_ts_avg,gksaves2_ts_avg,diff_gksaves_ts_avg,gksaves1_ts_form,gksaves2_ts_form,diff_gksaves_ts_form,precision1_tt_avg,precision2_tt_avg,diff_precision_tt_avg,precision1_tt_form,precision2_tt_form,diff_precision_tt_form,precision1_ts_avg,precision2_ts_avg,diff_precision_ts_avg,precision1_ts_form,precision2_ts_form,diff_precision_ts_form,wx_tt_avg,wx_tt_form,wx_ts_avg,wx_ts_form,psht_tt_avg,psht_tt_form,psht_ts_avg,psht_ts_form,psft_tt_avg,psft_tt_form,psft_ts_avg,psft_ts_form,diff_vote12,diff_elo,diff_op
3188,0,england,fa-cup,9278376,10,2021-01-24 14:30:00+00:00,burnley,fulham,3,875,1,0,0,3.0,0.0,away,,,,15,10,2021-01-24 14:30:00+00:00,2021-01-24,20,1,0.239376,0.298099,0.462525,3.0,1712.234131,1602.060181,,,,,,,0.339535,0.232558,0.427907,0.465116,0.576744,1.018605,1.334884,-0.111628,-0.316279,0.259615,0.214903,0.525482,1622.16396,1658.191253,,,,,,,0.521452,0.436687,0.364884,0.495581,0.118326,0.168744,0.122855,0.168404,0.142218,0.211628,0.136628,0.084884,0.151661,0.15814,0.149683,0.124313,0.143079,0.096124,0.07524,0.081294,-0.341463,0.422764,-0.081301,-0.252033,-0.276423,-0.837398,-0.626016,0.02439,-0.211382,0.027136,0.010521,-0.037656,,284.184215,,,,,,,-0.129159,-0.106613,,,,,,,,,,,,,,,,,,,0.415094,0.201258,0.383648,0.559748,0.610063,1.27044,1.339623,-0.050314,-0.069182,0.396243,0.237777,0.36598,1491.36716,1529.075142,,,,,,,0.541453,0.458547,0.512013,0.399937,0.162013,0.161258,0.165691,0.142919,0.176101,0.176826,0.09945,0.15173,0.177224,0.174753,0.16295,0.148085,0.134172,0.133573,0.076053,0.074065,0.684211,-0.210526,-0.473684,0.491228,-0.192982,0.526316,-1.105263,0.684211,1.631579,0.032299,0.101691,-0.13399,-31.964004,-77.311984,,,,,,,-0.011559,0.046647,0.031579,0.038596,-0.030877,-0.06807,-0.030853,0.098004,-0.083671,-0.029015,-0.060307,-0.013158,0.118734,0.191416,-0.08453,0.154705,-0.046784,-0.033417,0.120164,-0.015264,0.254545,0.263636,0.481818,0.390909,0.709091,0.9,1.545455,-0.318182,-0.645455,0.181753,0.20534,0.612907,1607.161152,1642.881127,,,,,,,0.564065,0.39048,0.346636,0.498818,0.105091,0.186182,0.115361,0.188088,0.125874,0.234615,0.125568,0.088068,0.149026,0.150974,0.157025,0.119835,0.15671,0.083117,0.073894,0.076142,-0.146341,-0.292683,0.439024,-0.105691,0.813008,0.073171,0.934959,-0.918699,-0.861789,0.085832,0.004069,-0.089901,,,,,,,,,0.009418,-0.009418,,,,,,,,,,,,,,,,,,,0.506173,0.148148,0.345679,0.666667,0.555556,1.493827,1.259259,0.111111,0.234568,0.479441,0.22617,0.294389,1493.112401,1552.586907,,,,,,,0.553617,0.446383,0.51716,0.408765,0.182222,0.152593,0.185611,0.136228,0.198955,0.156695,0.104167,0.156636,0.178571,0.171737,0.163861,0.152637,0.123457,0.146972,0.084061,0.074539,0.166667,0.0,-0.166667,0.166667,-0.333333,0.166667,-0.166667,0.5,0.333333,-0.027378,-0.020837,0.048215,,,-0.037541,-0.022538,0.060078,0.001379,0.032582,-0.021212,-0.019272,0.019272,0.081667,0.085,0.006667,0.093333,0.017241,0.04023,-0.00641,0.044872,0.125,0.010417,0.032738,0.026786,0.030303,-2.775558e-17,0.103175,-0.007937,-0.017974,-0.00077,0.361591,0.421501,-0.059909,-0.407574,0.301455,-0.709029,0.300112,0.493996,-0.193883,-0.156504,0.302846,-0.45935,0.53759,0.568246,-0.030657,-0.222507,0.107403,-0.32991,0.473232,0.687879,-0.214646,-0.219512,0.489837,-0.70935,1.179114,1.302662,-0.123548,-0.971331,-0.04985,-0.921481,1.07963,1.519641,-0.440011,-0.046748,0.550813,-0.597561,0.49,0.48907,0.000929,-0.041256,-0.059086,0.01783,0.505224,0.472048,0.033176,0.014345,-0.014345,0.028691,0.38241,0.503797,-0.121387,,,,0.377701,0.507989,-0.130288,,,,0.139792,0.165378,-0.025587,,,,0.128842,0.184202,-0.05536,,,,0.132887,0.167047,-0.03416,,,,0.125794,0.186849,-0.061055,,,,0.159522,0.193864,-0.034342,,,,0.141285,0.216785,-0.075501,,,,0.144179,0.092167,0.052012,,,,0.141102,0.096117,0.044985,,,,0.163207,0.167682,-0.004475,,,,0.160382,0.164773,-0.004391,,,,0.148884,0.143632,0.005252,,,,0.154831,0.141848,0.012983,,,,0.138326,0.115148,0.023178,,,,0.151841,0.103287,0.048554,,,,0.074652,0.078673,-0.004021,,,,0.074217,0.080102,-0.005885,,,,0.216908,0.106119,0.205892,-0.146341,-0.080971,0.3543,-0.103535,-0.20935,-0.192731,0.710098,-0.205443,-0.264228,-0.223149,110.17395,


In [73]:
df.merge(df_last)


Unnamed: 0,side,country,liga,mid,round,ds,t1,t2,tid1,tid2,w1,wx,w2,ft1,ft2,winner,odds_away,odds_draw,odds_home,country_id,round.1,ds.1,de,form1,form2,vote1,votex,vote2,pop_r,elo1,elo2,oddsprob1,oddsprobx,oddsprob2,drift1,drift2,driftx,tar_w1_tt_avg,tar_wx_tt_avg,tar_w2_tt_avg,tar_ht1_tt_avg,tar_ht2_tt_avg,tar_ft1_tt_avg,tar_ft2_tt_avg,tar_psht_tt_avg,tar_psft_tt_avg,tar_vote1_tt_avg,tar_votex_tt_avg,tar_vote2_tt_avg,tar_elo1_tt_avg,tar_elo2_tt_avg,tar_oddsprob1_tt_avg,tar_oddsprobx_tt_avg,tar_oddsprob2_tt_avg,tar_drift1_tt_avg,tar_driftx_tt_avg,tar_drift2_tt_avg,tar_graph1_tt_avg,tar_graph2_tt_avg,tar_possession1_tt_avg,tar_possession2_tt_avg,tar_shont1_tt_avg,tar_shont2_tt_avg,tar_shofft1_tt_avg,tar_shofft2_tt_avg,tar_corners1_tt_avg,tar_corners2_tt_avg,tar_offsides1_tt_avg,tar_offsides2_tt_avg,tar_fouls1_tt_avg,tar_fouls2_tt_avg,tar_cards1_tt_avg,tar_cards2_tt_avg,tar_gksaves1_tt_avg,tar_gksaves2_tt_avg,tar_precision1_tt_avg,tar_precision2_tt_avg,tar_w1_tt_form,tar_wx_tt_form,tar_w2_tt_form,tar_ht1_tt_form,tar_ht2_tt_form,tar_ft1_tt_form,tar_ft2_tt_form,tar_psht_tt_form,tar_psft_tt_form,tar_vote1_tt_form,tar_votex_tt_form,tar_vote2_tt_form,tar_elo1_tt_form,tar_elo2_tt_form,tar_oddsprob1_tt_form,tar_oddsprobx_tt_form,tar_oddsprob2_tt_form,tar_drift1_tt_form,tar_driftx_tt_form,tar_drift2_tt_form,tar_graph1_tt_form,tar_graph2_tt_form,tar_possession1_tt_form,tar_possession2_tt_form,tar_shont1_tt_form,tar_shont2_tt_form,tar_shofft1_tt_form,tar_shofft2_tt_form,tar_corners1_tt_form,tar_corners2_tt_form,tar_offsides1_tt_form,tar_offsides2_tt_form,tar_fouls1_tt_form,tar_fouls2_tt_form,tar_cards1_tt_form,tar_cards2_tt_form,tar_gksaves1_tt_form,tar_gksaves2_tt_form,tar_precision1_tt_form,tar_precision2_tt_form,opp_w1_tt_avg,opp_wx_tt_avg,opp_w2_tt_avg,opp_ht1_tt_avg,opp_ht2_tt_avg,opp_ft1_tt_avg,opp_ft2_tt_avg,opp_psht_tt_avg,opp_psft_tt_avg,opp_vote1_tt_avg,opp_votex_tt_avg,opp_vote2_tt_avg,opp_elo1_tt_avg,opp_elo2_tt_avg,opp_oddsprob1_tt_avg,opp_oddsprobx_tt_avg,opp_oddsprob2_tt_avg,opp_drift1_tt_avg,opp_driftx_tt_avg,opp_drift2_tt_avg,opp_graph1_tt_avg,opp_graph2_tt_avg,opp_possession1_tt_avg,opp_possession2_tt_avg,opp_shont1_tt_avg,opp_shont2_tt_avg,opp_shofft1_tt_avg,opp_shofft2_tt_avg,opp_corners1_tt_avg,opp_corners2_tt_avg,opp_offsides1_tt_avg,opp_offsides2_tt_avg,opp_fouls1_tt_avg,opp_fouls2_tt_avg,opp_cards1_tt_avg,opp_cards2_tt_avg,opp_gksaves1_tt_avg,opp_gksaves2_tt_avg,opp_precision1_tt_avg,opp_precision2_tt_avg,opp_w1_tt_form,opp_wx_tt_form,opp_w2_tt_form,opp_ht1_tt_form,opp_ht2_tt_form,opp_ft1_tt_form,opp_ft2_tt_form,opp_psht_tt_form,opp_psft_tt_form,opp_vote1_tt_form,opp_votex_tt_form,opp_vote2_tt_form,opp_elo1_tt_form,opp_elo2_tt_form,opp_oddsprob1_tt_form,opp_oddsprobx_tt_form,opp_oddsprob2_tt_form,opp_drift1_tt_form,opp_driftx_tt_form,opp_drift2_tt_form,opp_graph1_tt_form,opp_graph2_tt_form,opp_possession1_tt_form,opp_possession2_tt_form,opp_shont1_tt_form,opp_shont2_tt_form,opp_shofft1_tt_form,opp_shofft2_tt_form,opp_corners1_tt_form,opp_corners2_tt_form,opp_offsides1_tt_form,opp_offsides2_tt_form,opp_fouls1_tt_form,opp_fouls2_tt_form,opp_cards1_tt_form,opp_cards2_tt_form,opp_gksaves1_tt_form,opp_gksaves2_tt_form,opp_precision1_tt_form,opp_precision2_tt_form,tar_w1_ts_avg,tar_wx_ts_avg,tar_w2_ts_avg,tar_ht1_ts_avg,tar_ht2_ts_avg,tar_ft1_ts_avg,tar_ft2_ts_avg,tar_psht_ts_avg,tar_psft_ts_avg,tar_vote1_ts_avg,tar_votex_ts_avg,tar_vote2_ts_avg,tar_elo1_ts_avg,tar_elo2_ts_avg,tar_oddsprob1_ts_avg,tar_oddsprobx_ts_avg,tar_oddsprob2_ts_avg,tar_drift1_ts_avg,tar_driftx_ts_avg,tar_drift2_ts_avg,tar_graph1_ts_avg,tar_graph2_ts_avg,tar_possession1_ts_avg,tar_possession2_ts_avg,tar_shont1_ts_avg,tar_shont2_ts_avg,tar_shofft1_ts_avg,tar_shofft2_ts_avg,tar_corners1_ts_avg,tar_corners2_ts_avg,tar_offsides1_ts_avg,tar_offsides2_ts_avg,tar_fouls1_ts_avg,tar_fouls2_ts_avg,tar_cards1_ts_avg,tar_cards2_ts_avg,tar_gksaves1_ts_avg,tar_gksaves2_ts_avg,tar_precision1_ts_avg,tar_precision2_ts_avg,tar_w1_ts_form,tar_wx_ts_form,tar_w2_ts_form,tar_ht1_ts_form,tar_ht2_ts_form,tar_ft1_ts_form,tar_ft2_ts_form,tar_psht_ts_form,tar_psft_ts_form,tar_vote1_ts_form,tar_votex_ts_form,tar_vote2_ts_form,tar_elo1_ts_form,tar_elo2_ts_form,tar_oddsprob1_ts_form,tar_oddsprobx_ts_form,tar_oddsprob2_ts_form,tar_drift1_ts_form,tar_driftx_ts_form,tar_drift2_ts_form,tar_graph1_ts_form,tar_graph2_ts_form,tar_possession1_ts_form,tar_possession2_ts_form,tar_shont1_ts_form,tar_shont2_ts_form,tar_shofft1_ts_form,tar_shofft2_ts_form,tar_corners1_ts_form,tar_corners2_ts_form,tar_offsides1_ts_form,tar_offsides2_ts_form,tar_fouls1_ts_form,tar_fouls2_ts_form,tar_cards1_ts_form,tar_cards2_ts_form,tar_gksaves1_ts_form,tar_gksaves2_ts_form,tar_precision1_ts_form,tar_precision2_ts_form,opp_w1_ts_avg,opp_wx_ts_avg,opp_w2_ts_avg,opp_ht1_ts_avg,opp_ht2_ts_avg,opp_ft1_ts_avg,opp_ft2_ts_avg,opp_psht_ts_avg,opp_psft_ts_avg,opp_vote1_ts_avg,opp_votex_ts_avg,opp_vote2_ts_avg,opp_elo1_ts_avg,opp_elo2_ts_avg,opp_oddsprob1_ts_avg,opp_oddsprobx_ts_avg,opp_oddsprob2_ts_avg,opp_drift1_ts_avg,opp_driftx_ts_avg,opp_drift2_ts_avg,opp_graph1_ts_avg,opp_graph2_ts_avg,opp_possession1_ts_avg,opp_possession2_ts_avg,opp_shont1_ts_avg,opp_shont2_ts_avg,opp_shofft1_ts_avg,opp_shofft2_ts_avg,opp_corners1_ts_avg,opp_corners2_ts_avg,opp_offsides1_ts_avg,opp_offsides2_ts_avg,opp_fouls1_ts_avg,opp_fouls2_ts_avg,opp_cards1_ts_avg,opp_cards2_ts_avg,opp_gksaves1_ts_avg,opp_gksaves2_ts_avg,opp_precision1_ts_avg,opp_precision2_ts_avg,opp_w1_ts_form,opp_wx_ts_form,opp_w2_ts_form,opp_ht1_ts_form,opp_ht2_ts_form,opp_ft1_ts_form,opp_ft2_ts_form,opp_psht_ts_form,opp_psft_ts_form,opp_vote1_ts_form,opp_votex_ts_form,opp_vote2_ts_form,opp_elo1_ts_form,opp_elo2_ts_form,opp_oddsprob1_ts_form,opp_oddsprobx_ts_form,opp_oddsprob2_ts_form,opp_drift1_ts_form,opp_driftx_ts_form,opp_drift2_ts_form,opp_graph1_ts_form,opp_graph2_ts_form,opp_possession1_ts_form,opp_possession2_ts_form,opp_shont1_ts_form,opp_shont2_ts_form,opp_shofft1_ts_form,opp_shofft2_ts_form,opp_corners1_ts_form,opp_corners2_ts_form,opp_offsides1_ts_form,opp_offsides2_ts_form,opp_fouls1_ts_form,opp_fouls2_ts_form,opp_cards1_ts_form,opp_cards2_ts_form,opp_gksaves1_ts_form,opp_gksaves2_ts_form,opp_precision1_ts_form,opp_precision2_ts_form,w1_tt_avg,w2_tt_avg,diff_w_tt_avg,w1_tt_form,w2_tt_form,diff_w_tt_form,w1_ts_avg,w2_ts_avg,diff_w_ts_avg,w1_ts_form,w2_ts_form,diff_w_ts_form,ht1_tt_avg,ht2_tt_avg,diff_ht_tt_avg,ht1_tt_form,ht2_tt_form,diff_ht_tt_form,ht1_ts_avg,ht2_ts_avg,diff_ht_ts_avg,ht1_ts_form,ht2_ts_form,diff_ht_ts_form,ft1_tt_avg,ft2_tt_avg,diff_ft_tt_avg,ft1_tt_form,ft2_tt_form,diff_ft_tt_form,ft1_ts_avg,ft2_ts_avg,diff_ft_ts_avg,ft1_ts_form,ft2_ts_form,diff_ft_ts_form,graph1_tt_avg,graph2_tt_avg,diff_graph_tt_avg,graph1_tt_form,graph2_tt_form,diff_graph_tt_form,graph1_ts_avg,graph2_ts_avg,diff_graph_ts_avg,graph1_ts_form,graph2_ts_form,diff_graph_ts_form,possession1_tt_avg,possession2_tt_avg,diff_possession_tt_avg,possession1_tt_form,possession2_tt_form,diff_possession_tt_form,possession1_ts_avg,possession2_ts_avg,diff_possession_ts_avg,possession1_ts_form,possession2_ts_form,diff_possession_ts_form,shont1_tt_avg,shont2_tt_avg,diff_shont_tt_avg,shont1_tt_form,shont2_tt_form,diff_shont_tt_form,shont1_ts_avg,shont2_ts_avg,diff_shont_ts_avg,shont1_ts_form,shont2_ts_form,diff_shont_ts_form,shofft1_tt_avg,shofft2_tt_avg,diff_shofft_tt_avg,shofft1_tt_form,shofft2_tt_form,diff_shofft_tt_form,shofft1_ts_avg,shofft2_ts_avg,diff_shofft_ts_avg,shofft1_ts_form,shofft2_ts_form,diff_shofft_ts_form,corners1_tt_avg,corners2_tt_avg,diff_corners_tt_avg,corners1_tt_form,corners2_tt_form,diff_corners_tt_form,corners1_ts_avg,corners2_ts_avg,diff_corners_ts_avg,corners1_ts_form,corners2_ts_form,diff_corners_ts_form,offsides1_tt_avg,offsides2_tt_avg,diff_offsides_tt_avg,offsides1_tt_form,offsides2_tt_form,diff_offsides_tt_form,offsides1_ts_avg,offsides2_ts_avg,diff_offsides_ts_avg,offsides1_ts_form,offsides2_ts_form,diff_offsides_ts_form,fouls1_tt_avg,fouls2_tt_avg,diff_fouls_tt_avg,fouls1_tt_form,fouls2_tt_form,diff_fouls_tt_form,fouls1_ts_avg,fouls2_ts_avg,diff_fouls_ts_avg,fouls1_ts_form,fouls2_ts_form,diff_fouls_ts_form,cards1_tt_avg,cards2_tt_avg,diff_cards_tt_avg,cards1_tt_form,cards2_tt_form,diff_cards_tt_form,cards1_ts_avg,cards2_ts_avg,diff_cards_ts_avg,cards1_ts_form,cards2_ts_form,diff_cards_ts_form,gksaves1_tt_avg,gksaves2_tt_avg,diff_gksaves_tt_avg,gksaves1_tt_form,gksaves2_tt_form,diff_gksaves_tt_form,gksaves1_ts_avg,gksaves2_ts_avg,diff_gksaves_ts_avg,gksaves1_ts_form,gksaves2_ts_form,diff_gksaves_ts_form,precision1_tt_avg,precision2_tt_avg,diff_precision_tt_avg,precision1_tt_form,precision2_tt_form,diff_precision_tt_form,precision1_ts_avg,precision2_ts_avg,diff_precision_ts_avg,precision1_ts_form,precision2_ts_form,diff_precision_ts_form,wx_tt_avg,wx_tt_form,wx_ts_avg,wx_ts_form,psht_tt_avg,psht_tt_form,psht_ts_avg,psht_ts_form,psft_tt_avg,psft_tt_form,psft_ts_avg,psft_ts_form,diff_vote12,diff_elo,diff_op
0,0,england,fa-cup,6528580,9,2015-01-03 15:00:00+00:00,gateshead,west bromwich albion,1845,8,0,0,1,0.0,7.0,home,,,,15,9,2015-01-03 15:00:00+00:00,2015-01-03,18,20,0.100264,0.147757,0.751979,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.2,-0.066667,-0.133333,0.466667,0.233333,0.866667,0.9,0.233333,-0.033333,-0.07769,0.011146,0.066543,,,,,,,,,-0.079179,0.112512,-0.101,-0.165667,0.041333,-0.026667,0.031034,-0.021839,-0.055128,-0.083333,-0.010417,-0.03125,-0.101786,-0.056548,-0.133333,0.069697,-0.012698,0.009524,0.006867,-0.048814,0.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.073014,0.140602,0.786384,1605.22229,1702.563721,,,,,,,,,0.54,0.46,0.12,0.24,0.275862,0.103448,0.038462,0.0,0.0,0.375,0.214286,0.107143,0.0,0.090909,0.190476,0.095238,0.029412,0.156863,0.088435,0.047619,-0.136054,-0.156463,-0.428571,-0.190476,-0.435374,0.272109,0.244898,0.008349,0.034576,-0.042925,-26.377263,0.008213,0.154478,0.127505,0.119378,0.001751,-0.008019,0.00682,0.029799,0.09265,-0.09551,-0.135782,-0.008707,-0.084082,-0.026976,-0.049965,-0.058608,-0.01596,-0.041241,-0.091837,-0.070092,-0.11431,-0.141002,-0.037724,-0.105928,-0.027535,-0.014646,-0.036342,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,-0.435897,0.435897,0.0,-0.974359,-0.230769,-0.846154,0.076923,-0.74359,-0.923077,-0.02681,0.013172,0.013638,,,,,,,,,-0.028495,0.028495,0.029231,0.047692,-0.070769,0.024615,0.030062,0.047745,0.018738,-0.014793,0.008013,0.064103,0.038462,-0.038919,0.083916,-0.037296,0.029304,-0.028083,0.102562,-0.027284,,,,0.031973,-0.022449,0.054422,,,,,,,,,,0.019048,0.038435,-0.019388,,,,,,,,,,0.215646,0.354762,-0.139116,,,,,,,,,,0.006735,0.071156,-0.06442,,,,,,,,,,-0.118391,-0.130588,0.012197,,,,,,,,,,-0.021374,-0.017687,-0.003687,,,,,,,,,,-0.009465,-0.024408,0.014943,,,,,,,,,,-0.035544,-0.070971,0.035426,,,,,,,,,,-0.051127,-0.036246,-0.014881,,,,,,,,,,-0.108048,-0.06332,-0.044728,,,,,,,,,,-0.085529,-0.035652,-0.049876,,,,,,,,,,-0.020117,-0.048202,0.028086,,,,,,,,,,-0.014737,-0.03173,0.016993,,,,,,,,-0.009524,,,,0.252721,,,,0.105782,,,-0.651715,,


In [None]:
df_teams=pd.read_csv('data/teams.csv', index_col=None)
df_teams=df_teams[['tid','op_tid']].drop_duplicates()

df=df.merge(df_teams, left_on='tid1', right_on='tid')
df=df.rename(columns={'op_tid':'op_tid1'})
df=df.drop(columns=['tid'])
df=df.merge(df_teams, left_on='tid2', right_on='tid')
df=df.rename(columns={'op_tid':'op_tid2'})
df=df.drop(columns=['tid'])

df_op=dp._provide_op()
df_op=df_op.rename(columns={'tid1':'op_tid1','tid2':'op_tid2'})
df=df.merge(df_op[['op_tid1','op_tid2', 'oddsprob_home', 'oddsprob_draw', 'oddsprob_away', 'drift_home', 'drift_away', 'drift_draw']], on=['op_tid1','op_tid2'], how='left')

In [64]:
df.merge(df_op[['op_tid1','op_tid2', 'oddsprob_home', 'oddsprob_draw', 'oddsprob_away', 'drift_home', 'drift_away', 'drift_draw']], on=['op_tid1','op_tid2'], how='left')

Unnamed: 0,awayScoreHT,country,country_id,ds,homeScoreHT,liga,mid,round,sc1,sc2,t1,t2,tid1,tid2,winner,formation_h,formation_a,home_formation,away_formation,vote_home,vote_draw,vote_away,votes,y,pop_r,de,elo1,elo2,op_tid1,op_tid2,oddsprob_home,oddsprob_draw,oddsprob_away,drift_home,drift_away,drift_draw
0,,england,15,2021-01-31 12:00:00+00:00,,premier-league,8897050,21,,,chelsea,burnley,4,3,draw,,,,,0.762910,0.160065,0.077026,27770.0,2021,4.0,2021-01-31,1828.057861,1719.795166,256,135,0.713223,0.186128,0.100649,-0.068265,0.210811,0.081265
1,,england,15,2021-01-31 14:00:00+00:00,,premier-league,8897051,21,,,leicester city,leeds united,6,796,draw,4-2-3-1,4-1-4-1,14.0,12.0,0.689995,0.206004,0.104001,23019.0,2021,3.0,2021-01-31,1821.147705,1676.518799,239,1503,0.533874,0.238475,0.227651,-0.041660,0.058843,0.025857
2,,england,15,2021-01-31 16:30:00+00:00,,premier-league,8897027,21,,,west ham united,liverpool,120,181,draw,4-2-3-1,4-3-3,14.0,18.0,0.193081,0.186881,0.620038,28066.0,2021,4.0,2021-01-31,1757.818237,1944.577148,444,1183,0.218530,0.240785,0.540685,-0.091515,0.037856,-0.022932
3,,england,15,2021-01-31 19:15:00+00:00,,premier-league,8897039,21,,,brighton hove albion,tottenham,609,274,draw,3-5-2,4-2-3-1,9.0,14.0,0.119534,0.192975,0.687491,21609.0,2021,3.0,2021-01-31,1662.046997,1836.129150,984,680,0.291167,0.286855,0.421978,-0.078422,0.078682,-0.059461
4,,spain,42,2021-01-31 15:15:00+00:00,,laliga,8966464,21,,,cadiz,atletico madrid,940,134,draw,4-4-2,4-4-2,20.0,20.0,0.126301,0.173107,0.700591,22668.0,2021,3.0,2021-01-31,1598.655518,1927.645752,1588,261,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,,austria,2,2021-01-31 13:30:00+00:00,,bundesliga,8858506,15,,,skn st polten,wolfsberger ac,1033,330,draw,,,,,0.195041,0.347658,0.457300,1815.0,2021,0.0,2021-01-31,1419.237427,1550.491699,1395,368,,,,,,
75,,turkey,45,2021-01-31 13:00:00+00:00,,tff-1-lig,8917288,19,,,boluspor,adana demirspor,410,153,draw,,,,,0.223048,0.267658,0.509294,807.0,2021,0.0,2021-01-31,,,6537,4729,,,,,,
76,,belgium,4,2021-01-31 12:30:00+00:00,,pro-league,8833161,23,,,club brugge,standard liege,82,270,draw,,,,,0.663458,0.254644,0.081898,7375.0,2021,1.0,2021-01-31,1688.555908,1521.353760,562,1355,,,,,,
77,,germany,20,2021-01-31 13:00:00+00:00,,3-liga,8897913,22,,,tsv 1860 munchen,fsv zwickau,296,1161,draw,,,,,0.693064,0.254796,0.052140,2033.0,2021,0.0,2021-01-31,,,1193,2159,0.505812,0.280372,0.213816,0.063097,-0.084783,-0.082277


In [75]:
df_op


Unnamed: 0,bn,country,country_id,ds,liga,mid,odds_away,odds_draw,odds_home,sc1,sc2,t1,t2,op_tid1,op_tid2,winner,oddsprob_home,oddsprob_draw,oddsprob_away,drift_home,drift_away,drift_draw
1,0.769231,spain,79,2021-01-31 16:00:00+00:00,segunda-division-b-group-4,j3Y9nKmF,3.31,3.01,2.21,0,0,ucam murcia,cordoba,1607,201,draw,0.413793,0.304438,0.281769,-0.039783,0.024380,0.023873
2,0.538462,portugal,66,2021-01-31 15:00:00+00:00,campeonato-de-portugal,0QsOhlr3,4.96,3.32,1.66,0,0,canelas 2010,sc espinho,5144,6636,draw,0.540894,0.273849,0.185257,-0.026903,0.044446,-0.016034
3,0.538462,england,25,2021-01-31 14:00:00+00:00,women-s-super-league,QVy5J7LG,32.79,15.32,1.02,0,0,manchester city w,west ham w,1188,1859,draw,0.897301,0.069221,0.033478,-0.011119,0.233352,-0.010093
4,0.692308,hungary,35,2021-01-31 12:00:00+00:00,merkantil-bank-liga,QoKnxPsq,3.54,3.38,1.93,0,0,budaorsi sc,szolnoki mav,5396,5461,draw,0.471456,0.272429,0.256115,-0.008963,0.001408,0.011767
5,0.076923,italy,41,2021-01-31 13:30:00+00:00,serie-d-group-i,6Xcr5myQ,2.67,2.98,2.50,0,0,s agata,santa maria cilento,6613,5365,draw,0.360790,0.302825,0.336386,0.000974,-0.001958,0.000974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
252,0.461538,world,91,2021-01-31 09:00:00+00:00,club-friendly,8rRnc5CU,2.42,3.68,2.44,0,0,gwelup croatia sc aus,ecu joondalup aus,7649,7662,draw,0.370748,0.249496,0.379756,0.007891,-0.008454,0.000853
253,0.461538,spain,79,2021-01-31 11:00:00+00:00,tercera-division-group-5,SYQqLkB7,3.71,2.90,2.03,0,0,granollers,ue figueres,4994,6560,draw,0.437974,0.314028,0.247998,-0.031605,0.110817,-0.086772
254,0.076923,italy,41,2021-01-31 13:30:00+00:00,serie-d-group-b,SjlFggSk,3.95,3.05,1.90,0,0,vis nova giussano,scanzorosciate,5245,5420,draw,0.475769,0.295257,0.228974,0.003220,0.003220,-0.007784
255,0.769231,spain,79,2021-01-31 11:00:00+00:00,segunda-division-b-group-3,4KLgJQrk,6.72,3.47,1.53,0,0,ibiza,alcoyano,3844,1396,draw,0.590045,0.263150,0.146805,-0.035511,0.091277,0.013168


In [63]:
df_op[(df_op['country']=='germany') & ~(df_op['liga'].str.contains('segunda')) & ~(df_op['liga'].str.contains('tercera'))]


Unnamed: 0,bn,country,country_id,ds,liga,mid,odds_away,odds_draw,odds_home,sc1,sc2,t1,t2,op_tid1,op_tid2,winner,oddsprob_home,oddsprob_draw,oddsprob_away,drift_home,drift_away,drift_draw
72,0.846154,germany,32,2021-01-31 14:00:00+00:00,regionalliga-west,0Onkbtzj,14.65,7.33,1.14,0,0,dortmund ii,lotte,4588,3038,draw,0.799656,0.134329,0.066015,-0.028469,0.060777,-0.073277
143,0.923077,germany,32,2021-01-31 12:00:00+00:00,3-liga,G4toVcV0,2.63,3.5,2.53,0,0,duisburg,lubeck,1056,4044,draw,0.371608,0.272519,0.355873,-0.030964,0.014786,0.008388
206,0.923077,germany,32,2021-01-31 13:00:00+00:00,3-liga,bsBOrHa0,4.47,3.37,1.84,0,0,munich 1860,zwickau,1193,2159,draw,0.505812,0.280372,0.213816,0.063097,-0.084783,-0.082277


In [34]:
df_teams

Unnamed: 0,tid,op_tid
0,1751,3706
1,1318,1700
2,519,2986
3,267,721
4,614,992
...,...,...
1336,1092,2457
1337,1263,392
1339,420,1522
1341,1161,2159


In [37]:
df=df.merge(df_teams, left_on='tid1', right_on='tid')
df=df.rename(columns={'op_tid':'op_tid1'})
df=df.drop(columns=['tid'])
df=df.merge(df_teams, left_on='tid2', right_on='tid')
df=df.rename(columns={'op_tid':'op_tid2'})
df=df.drop(columns=['tid'])
df


Unnamed: 0,awayScoreHT,country,country_id,ds,homeScoreHT,liga,mid,round,sc1,sc2,t1,t2,tid1,tid2,winner,formation_h,formation_a,home_formation,away_formation,vote_home,vote_draw,vote_away,votes,y,pop_r,de,elo1,elo2,op_tid1,op_tid1.1,op_tid2
0,,england,15,2021-01-31 12:00:00+00:00,,premier-league,8897050,21,,,chelsea,burnley,4,3,draw,,,,,0.762910,0.160065,0.077026,27770.0,2021,4.0,2021-01-31,1828.057861,1719.795166,256,256,135
1,,england,15,2021-01-31 14:00:00+00:00,,premier-league,8897051,21,,,leicester city,leeds united,6,796,draw,4-2-3-1,4-1-4-1,14.0,12.0,0.689995,0.206004,0.104001,23019.0,2021,3.0,2021-01-31,1821.147705,1676.518799,239,239,1503
2,,england,15,2021-01-31 16:30:00+00:00,,premier-league,8897027,21,,,west ham united,liverpool,120,181,draw,4-2-3-1,4-3-3,14.0,18.0,0.193081,0.186881,0.620038,28066.0,2021,4.0,2021-01-31,1757.818237,1944.577148,444,444,1183
3,,england,15,2021-01-31 19:15:00+00:00,,premier-league,8897039,21,,,brighton hove albion,tottenham,609,274,draw,3-5-2,4-2-3-1,9.0,14.0,0.119534,0.192975,0.687491,21609.0,2021,3.0,2021-01-31,1662.046997,1836.129150,984,984,680
4,,spain,42,2021-01-31 15:15:00+00:00,,laliga,8966464,21,,,cadiz,atletico madrid,940,134,draw,4-4-2,4-4-2,20.0,20.0,0.126301,0.173107,0.700591,22668.0,2021,3.0,2021-01-31,1598.655518,1927.645752,1588,1588,261
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,,austria,2,2021-01-31 13:30:00+00:00,,bundesliga,8858506,15,,,skn st polten,wolfsberger ac,1033,330,draw,,,,,0.195041,0.347658,0.457300,1815.0,2021,0.0,2021-01-31,1419.237427,1550.491699,1395,1395,368
75,,turkey,45,2021-01-31 13:00:00+00:00,,tff-1-lig,8917288,19,,,boluspor,adana demirspor,410,153,draw,,,,,0.223048,0.267658,0.509294,807.0,2021,0.0,2021-01-31,,,6537,6537,4729
76,,belgium,4,2021-01-31 12:30:00+00:00,,pro-league,8833161,23,,,club brugge,standard liege,82,270,draw,,,,,0.663458,0.254644,0.081898,7375.0,2021,1.0,2021-01-31,1688.555908,1521.353760,562,562,1355
77,,germany,20,2021-01-31 13:00:00+00:00,,3-liga,8897913,22,,,tsv 1860 munchen,fsv zwickau,296,1161,draw,,,,,0.693064,0.254796,0.052140,2033.0,2021,0.0,2021-01-31,,,1193,1193,2159


In [27]:
df_.to_csv('data/stats_generated.csv', index=False)

In [75]:
df=pd.read_csv('data/stats_generated.csv', index_col=None)
df=df.dropna()

In [82]:
COL_CUR=['side', 'country_id', 'round', 'ds', 'de', 'form1', 'form2', 'vote1', 'votex', 'vote2', 'pop_r', 'elo1', 'elo2']
COL_PREV=['w1', 'wx', 'w2',  'ht1', 'ht2', 'ft1', 'ft2', 'ps_ht', 'ps_ft']
COL_CAT=['country_id','form1', 'form2']
COL_BIN=['side']

COL_INF=['country', 'liga', 'mid', 'round', 'ds', 't1', 't2','tid1', 'tid2', 'w1', 'wx', 'w2',  'ft1', 'ft2','winner']

In [88]:
scaler=MinMaxScaler()
nums=scaler.fit_transform(df[df.columns[25:]].values)
nums_df=pd.DataFrame(nums, columns=df.columns[25:])
df.reset_index(drop=True, inplace=True)
df=pd.concat([df[df.columns[:25]],nums_df], axis=1)

Unnamed: 0,pop_r,elo1,elo2,tar_w1_tt_avg,tar_wx_tt_avg,tar_w2_tt_avg,tar_ht1_tt_avg,tar_ht2_tt_avg,tar_ft1_tt_avg,tar_ft2_tt_avg,tar_ps_ht_tt_avg,tar_ps_ft_tt_avg,tar_vote1_tt_avg,tar_votex_tt_avg,tar_vote2_tt_avg,tar_elo1_tt_avg,tar_elo2_tt_avg,opp_w1_tt_avg,opp_wx_tt_avg,opp_w2_tt_avg,opp_ht1_tt_avg,opp_ht2_tt_avg,opp_ft1_tt_avg,opp_ft2_tt_avg,opp_ps_ht_tt_avg,opp_ps_ft_tt_avg,opp_vote1_tt_avg,opp_votex_tt_avg,opp_vote2_tt_avg,opp_elo1_tt_avg,opp_elo2_tt_avg,tar_w1_ts_avg,tar_wx_ts_avg,tar_w2_ts_avg,tar_ht1_ts_avg,tar_ht2_ts_avg,tar_ft1_ts_avg,tar_ft2_ts_avg,tar_ps_ht_ts_avg,tar_ps_ft_ts_avg,tar_vote1_ts_avg,tar_votex_ts_avg,tar_vote2_ts_avg,tar_elo1_ts_avg,tar_elo2_ts_avg,opp_w1_ts_avg,opp_wx_ts_avg,opp_w2_ts_avg,opp_ht1_ts_avg,opp_ht2_ts_avg,opp_ft1_ts_avg,opp_ft2_ts_avg,opp_ps_ht_ts_avg,opp_ps_ft_ts_avg,opp_vote1_ts_avg,opp_votex_ts_avg,opp_vote2_ts_avg,opp_elo1_ts_avg,opp_elo2_ts_avg,diff_w1_tt_avg,diff_wx_tt_avg,diff_w2_tt_avg,diff_ht1_tt_avg,diff_ht2_tt_avg,diff_ft1_tt_avg,diff_ft2_tt_avg,diff_ps_ht_tt_avg,diff_ps_ft_tt_avg,diff_vote1_tt_avg,diff_votex_tt_avg,diff_vote2_tt_avg,diff_elo1_tt_avg,diff_elo2_tt_avg,diff_w1_ts_avg,diff_wx_ts_avg,diff_w2_ts_avg,diff_ht1_ts_avg,diff_ht2_ts_avg,diff_ft1_ts_avg,diff_ft2_ts_avg,diff_ps_ht_ts_avg,diff_ps_ft_ts_avg,diff_vote1_ts_avg,diff_votex_ts_avg,diff_vote2_ts_avg,diff_elo1_ts_avg,diff_elo2_ts_avg,diff_vote12,diff_elo
0,1.00,0.472463,0.472463,1.000000,0.000000,0.000000,0.571429,0.000000,0.500000,0.000000,0.823529,0.782609,0.858299,0.162021,0.104519,1.000000,0.635697,0.708861,0.189873,0.101266,0.211573,0.041772,0.292194,0.094937,0.650782,0.624656,0.708244,0.362246,0.156602,0.836029,0.678962,1.000000,0.000000,0.000000,0.500000,0.000000,0.500000,0.000000,0.777778,0.785714,0.861679,0.162021,0.097464,0.995360,0.635697,0.000000,0.000000,1.000000,0.000000,0.400000,0.166667,0.375000,0.333333,0.428571,0.332264,0.305874,0.560808,0.892168,0.931487,0.645570,0.405063,0.449367,0.709916,0.476503,0.624684,0.442814,0.640587,0.618465,0.598509,0.322713,0.469866,0.647809,0.455834,1.000000,0.500000,0.000000,0.750000,0.300000,0.666667,0.306452,0.833333,0.750000,0.785620,0.368704,0.246279,0.596018,0.186188,0.725685,0.500000
1,0.75,0.628003,0.657831,1.000000,0.000000,0.000000,0.000000,0.000000,0.333333,0.000000,0.588235,0.695652,0.085197,0.391887,0.799607,0.464370,0.471198,0.442748,0.282443,0.274809,0.196292,0.103817,0.258270,0.213740,0.607993,0.544972,0.475809,0.485643,0.336685,0.648486,0.676920,1.000000,0.000000,0.000000,0.000000,0.000000,0.333333,0.000000,0.555556,0.714286,0.107016,0.391887,0.745632,0.470917,0.471198,1.000000,0.000000,0.000000,0.000000,0.000000,0.333333,0.000000,0.555556,0.714286,0.122613,0.410600,0.720589,0.470917,0.471198,0.778626,0.358779,0.362595,0.385496,0.441603,0.545038,0.371251,0.483921,0.613010,0.243569,0.416984,0.767833,0.334032,0.289995,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.491586,0.482920,0.513713,0.500000,0.500000,0.626219,0.472435
2,1.00,0.811409,0.523643,1.000000,0.000000,0.000000,0.857143,0.000000,1.000000,0.166667,0.941176,0.956522,0.874874,0.211592,0.060606,0.464370,0.471198,0.408333,0.266667,0.325000,0.180952,0.120000,0.229167,0.225000,0.592157,0.523913,0.424758,0.522580,0.371003,0.468630,0.516784,1.000000,0.000000,0.000000,0.750000,0.000000,1.000000,0.125000,0.888889,0.928571,0.877859,0.211592,0.056515,0.470917,0.471198,0.000000,0.000000,1.000000,0.000000,0.600000,0.166667,0.750000,0.222222,0.214286,0.065924,0.211592,0.876629,0.470917,0.471198,0.795833,0.366667,0.337500,0.894444,0.432500,0.962500,0.464862,0.784043,0.824457,0.795494,0.224639,0.320414,0.496160,0.453465,1.000000,0.500000,0.000000,0.875000,0.200000,0.916667,0.177419,1.000000,1.000000,0.938040,0.500000,0.050917,0.500000,0.500000,0.878608,0.765927
3,1.00,0.756613,0.697733,1.000000,0.000000,0.000000,0.571429,0.000000,0.666667,0.000000,0.823529,0.869565,0.858765,0.293601,0.033987,0.464370,0.471198,0.379032,0.274194,0.346774,0.168203,0.087097,0.206989,0.181452,0.606262,0.535063,0.489853,0.558270,0.283177,0.581739,0.598190,1.000000,0.000000,0.000000,0.500000,0.000000,0.666667,0.000000,0.777778,0.857143,0.862134,0.293601,0.031693,0.470917,0.471198,0.000000,0.000000,1.000000,0.500000,0.000000,0.333333,0.375000,0.777778,0.500000,0.674786,0.450061,0.143268,0.470917,0.471198,0.810484,0.362903,0.326613,0.735215,0.451008,0.775806,0.390700,0.676819,0.750877,0.742185,0.265652,0.355826,0.394199,0.370364,1.000000,0.500000,0.000000,0.500000,0.500000,0.666667,0.306452,0.500000,0.750000,0.601075,0.357198,0.438903,0.500000,0.500000,0.810986,0.554411
4,0.50,0.472463,0.472463,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.166667,0.588235,0.434783,0.121502,0.220921,0.852222,0.464370,0.471198,0.000000,0.000000,1.000000,0.095238,0.133333,0.055556,0.277778,0.549020,0.405797,0.245585,0.347700,0.653541,0.464370,0.471198,0.000000,0.000000,1.000000,0.000000,0.000000,0.000000,0.125000,0.555556,0.500000,0.142456,0.220921,0.794695,0.470917,0.471198,0.000000,0.000000,1.000000,0.000000,0.200000,0.000000,0.250000,0.444444,0.428571,0.062467,0.153514,0.908948,0.470917,0.471198,0.500000,0.500000,0.500000,0.444444,0.425000,0.466667,0.433071,0.531915,0.521739,0.418542,0.387746,0.614952,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.400000,0.500000,0.435484,0.583333,0.550000,0.543154,0.561524,0.437437,0.500000,0.500000,0.699136,0.500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130135,0.00,0.472463,0.472463,0.300000,0.208333,0.491667,0.121429,0.130000,0.161111,0.236111,0.561765,0.482609,0.230542,0.609309,0.530193,0.355792,0.456196,0.250000,0.250000,0.500000,0.107143,0.075000,0.187500,0.208333,0.588235,0.510870,0.433433,0.623915,0.307892,0.464370,0.471198,0.283333,0.216667,0.500000,0.091667,0.123333,0.144444,0.170833,0.527778,0.535714,0.136381,0.592159,0.616564,0.365543,0.450849,0.250000,0.250000,0.500000,0.093750,0.075000,0.187500,0.156250,0.555556,0.562500,0.446946,0.623915,0.287108,0.470917,0.471198,0.525000,0.479167,0.495833,0.508333,0.530937,0.484167,0.516732,0.478457,0.478804,0.366806,0.487068,0.628617,0.402124,0.484687,0.516667,0.483333,0.500000,0.498958,0.524167,0.478472,0.507527,0.479167,0.481250,0.332450,0.471017,0.680405,0.401952,0.478411,0.706570,0.500000
130136,0.00,0.472463,0.472463,0.303030,0.262626,0.434343,0.144300,0.131313,0.180135,0.232323,0.570410,0.494510,0.284858,0.538426,0.510493,0.409076,0.472245,0.250000,0.250000,0.500000,0.095238,0.233333,0.208333,0.347222,0.490196,0.449275,0.200614,0.665290,0.532040,0.464370,0.471198,0.224490,0.285714,0.489796,0.096939,0.163265,0.156463,0.196429,0.507937,0.526239,0.160630,0.538090,0.618908,0.419868,0.474626,0.250000,0.250000,0.500000,0.083333,0.233333,0.208333,0.260417,0.462963,0.511905,0.219680,0.665290,0.496126,0.470917,0.471198,0.526515,0.506313,0.467172,0.528620,0.442614,0.483081,0.430789,0.565280,0.533926,0.555305,0.387670,0.487533,0.450157,0.501069,0.487245,0.517857,0.494898,0.506803,0.464966,0.474065,0.466974,0.533730,0.510034,0.468142,0.383904,0.567234,0.452500,0.503638,0.458511,0.500000
130137,0.00,0.472463,0.472463,0.272727,0.227273,0.500000,0.103896,0.127273,0.166667,0.242424,0.556150,0.482213,0.201240,0.682482,0.522227,0.464370,0.471198,0.714286,0.142857,0.142857,0.163265,0.057143,0.238095,0.166667,0.621849,0.559006,0.490831,0.714909,0.198766,0.464370,0.471198,0.333333,0.111111,0.555556,0.166667,0.088889,0.166667,0.194444,0.580247,0.531746,0.083622,0.639458,0.646376,0.470917,0.471198,0.714286,0.142857,0.142857,0.142857,0.057143,0.238095,0.125000,0.587302,0.602041,0.502975,0.714909,0.185349,0.470917,0.471198,0.279221,0.542208,0.678571,0.465368,0.539448,0.457143,0.545634,0.446532,0.442405,0.309888,0.471288,0.687145,0.500000,0.500000,0.309524,0.484127,0.706349,0.511905,0.515873,0.464286,0.535842,0.494709,0.450794,0.273758,0.431135,0.752452,0.500000,0.500000,0.215050,0.500000
130138,0.00,0.269385,0.329496,0.273684,0.315789,0.410526,0.123308,0.103158,0.170175,0.226316,0.578328,0.492449,0.238697,0.657029,0.496170,0.247452,0.328089,0.500000,0.204545,0.295455,0.207792,0.095455,0.285985,0.176136,0.617647,0.579051,0.532626,0.543209,0.245968,0.385910,0.368423,0.191489,0.340426,0.468085,0.085106,0.140426,0.156028,0.196809,0.515366,0.525836,0.123363,0.638746,0.606588,0.256519,0.330758,0.500000,0.204545,0.295455,0.181818,0.095455,0.285985,0.132102,0.583333,0.618506,0.543774,0.543209,0.229365,0.394096,0.368423,0.386842,0.555622,0.557536,0.450718,0.504333,0.430514,0.530226,0.468001,0.435048,0.307040,0.600781,0.644760,0.375189,0.458826,0.345745,0.567940,0.586315,0.451644,0.522485,0.435022,0.533397,0.449025,0.435131,0.273188,0.587198,0.706563,0.371987,0.460040,0.213806,0.444451


In [106]:
df_info=df[COL_INF]
labels=df[['w1', 'wx', 'w2']].values

encoder = OneHotEncoder()
countries=encoder.fit_transform(df[['country_id']]).toarray()
encoder = OneHotEncoder()
form1=encoder.fit_transform(df[['form1']]).toarray()
encoder = OneHotEncoder()
form2=encoder.fit_transform(df[['form2']]).toarray()
side=df[['side']].values

data=np.hstack([nums,countries,form1,form2,side])

In [108]:
data.shape,labels.shape

((130140, 187), (130140, 3))