In [1]:
import pandas as pd
from datetime import datetime
import time
import numpy as np
import Levenshtein
import multiprocessing

In [2]:
folder = '../data_sources/soccerway_data/'

In [3]:
DFORM = "%Y-%m-%d"

In [4]:
def parsenum(s):
    try:
        return float(s)
    except:
        return np.nan

In [5]:
def get_ratios(query,targets):
    out = np.zeros(len(targets))
    for i in range(len(targets)):
        out[i] = Levenshtein.ratio(query,targets[i])
    return out

In [6]:
def filter_sorted(arr,val,col):
    """
    arr: 2-D array, with column col sorted ascending
    val: value to filter for
    col: column number of 2-D array that is sorted and we look for vals in."""
    return arr[np.searchsorted(arr[:,col],val,side='left'):np.searchsorted(arr[:,col],val,side='right'),:]

def get_max_smaller_sorted(arr,val,col):
    """
    arr: 2-D array, with column col sorted ascending
    val: value, needs to be comparable
    col: column number of 2-D array that is sorted."""
    return arr[np.searchsorted(arr[:,col],val,side='left')-1,:]

## load data with simple transforms

In [7]:
t_i_df = pd.read_csv(folder + 't_info.csv').drop_duplicates().set_index('team_id')
p_v_df = pd.read_csv(folder + 'p_values.csv')

mv_np = p_v_df.drop_duplicates().sort_values(['playerid','date']).values

m_o_df = pd.read_csv(folder + 'm_odds.csv')\
           .assign(odds=lambda df: df['odds'].apply(parsenum))\
           .assign(odds1=lambda df: df['odds1'].apply(parsenum))\
           .assign(odds2=lambda df: df['odds2'].apply(parsenum))\
           .assign(oddsx=lambda df: df['oddsx'].apply(parsenum))

In [8]:
p_i_df = pd.read_csv(folder + 'p_info.csv')\
           .assign(dobform = lambda df: pd.to_datetime(df['dob']))\
           .assign(mvals = lambda df:df['playerid'].map(lambda x: filter_sorted(mv_np,x,1)))\
           .assign(height = lambda df: df['height'].apply(parsenum))\
           .drop(['dob'],axis=1).set_index('playerid').sort_index()

In [13]:
p_i_df.dtypes

height            float64
dobform    datetime64[ns]
mvals              object
dtype: object

In [14]:
p_i_df.head().loc[1,'mvals']

array([['2004-10-04', 1, 250000],
       ['2007-06-19', 1, 200000],
       ['2009-04-23', 1, 0]], dtype=object)

### preprocess odds + team data

In [15]:
CUTOFF = 0.65

In [17]:
m_o_df[['at','ht']]\
              .melt().head()

Unnamed: 0,variable,value
0,at,Tyrnavos
1,at,Iraklis
2,at,Panachaiki
3,at,A. Asteras
4,at,AEL Larissa


In [16]:
%%time
poss_pairs_o = m_o_df[['at','ht']]\
              .melt().groupby('value')\
              .apply(lambda df: t_i_df.loc[get_ratios(df['value'].iloc[0],
                                                      t_i_df['name'].values)\
                                                       >= CUTOFF,
                                           []
                                          ])\
              .reset_index('team_id')

CPU times: user 1min 8s, sys: 120 ms, total: 1min 8s
Wall time: 1min 8s


In [19]:
poss_pairs_o.head(10)

Unnamed: 0_level_0,team_id
value,Unnamed: 1_level_1
07 Vestur Sorvagur,27811
1. FC Koln,3
1. FC Koln,438
1. FC Koln,973
1. FC Koln,1461
1. FC Koln,8086
1. FC Koln,21048
1. FC Koln (Ger),3
1. FSV Mainz 05,39
1. FSV Mainz 05,851


## OTTHON DICSŐSÉGÉRT MEGNÉZNI
elbaszódik-e többszörös párosítás miatt

In [20]:
%%time
prep = m_o_df.merge(poss_pairs_o.rename(columns = lambda x: 'away_%s' % x),left_on='at',right_index=True)\
             .merge(poss_pairs_o.rename(columns = lambda x: 'home_%s' % x),left_on='ht',right_index=True)\
             .set_index(['date','score','home_team_id','away_team_id'])\
             .sort_index().drop(['at','ht'],axis=1)

CPU times: user 1.14 s, sys: 28.1 ms, total: 1.17 s
Wall time: 1.17 s


In [24]:
prep.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,odds,odds1,odds2,oddsx
date,score,home_team_id,away_team_id,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2004-02-11,0-0,235,306,3.1,2.35,2.57,3.1
2004-02-11,0-0,235,4779,3.1,2.35,2.57,3.1
2004-02-11,0-0,235,24368,3.1,2.35,2.57,3.1
2004-02-11,0-0,8823,306,3.1,2.35,2.57,3.1
2004-02-11,0-0,8823,4779,3.1,2.35,2.57,3.1
2004-02-11,0-0,8823,24368,3.1,2.35,2.57,3.1
2004-02-11,0-2,1020,631,1.55,5.2,1.55,3.58
2004-02-11,0-2,1020,6918,1.55,5.2,1.55,3.58
2004-02-11,0-2,1020,9250,1.55,5.2,1.55,3.58
2004-02-11,1-1,164,762,3.18,2.58,2.4,3.18


In [25]:
def filter_mv(pdata,date):
    try:
        return get_max_smaller_sorted(pdata,date,0)[-1]
    except:
        return np.nan

In [26]:
def list_of_mvals(val_list,date_list):
    out = np.zeros(len(val_list))
    for i in range(len(out)):
        out[i] = filter_mv(val_list[i],date_list[i])
    return out

In [47]:
p_i_df.head().reindex([1,3,3,2,4,0,6,7])

Unnamed: 0_level_0,height,dobform,mvals
playerid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,170.0,1980-09-23,"[[2004-10-04, 1, 250000], [2007-06-19, 1, 2000..."
2,186.0,1968-12-18,"[[2004-10-04, 2, 0], [2005-07-27, 2, 250000]]"
3,180.0,1967-12-29,"[[2004-10-04, 3, 0]]"
4,179.0,1968-03-09,"[[2004-10-04, 4, 400000], [2005-02-19, 4, 3000..."
5,191.0,1973-05-17,"[[2004-10-04, 5, 1400000], [2006-01-09, 5, 130..."


In [65]:
p_i_df.reindex(m_i_df['away3_id']).sort_index().equals(
    p_i_df.merge(m_i_df[['away3_id']],left_on='playerid',right_on='away3_id',
            how='right').set_index('away3_id').sort_index()
    )

True

In [73]:
def get_fitted_player_values(kwargs):
    return kwargs['p_i_df']\
                  .reindex(kwargs['m_df'].iloc[:,0])\
                  .assign(iskeeper=lambda _: (kwargs['m_df'].loc[:,kwargs['c'].replace('_id','_pos')] == KEEPER_POS).values)\
                  .assign(mv=lambda df_p: list_of_mvals(df_p['mvals'].values,
                                                        kwargs['m_df'].iloc[:,1].values))\
                  .drop('mvals',axis=1)\
                  .assign(age=lambda df:(kwargs['m_df'].loc[:,'dateform'].values - \
                                        df.loc[:,'dobform'].values)\
                                      .astype(float) / (10 ** 9 * 24 * 60 * 60 * 365))\
                  .rename(columns=lambda x: '_'.join([kwargs['c'].replace('_id',''),x]))\
                  .reset_index(drop=True)

In [66]:
p_i_df.dtypes

height            float64
dobform    datetime64[ns]
mvals              object
dtype: object

## where process would start

In [28]:
m_i_df = pd.read_csv(folder + 'm_info.csv')

In [29]:
KEEPER_POS = 'top: 80%; left: 40%;'

In [69]:
[side for side in ['home','away']]

['home', 'away']

In [74]:
%%time
player_cols = [c for c in m_i_df.columns if c.endswith('_id')]
pool = multiprocessing.Pool(multiprocessing.cpu_count())

merged = m_i_df.merge(prep,
                      left_on=['date','score','home_team','away_team'],right_index=True,how='left')\
               .reset_index(drop=True).assign(dateform=lambda df: pd.to_datetime(df['date']))\
               .pipe(lambda df: pd.concat(
                   [df.rename(columns={'odds1':'home_odds','odds2':'away_odds'}),
                    df['score'].str.split('-',expand=True).astype(int)\
                               .rename(columns={0:'home_score',
                                                1:'away_score'})] + \
                   pool.map(get_fitted_player_values,[{'p_i_df':p_i_df,
                                                       'c':c,
                                                       'm_df':df.loc[:,[c,'date','dateform',
                                                                 c.replace('_id','_pos')]]}
                                                      for c in player_cols]) + \
                   [t_i_df.reindex(df[tc])\
                          .rename(columns= lambda x: '_'.join([tc.split('_')[0],x]))\
                          .reset_index(drop=True)
                                    for tc in ['home_team','away_team']]
                   ,axis=1))\
               .pipe(lambda df: pd.concat(
                   [df] + \
                   [df.loc[:,df.columns.str.contains(side)]\
                      .assign(avg_val=lambda df2: df2.loc[:,df2.columns.str.endswith('_mv')]\
                                                     .mean(axis=1))\
                      .assign(avg_height=lambda df2: df2.loc[:,df2.columns.str.endswith('_height')]
                                                        .mean(axis=1))\
                      .assign(avg_age=lambda df2: df2.loc[:,df2.columns.str.endswith('_age')]
                                                        .mean(axis=1))\
                      .loc[:,lambda df3: df3.columns.str.startswith('avg_')]\
                      .rename(columns=lambda x: x.replace('avg',side))
                   for side in ['home','away']],axis=1))\
               .pipe(lambda df: pd.concat(
                   [df] + \
                   [df.assign(side_valdiff=lambda df2: df2['%s_val' % side] - df2['%s_val' % opp])\
                      .assign(side_heightdiff=lambda df2: df2['%s_height' % side] - df2['%s_height' % opp])\
                      .assign(side_agediff=lambda df2: df2['%s_age' % side] - df2['%s_age' % opp])\
                      .assign(side_seatdiff=lambda df2: df2['%s_seats' % side] - df2['%s_seats' % opp])\
                      .assign(side_favorite=lambda df2: df2['%s_odds' % opp] > df2['%s_odds' % side])\
                      .assign(side_underdog=lambda df2: df2['%s_odds' % opp] < df2['%s_odds' % side])\
                      .assign(side_winner=lambda df2: df2['%s_score' % opp] < df2['%s_score' % side])\
                      .assign(side_loser=lambda df2: df2['%s_score' % opp] > df2['%s_score' % side])\
                      .assign(side_gd=lambda df2: df2['%s_score' % side] - df2['%s_score' % opp])\
                      .assign(side_totgoals=lambda df2: df2['%s_score' % side] + df2['%s_score' % opp])\
                      .assign(side_noconcede=lambda df2: df2['%s_score' % opp] == 0)\
                      .assign(side_concede=lambda df2: df2['%s_score' % opp])\
                      .assign(side_oddsdraw=lambda df2: df2['oddsx'])\
                      .assign(side_loseprofit=lambda df2: (df2['side_loser'].astype(int) * \
                                                           df2['%s_odds' % opp]) - 1)\
                      .assign(side_ishome=lambda df2: side == 'home')\
                      .loc[:,lambda df3: df3.columns.str.startswith('side_')]\
                      .rename(columns=lambda x: x.replace('side',side))
                   for side,opp in [['home','away'],['away','home']]],axis=1))
pool.close()
pool.join()
pool.terminate()

CPU times: user 16.1 s, sys: 5.32 s, total: 21.4 s
Wall time: 24.7 s


## FIND THIS!

In [75]:
merged.shape

(168081, 211)

In [76]:
m_i_df.shape

(168075, 54)

In [77]:
def re_multi(c):
    if '_' in c:
        _sp = c.split('_')
        if _sp[0] in playercol_ids:
            return ('player',_sp[0],_sp[1])
        else:
            return ('team',_sp[0],_sp[1])
    else:
        return ('match','all',c)

In [78]:
playercol_ids = [c.replace('_id','') for c in player_cols]

In [79]:
def team_to_pc(c):
    if 'home' in c:
        return (c,'home','away')
    return (c,'away','home')

In [80]:
playercol_ids_with_teams = [team_to_pc(c) for c in playercol_ids]

In [81]:
merged.columns = pd.MultiIndex.from_tuples([re_multi(c) for c in merged.columns],names=['kind','inst','info'])

In [86]:
merged.loc[:,('team','home')].columns

  return self._getitem_tuple(key)


Index(['formation', 'manager', 'team', 'odds', 'score', 'name', 'seats', 'val',
       'height', 'age', 'valdiff', 'heightdiff', 'agediff', 'seatdiff',
       'favorite', 'underdog', 'winner', 'loser', 'gd', 'totgoals',
       'noconcede', 'concede', 'oddsdraw', 'loseprofit', 'ishome'],
      dtype='object', name='info')

In [91]:
merged.loc[:,('player','away1')]

  return self._getitem_tuple(key)


info,id,pos,height,dobform,iskeeper,mv,age
0,34370,top: 80%; left: 40%;,196.0,1981-11-03,True,4500000.0,33.906849
1,59373,top: 61%; left: 73%;,180.0,1988-07-10,False,300000.0,20.736986
2,45601,top: 80%; left: 40%;,188.0,1990-06-15,True,100000.0,21.515068
3,93763,top: 80%; left: 40%;,189.0,1992-06-02,True,250000.0,22.430137
4,128969,top: 80%; left: 40%;,184.0,1992-04-08,True,4000000.0,23.054795
5,89649,top: 80%; left: 40%;,188.0,1993-06-20,True,200000.0,21.364384
6,27539,top: 80%; left: 40%;,205.0,1977-07-21,True,900000.0,34.183562
7,14555,top: 80%; left: 40%;,188.0,1985-09-03,True,3100000.0,26.057534
8,28124,top: 80%; left: 40%;,190.0,1983-07-19,True,1200000.0,28.183562
9,7110,top: 80%; left: 40%;,186.0,1978-10-01,True,850000.0,32.983562


In [155]:
%%time
full_player_df = pd.concat([pd.concat([merged['player'][c],
                      merged['match']['all'][['dateform']],
                     merged['team'][ct],
                     merged['team'][cnt].rename(columns=lambda x: 'opposition_%s' % x)],axis=1)
                   for c,ct,cnt in playercol_ids_with_teams],axis=0).reset_index(drop=True)

CPU times: user 4.82 s, sys: 4.2 s, total: 9.02 s
Wall time: 13.5 s


In [156]:
%%time
full_team_df = pd.concat([pd.concat([merged['team'][ct],
                                     merged['match']['all'][['dateform']],
                                     merged['team'][cnt].rename(columns=lambda x: 'opposition_%s' % x)],axis=1)
                   for ct,cnt in [['home','away'],['away','home']]],axis=0).reset_index(drop=True)

CPU times: user 417 ms, sys: 76.3 ms, total: 493 ms
Wall time: 987 ms


In [157]:
%%time
general_out = [
          {'most-used-formation':full_team_df['formation'].value_counts().index[0]},
          {'number-of-players-with-no-games':(~p_i_df.index.isin(full_player_df['id'].unique())).sum()},
          {'largest-odds-overcome-in-game':full_team_df.loc[full_team_df['winner']].sort_values('odds',ascending=False).iloc[0,:]['odds']},
          {'biggest-value-difference':full_team_df.sort_values('valdiff').iloc[0,:]['valdiff']},
          {'biggest-value-difference-upset':full_team_df[full_team_df['winner']].sort_values('valdiff').iloc[0,:]['valdiff']}, # an upset means the unexpected team won
          {'biggest-value-difference-with-higher-odds':full_team_df.loc[full_team_df['favorite'],'valdiff'].min()},
          {'biggest-stadium-capacity-difference-upset':full_team_df.loc[full_team_df['winner'],'valdiff'].min()},
          {'largest-height-difference-overcome-in-game':full_team_df.loc[full_team_df['winner'],'heightdiff'].min()},
          {'biggest-age-difference-between-teams-match-id':full_team_df.sort_values('agediff',ascending=False).index[0]},
    ]

CPU times: user 366 ms, sys: 200 µs, total: 366 ms
Wall time: 412 ms


In [158]:
general_out

[{'most-used-formation': '4-2-3-1'},
 {'number-of-players-with-no-games': 227},
 {'largest-odds-overcome-in-game': 24.050000000000001},
 {'biggest-value-difference': -50600000.0},
 {'biggest-value-difference-upset': -40636363.63636364},
 {'biggest-value-difference-with-higher-odds': -29454545.454545453},
 {'biggest-stadium-capacity-difference-upset': -40636363.63636364},
 {'largest-height-difference-overcome-in-game': -184.0},
 {'biggest-age-difference-between-teams-match-id': 298381}]

In [159]:
%%time
team_level_out = [
          {'capacity-of-stadium-of-team-with-most-games':t_i_df.loc[full_team_df.groupby('team').agg({'score':'count'}).idxmax().values].iloc[0,1]},
          {'id-of-oldest-team-to-win-a-game':full_team_df[full_team_df['winner']].sort_values('age').iloc[0]['team']},
          {'median-of-winning-team-average-age':full_team_df.loc[full_team_df['winner'],'age'].median()},
          {'median-of-favorite-team-average-age':full_team_df.loc[full_team_df['favorite'],'age'].median()}, # favorite means has lower odds of winning
          {'median-of-underdog-team-average-age':full_team_df.loc[full_team_df['underdog'],'age'].median()}, # underdog means has higher odds of winning
          {'team-with-most-wins-as-underdog':full_team_df.loc[full_team_df['underdog'] & \
                                                              full_team_df['winner'],'team']\
                                                         .value_counts().index[0]},
          {'team-with-most-losses-as-favorite':full_team_df.loc[full_team_df['favorite'] & \
                                                              full_team_df['loser'],'team']\
                                                         .value_counts().index[0]},
          {'team-with-lowest-average-odds-of-draw':full_team_df.groupby('team').agg({'oddsdraw':'mean'}).sort_values('oddsdraw').index[0]},
          {'stadium-capactiy-of-team-with-most-avg-goals-in-a-game':full_team_df.groupby('team').agg({'totgoals':'mean'})['totgoals'].idxmax()},#átlagosan leggólgazdagabb meccseket játszó csapat stadionjának befogadóképessége
          {'team-with-highest-profit-for-losing':full_team_df.groupby('team').agg({'loseprofit':'sum'})['loseprofit'].idxmax()},#a csapat, akinek, ha minden meccsén ellenük fogadsz, összesítve a legnagyobb profitot termeli (mindig ugyanakkora összeggel fogadsz rá) 
          {'largest-std-in-goal-difference-team':full_team_df.groupby('team').agg({'gd':'std'})['gd'].idxmax()},#a legnagyobb gólkülönbség szórással rendelkező csapat    
]

CPU times: user 117 ms, sys: 0 ns, total: 117 ms
Wall time: 141 ms


In [160]:
team_level_out

[{'capacity-of-stadium-of-team-with-most-games': 75000.0},
 {'id-of-oldest-team-to-win-a-game': 54045},
 {'median-of-winning-team-average-age': 26.25155666251557},
 {'median-of-favorite-team-average-age': 26.41818181818182},
 {'median-of-underdog-team-average-age': 26.286924034869237},
 {'team-with-most-wins-as-underdog': 18},
 {'team-with-most-losses-as-favorite': 15},
 {'team-with-lowest-average-odds-of-draw': 34499},
 {'stadium-capactiy-of-team-with-most-avg-goals-in-a-game': 22820},
 {'team-with-highest-profit-for-losing': 265},
 {'largest-std-in-goal-difference-team': 36039}]

In [161]:
#%time
player_level_out = [ 
          {'player-with-highest-number-of-games':full_player_df['id'].value_counts().index[0]},
          {'player-with-highest-number-of-games-where-his-team-didnt-concede':full_player_df.loc[full_player_df['noconcede'],'id'].value_counts().index[0]},
          {'most-games-played-in-same-position-by-player':full_player_df.groupby(['id','formation']).agg({'score':'count'})['score'].max()},
          {'most-different-positions-by-player':full_player_df.groupby('id').agg({'pos':'nunique'})['pos'].max()},
          {'most-different-formations-by-player':full_player_df.groupby('id').agg({'formation':'nunique'})['formation'].max()},
          {'goalkeeper-with-most-clean-sheets':full_player_df.loc[full_player_df['iskeeper'].fillna(False) & \
                                                                  full_player_df['noconcede'],'id']\
                                                             .value_counts().index[0]},#a legtöbb kapott gól nélküli meccset lehozó kapus
          {'dob-of-gk-with-most-average-goals-conceded':full_player_df.loc[full_player_df['iskeeper'].fillna(False),
                                                                           ['id','dobform','opposition_score']]
                                                                      .groupby('id')\
                                                                      .agg({'opposition_score':'mean'})\
                                                                      .pipe(lambda df: p_i_df.loc[df['opposition_score'].idxmax(),'dobform'])},#az átlagosan legtöbb gólt kapó kapus születési dátuma
          {'player-with-most-different-teams':full_player_df.groupby('id').agg({'team':'nunique'})['team'].idxmax()},#a legtöbb csapatban pályára lépő játékos
          {'oldest-player-to-win-a-home-game':full_player_df.loc[full_player_df.loc[full_player_df['winner'] & \
                                                                 full_player_df['ishome'],'age']\
                                                            .idxmax(),'id']}#a legidősebb hazai pályán győztes meccset játszó játékos           
    ]

In [92]:
p_i_df.loc[17259,:]

height                                                   193
dobform                                  1986-03-27 00:00:00
mvals      [[2004-10-04, 17259, 0], [2005-03-17, 17259, 7...
Name: 17259, dtype: object

In [162]:
player_level_out

[{'player-with-highest-number-of-games': 2219},
 {'player-with-highest-number-of-games-where-his-team-didnt-concede': 2219},
 {'most-games-played-in-same-position-by-player': 350},
 {'most-different-positions-by-player': 35},
 {'most-different-formations-by-player': 24},
 {'goalkeeper-with-most-clean-sheets': 17259},
 {'dob-of-gk-with-most-average-goals-conceded': Timestamp('1973-04-24 00:00:00')},
 {'player-with-most-different-teams': 4277},
 {'oldest-player-to-win-a-home-game': 1941427    366116
  1953670     16893
  Name: id, dtype: int64}]

In [None]:
posagg = full_player_df.groupby('position').agg({'val':'mean',
                                                 'height':'mean',
                                                 'age':'mean'})
player_level_out2 = [ 
          {'position-with-highest-average-value':posagg['val'].idxmax()},
          {'position-with-largest-average-height':posagg['height'].idxmax()},
          {'position-with-youngest-average-age':posagg['age'].idxmin()}
    ]

In [None]:
def get_streaks(tdf):
    return tdf.sort_values('date')\
       .pipe(lambda df: pd.concat([
                        df] + 
                        [df.loc[:,[res]]
                           .assign(streak_no= lambda df2: (df2[res] != df2[res].shift()).cumsum())\
                           .groupby('streak_no')\
                           .cumsum()\
                           .rename(columns = lambda x: x + '_streak')
                         for res in ['winner','loser']
    ]))
       

In [None]:
w_streaks = full_team_df.groupby('team').apply(get_streaks)

w_h_streaks = full_team_df.loc[full_team_df['ishome']].groupby('team').apply(get_streaks)

rolling_out = [
          {'longest-time-in-days-between-two-games-for-player':full_player_df.groupby('id')\
                                                                             .agg(lambda df: (df['dateform'].shift() - \
                                                                                             df['dateform']).max())\
                                                                             .idxmax()},
          {'longest-losing-streak-team':w_streaks.loc[w_streaks['loser_streak'].idxmax(),'team']},#a leghosszabb vesztes széria by team
          {'longest-home-winning-streak-stadium-capacity':w_h_streaks.loc[w_h_streaks['winner_streak'].idxmax(),'team']},#leghosszabb hazai pályás győzelmi széria helyszínének befogadóképessége
          
]

bonus_out = [
          {'win-ratio-of-actual-highest-rated-player':None},#az adott időpillanatban legértékesebb játékos átlagos win ratioja
]

In [13]:
sample_output = [ 
          {'most-used-formation':None},
          {'number-of-players-with-no-games':None},
          {'player-with-highest-number-of-games':None},
          {'player-with-highest-number-of-games-where-his-team-didnt-concede':None},
          {'most-games-played-in-same-position-by-player':None},
          {'most-different-positions-by-player':None},
          {'most-different-formations-by-player':None},
          {'largest-odds-overcome-in-game':None},
          {'largest-height-difference-overcome-in-game':None},
          {'longest-time-in-days-between-two-games-for-player':None},
          {'biggest-value-difference':None},
          {'biggest-value-difference-upset':None}, # an upset means the unexpected team won
          {'biggest-value-difference-with-higher-odds':None},
          {'biggest-stadium-capacity-difference-upset':None},
          {'capacity-of-stadium-of-team-with-most-games':None},
          {'id-of-oldest-team-to-win-a-game':None},
          {'biggest-age-difference-between-teams-match-id':None},
          {'median-of-winning-team-average-age':None},
          {'median-of-favorite-team-average-age':None}, # favorite means has lower odds of winning
          {'median-of-underdog-team-average-age':None}, # underdog means has higher odds of winning
          {'team-with-most-wins-as-underdog':None},
          {'team-with-most-losses-as-favorite':None},
          {'team-with-lowest-average-odds-of-draw':None},
          {'position-with-highest-average-value':None},
          {'position-with-largest-average-height':None},
          {'position-with-youngest-average-age':None},
          {'goalkeeper-with-most-clean-sheets':None},#a legtöbb kapott gól nélküli meccset lehozó kapus
          {'dob-of-gk-with-most-average-goals-conceded':None},#az átlagosan legtöbb gólt kapó kapus születési dátuma
          {'stadium-capactiy-of-team-with-most-avg-goals-in-a-game':None},#átlagosan leggólgazdagabb meccseket játszó csapat stadionjának befogadóképessége
          {'team-with-highest-profit-for-losing':None},#a csapat, akinek, ha minden meccsén ellenük fogadsz, összesítve a legnagyobb profitot termeli (mindig ugyanakkora összeggel fogadsz rá) 
          {'largest-std-in-goal-difference-team':None},#a legnagyobb gólkülönbség szórással rendelkező csapat
          {'player-with-most-different-teams':None},#a legtöbb csapatban pályára lépő játékos
          {'longest-losing-streak-team':None},#a leghosszabb vesztes széria by team
          {'longest-home-winning-streak-stadium-capacity':None},#leghosszabb hazai pályás győzelmi széria helyszínének befogadóképessége
          {'win-ratio-of-actual-highest-rated-player':None},#az adott időpillanatban legértékesebb játékos átlagos win ratioja
          {'oldest-player-to-win-a-home-game':None}#a legidősebb hazai pályán győztes meccset játszó játékos           
    ]