In [461]:
import pandas as pd
import pickle
import aiohttp
import asyncio
import nest_asyncio

from understat import Understat

# Show every column and every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# Patch asyncio so that a loop which is already running (as in a notebook
# kernel) can be re-entered by the run_until_complete() call in routine().
nest_asyncio.apply()

In [462]:
def routine(functionality):
    """Run an awaitable to completion on the current event loop.

    Args:
        functionality: Coroutine (or other awaitable) to execute.

    Returns:
        Whatever the awaitable returns.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # Raised when the calling thread has no current loop (worker threads,
        # or Python versions that no longer create one implicitly).
        loop = None

    if loop is None or loop.is_closed():
        # Create a loop and install it so that later calls reuse it.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    return loop.run_until_complete(functionality)

In [463]:
async def get_team_fixtures(team, season, date=None):
    """Return the first fixture of ``team`` dated on or after a cutoff.

    Args:
        team: Team name passed to Understat.
        season: Season passed to Understat.
        date: Optional cutoff. When falsy, the current timestamp is used.

    Returns:
        DataFrame with the columns team, homeTeam, awayTeam and date holding
        at most one row: the first qualifying fixture in the order returned
        by Understat.
    """
    async with aiohttp.ClientSession() as http_session:
        client = Understat(http_session)
        raw_fixtures = await client.get_team_fixtures(team_name=team, season=season)

    table = pd.DataFrame(raw_fixtures)
    table['team'] = team
    # 'h' and 'a' hold one mapping per fixture; only its title is kept.
    table['homeTeam'] = table['h'].apply(lambda club: club.get('title'))
    table['awayTeam'] = table['a'].apply(lambda club: club.get('title'))
    table['date'] = pd.to_datetime(table['datetime'])

    cutoff = date if date else pd.Timestamp('today')
    remaining = table[table.date >= cutoff]

    return remaining[['team', 'homeTeam', 'awayTeam', 'date']].head(1)

In [464]:
async def get_player_stats(player, season, date=None, league_name="Serie A"):
    """Aggregate a player's per-90 statistics for one season.

    Args:
        player: Player name to look up among the league's players.
        season: Season to look up and to filter the player's matches by.
        date: Optional upper bound; when truthy, only matches dated on or
            before it are counted.
        league_name: League to search for the player (defaults to "Serie A").

    Returns:
        One-row DataFrame with the columns player_id, position, G90P, A90P,
        s90P, xG90P, xA90P and time. A player that is not found yields id 0,
        position 'Sub' and zeros; a player without any match rows yields the
        real id with position 'Sub' and zeros.
    """
    columns = ['player_id', 'position', 'G90P', 'A90P', 's90P', 'xG90P', 'xA90P', 'time']
    stat_columns = ['goals', 'assists', 'shots', 'xG', 'xA', 'time']

    async with aiohttp.ClientSession() as session:
        understat = Understat(session)

        # Use the requested season (it used to be hard-coded to '2021') and
        # keep the result in its own name instead of shadowing ``player``.
        found = await understat.get_league_players(
            league_name=league_name,
            season=season,
            player_name=player
        )

        if len(found) == 0:
            return pd.DataFrame(data=[[0, 'Sub', 0, 0, 0, 0, 0, 0]], columns=columns)

        player_id = found[0]['id']
        matches = pd.DataFrame(await understat.get_player_matches(player_id))

    if matches.empty:
        # No match rows at all: there is no 'position' column to read from.
        return pd.DataFrame(data=[[player_id, 'Sub', 0, 0, 0, 0, 0, 0]], columns=columns)

    # Position of the first row returned by the API, taken before any
    # season/date filtering (unchanged from the original behaviour).
    position = matches['position'].iloc[0]

    matches = matches[['season'] + stat_columns + ['date']].astype(
        {column: 'float' for column in stat_columns}
    )
    matches['date'] = pd.to_datetime(matches['date'])

    # Compare seasons as strings so '2021' and 2021 select the same rows.
    keep = matches['season'].astype(str) == str(season)
    if date:
        keep &= matches['date'] <= date
    # Appearances with less than 15 in 'time' are ignored
    # (presumably minutes played — confirm against the Understat schema).
    keep &= matches['time'] >= 15

    aggregated = matches.loc[keep, stat_columns].sum()

    minutes = aggregated['time']
    if minutes > 0:
        G90P = aggregated['goals'] * 90 / minutes
        A90P = aggregated['assists'] * 90 / minutes
        s90P = aggregated['shots'] * 90 / minutes
        xG90P = aggregated['xG'] * 90 / minutes
        xA90P = aggregated['xA'] * 90 / minutes
    else:
        G90P = A90P = s90P = xG90P = xA90P = 0

    return pd.DataFrame(
        data=[[player_id, position, G90P, A90P, s90P, xG90P, xA90P, minutes]],
        columns=columns)
            

In [465]:
async def get_team_stats(team_name, season, date=None, opponent=False):
    """Average a team's goals and xG, for and against, per 90.

    Every result is given a 'time' of 90, so each figure is the sum over the
    selected results scaled by 90 / total time.

    Args:
        team_name: Team name passed to Understat.
        season: Season passed to Understat.
        date: Optional upper bound; when truthy, only results dated on or
            before it are used.
        opponent: When truthy, the stat columns carry the 'OT' suffix
            instead of 'T'.

    Returns:
        One-row DataFrame: the team name followed by goals scored, goals
        conceded, xG for and xG against, each per 90.
    """
    async with aiohttp.ClientSession() as http_session:
        client = Understat(http_session)
        raw_results = await client.get_team_results(team_name, season)
        results = pd.DataFrame(raw_results)

        results['date'] = pd.to_datetime(results['datetime'])
        if date:
            results = results[results['date'] <= date]

        def side_values(column, side):
            # Each cell is a mapping with 'h' and 'a' entries; pick one as float.
            return results[column].apply(lambda entry: float(entry.get(side)))

        results['GH'] = side_values('goals', 'h')
        results['GA'] = side_values('goals', 'a')
        results['xGH'] = side_values('xG', 'h')
        results['xGA'] = side_values('xG', 'a')

        # "For" figures come from the home entry unless the team played away;
        # "against" figures are the mirror image.
        played_away = results['side'] == 'a'
        results['GT'] = results['GH'].mask(played_away, results['GA'])
        results['GCT'] = results['GA'].mask(played_away, results['GH'])
        results['xGT'] = results['xGH'].mask(played_away, results['xGA'])
        results['xGCT'] = results['xGA'].mask(played_away, results['xGH'])

        results['time'] = 90

        totals = results[['GT', 'GCT', 'xGT', 'xGCT', 'time']].sum()
        per_90 = [
            totals[stat] * 90 / totals['time']
            for stat in ('GT', 'GCT', 'xGT', 'xGCT')
        ]

        columns = (
            ['team', 'G90OT', 'GC90OT', 'xG90OT', 'xGC90OT']
            if opponent
            else ['team', 'G90T', 'GC90T', 'xG90T', 'xGC90T']
        )
        return pd.DataFrame(data=[[team_name] + per_90], columns=columns)

In [466]:
# Name of the league CSV under resources/leagues/ (without the .csv extension).
league = 'fanta-camplus-citta-studi'
# Value matched against the fantaSquad column to select this squad's rows.
fanta_team = 'AI sereje ja dejeDe jebe tu de jebere'

In [467]:
# Load the league roster, then keep this squad's rows whose role is not 'P'
# and only the three columns used downstream, re-indexed from 0.
league_roster = pd.read_csv('resources/leagues/{}.csv'.format(league), index_col=0)
in_squad = league_roster.fantaSquad == fanta_team
role_is_not_p = league_roster.role != 'P'
my_team = league_roster[in_squad & role_is_not_p].reset_index(drop=True)[['role', 'name', 'team']]

In [468]:
# Look up one fixture row per roster entry, in roster order. Each club is
# queried only once even when several players share it.
teams = my_team.team.tolist()
fixture_by_team = {
    team: routine(get_team_fixtures(team, '2021'))
    for team in dict.fromkeys(teams)
}

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
fixtures = pd.concat([fixture_by_team[team] for team in teams], ignore_index=True)
fixtures = fixtures[['homeTeam', 'awayTeam', 'date']]

# These rows are later joined to my_team by position, so a club without an
# upcoming fixture would silently shift every following row.
assert len(fixtures) == len(my_team), 'some teams have no upcoming fixture'

In [469]:
# Per-90 stats of each player's own club, one row per roster entry in roster
# order. Each club is queried only once even when several players share it.
teams = my_team.team.tolist()
team_stats_by_team = {
    team: routine(get_team_stats(team, '2021'))
    for team in dict.fromkeys(teams)
}

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
team_stats = pd.concat([team_stats_by_team[team] for team in teams], ignore_index=True)
team_stats = team_stats[['G90T', 'GC90T', 'xG90T', 'xGC90T']]

In [470]:
# For every roster row, the opponent is whichever side of the fixture is not
# the player's own club.
teams = my_team.team.tolist()
home_teams = fixtures.homeTeam.tolist()
away_teams = fixtures.awayTeam.tolist()
opponent_teams = [
    away_team if home_team == team else home_team
    for team, home_team, away_team in zip(teams, home_teams, away_teams)
]

# Each opponent is queried only once even when it appears several times.
opponent_stats_by_team = {
    opponent_team: routine(get_team_stats(opponent_team, '2021', opponent=True))
    for opponent_team in dict.fromkeys(opponent_teams)
}

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
opponent_stats = pd.concat(
    [opponent_stats_by_team[opponent_team] for opponent_team in opponent_teams],
    ignore_index=True
)
opponent_stats = opponent_stats[['G90OT', 'GC90OT', 'xG90OT', 'xGC90OT']]

In [471]:
# Per-90 stats of every player on the roster, one row per player in roster order.
players = my_team.name.tolist()

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
player_stats = pd.concat(
    [routine(get_player_stats(player, '2021')) for player in players],
    ignore_index=True
)
player_stats = player_stats[['position', 'G90P', 'A90P', 's90P', 'xG90P', 'xA90P', 'time']]

In [472]:
# Join the per-row frames onto the roster by index, then derive the side
# played, the calendar features of the fixture date and the season.
scoring = my_team.join([fixtures, player_stats, team_stats, opponent_stats]).assign(
    homeOrAway=lambda frame: (frame.team == frame.awayTeam).map({True: 'a', False: 'h'}),
    date=lambda frame: pd.to_datetime(frame['date']),
    month=lambda frame: frame['date'].dt.month,
    day=lambda frame: frame['date'].dt.day,
    weekDay=lambda frame: frame['date'].dt.weekday,
    season=2021,
    timePlayer=lambda frame: frame['time'],
)

In [473]:
# Column order for the scoring frame: identifiers, fixture, calendar,
# player stats, own-team stats, opponent stats.
scoring_columns = [
    'role', 'name', 'team',
    'homeTeam', 'awayTeam', 'homeOrAway',
    'season', 'month', 'day', 'weekDay',
    'position', 'G90P', 'A90P', 's90P', 'xG90P', 'xA90P', 'timePlayer',
    'G90T', 'GC90T', 'xG90T', 'xGC90T',
    'G90OT', 'GC90OT', 'xG90OT', 'xGC90OT',
]
scoring = scoring[scoring_columns]

In [474]:
# Split the identifying columns from the columns fed to the transformer.
identifier_columns = ['role', 'name', 'team']
metadata = scoring[identifier_columns]
X = scoring.drop(columns=identifier_columns)
X.head()

Unnamed: 0,homeTeam,awayTeam,homeOrAway,season,month,day,weekDay,position,G90P,A90P,s90P,xG90P,xA90P,timePlayer,G90T,GC90T,xG90T,xGC90T,G90OT,GC90OT,xG90OT,xGC90OT
0,Spezia,Sampdoria,a,2021,1,23,6,DL,0.055762,0.0,0.167286,0.020621,0.072102,1614.0,1.318182,1.818182,1.084322,1.585449,1.045455,1.909091,1.06003,1.810059
1,Inter,Venezia,h,2021,1,22,5,DMR,0.08991,0.08991,0.629371,0.075845,0.109134,1001.0,2.428571,0.761905,2.30721,0.973313,0.904762,1.809524,0.940814,1.783486
2,Verona,Bologna,a,2021,1,21,4,MR,0.199852,0.066617,0.532939,0.096415,0.061458,1351.0,1.333333,1.666667,1.186769,1.405169,1.863636,1.681818,1.495431,1.309407
3,Lazio,Atalanta,a,2021,1,22,5,ML,0.196078,0.0,2.745098,0.266097,0.062404,459.0,2.095238,1.238095,1.862982,1.125639,2.090909,1.772727,1.423872,1.415425
4,Inter,Venezia,a,2021,1,22,5,DL,0.0,0.111386,0.222772,0.003278,0.062576,808.0,0.904762,1.809524,0.940814,1.783486,2.428571,0.761905,2.30721,0.973313


In [475]:
# Load the pickled transformer that is applied to the feature frame below.
# NOTE(review): pickle.load can execute arbitrary code — only load files from a trusted source.
with open('model/transformer.pkl', 'rb') as f:
    ct = pickle.load(f)

In [476]:
# Apply the loaded transformer to the feature frame (transform only, no refit).
X_values = ct.transform(X)

In [477]:
# Rebuild the column names of the transformed matrix: the columns handled by
# the first transformer, followed by the encoded columns of the second.
# Copy the column list: the previous code aliased ct.transformers_[0][2] and
# extended it in place, which altered the fitted transformer on every re-run.
feature_names = list(ct.transformers_[0][2])

# Second transformer: presumably a OneHotEncoder over the categorical columns.
encoder = ct.transformers_[1][1]
categorical_columns = list(ct.transformers_[1][2])

# Passing the input column names makes the encoder emit "<column>_<category>"
# directly, so no "x<index>_" prefix has to be rewritten by substring search.
# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out(); support both.
if hasattr(encoder, 'get_feature_names_out'):
    encoded_features = list(encoder.get_feature_names_out(categorical_columns))
else:
    encoded_features = list(encoder.get_feature_names(categorical_columns))

feature_names.extend(encoded_features)

In [478]:
# Wrap the transformed matrix in a DataFrame labelled with the rebuilt feature names.
X = pd.DataFrame(data=X_values, columns=feature_names)

X.head()

Unnamed: 0,season,month,day,weekDay,G90P,A90P,s90P,xG90P,xA90P,timePlayer,G90T,GC90T,xG90T,xGC90T,G90OT,GC90OT,xG90OT,xGC90OT,homeTeam_AC Milan,homeTeam_Atalanta,homeTeam_Benevento,homeTeam_Bologna,homeTeam_Brescia,homeTeam_Cagliari,homeTeam_Carpi,homeTeam_Cesena,homeTeam_Chievo,homeTeam_Crotone,homeTeam_Empoli,homeTeam_Fiorentina,homeTeam_Frosinone,homeTeam_Genoa,homeTeam_Inter,homeTeam_Juventus,homeTeam_Lazio,homeTeam_Lecce,homeTeam_Napoli,homeTeam_Palermo,homeTeam_Parma,homeTeam_Parma Calcio 1913,homeTeam_Pescara,homeTeam_Roma,homeTeam_SPAL 2013,homeTeam_Salernitana,homeTeam_Sampdoria,homeTeam_Sassuolo,homeTeam_Spezia,homeTeam_Torino,homeTeam_Udinese,homeTeam_Venezia,homeTeam_Verona,awayTeam_AC Milan,awayTeam_Atalanta,awayTeam_Benevento,awayTeam_Bologna,awayTeam_Brescia,awayTeam_Cagliari,awayTeam_Carpi,awayTeam_Cesena,awayTeam_Chievo,awayTeam_Crotone,awayTeam_Empoli,awayTeam_Fiorentina,awayTeam_Frosinone,awayTeam_Genoa,awayTeam_Inter,awayTeam_Juventus,awayTeam_Lazio,awayTeam_Lecce,awayTeam_Napoli,awayTeam_Palermo,awayTeam_Parma,awayTeam_Parma Calcio 1913,awayTeam_Pescara,awayTeam_Roma,awayTeam_SPAL 2013,awayTeam_Salernitana,awayTeam_Sampdoria,awayTeam_Sassuolo,awayTeam_Spezia,awayTeam_Torino,awayTeam_Udinese,awayTeam_Venezia,awayTeam_Verona,homeOrAway_h,position_AMC,position_AML,position_AMR,position_DC,position_DL,position_DMC,position_DML,position_DMR,position_DR,position_FW,position_FWL,position_FWR,position_MC,position_ML,position_MR,position_Sub
0,1.656786,-1.471394,0.738638,0.6657,-0.319785,-0.499094,-0.901708,-0.611322,-0.184259,0.987887,-0.078215,0.816176,-0.501856,0.588886,-0.519508,0.986117,-0.558098,1.086789,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.656786,-1.471394,0.626035,0.111395,-0.190222,-0.019583,-0.540919,-0.323868,0.124068,0.144659,1.709442,-1.08962,2.074106,-0.776914,-0.745962,0.80604,-0.808662,1.027601,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.656786,-1.471394,0.513432,-0.442911,0.226916,-0.143809,-0.616211,-0.216796,-0.272886,0.626111,-0.053822,0.542803,-0.286057,0.186644,0.797412,0.575071,0.357013,-0.028354,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.656786,-1.471394,0.626035,0.111395,0.212598,-0.499094,1.111014,0.666446,-0.265008,-0.600903,1.172796,-0.23045,1.138357,-0.437044,1.163223,0.73949,0.206611,0.207789,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,1.656786,-1.471394,0.626035,0.111395,-0.531355,0.094953,-0.858385,-0.701599,-0.263577,-0.120827,-0.743795,0.800554,-0.80415,1.030746,1.706714,-1.088683,2.063181,-0.776961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [479]:
# Load the pickled model used for the predictions below.
# NOTE(review): pickle.load can execute arbitrary code — only load files from a trusted source.
with open('model/model.pkl', 'rb') as f:
    model = pickle.load(f)

In [480]:
# Predict a bonus for every roster row and rank the players: roles in
# descending order, and within a role the highest bonus first.
y = model.predict(X)
scoring['bonus'] = y
final = scoring.loc[:, ['role', 'name', 'bonus']].sort_values(
    by=['role', 'bonus'],
    ascending=[False, False]
)

In [481]:
# Display the ranked predictions.
final

Unnamed: 0,role,name,bonus
3,D,Robin Gosens,0.624009
6,D,Amir Rrahmani,0.322071
7,D,Milan Skriniar,0.285972
1,D,Matteo Darmian,0.271816
5,D,Kostas Manolas,0.234713
0,D,Tommaso Augello,0.188837
2,D,Lorenzo De Silvestri,0.147431
4,D,Ridgeciano Haps,0.001678
10,C,Antonio Candreva,0.799357
8,C,Giacomo Bonaventura,0.565136


In [482]:
# Candidate formations. Each entry is [n_D, n_C, n_A]: how many players of
# role 'D', 'C' and 'A' are picked, in that order.
formations = [
    [3, 5, 2],
    [3, 4, 3],
    [4, 5, 1],
    [4, 4, 2],
    [4, 3, 3],
    [5, 3, 2],
    [5, 2, 3],
    [5, 4, 1]
]

In [483]:
# Pick the formation whose top players per role have the highest summed bonus.
# Start below any reachable total so a formation is chosen even when every
# predicted bonus is zero or negative (starting at 0 left best_formation None).
best_total = float('-inf')
best_formation = None

for formation in formations:
    # formation lists the slots for roles 'D', 'C' and 'A' in that order.
    # nlargest() takes the best players of the role regardless of how `final`
    # is sorted, and only the numeric 'bonus' column is summed.
    total = sum(
        float(final.loc[final.role == role, 'bonus'].nlargest(slots).sum())
        for role, slots in zip(('D', 'C', 'A'), formation)
    )

    if total > best_total:
        best_total = total
        best_formation = formation

In [484]:
# Display the winning formation as [n_D, n_C, n_A].
best_formation

[3, 4, 3]

In [485]:
# Summed bonus of the best formation plus a fixed offset of 66.
# NOTE(review): 66 is presumably a baseline of 6 points for each of 11 players — confirm.
best_total + 66

75.03118705749512

In [489]:
# Read the clock once so day, month and year always come from the same
# instant (three separate calls could straddle midnight).
today = pd.Timestamp('today')
filename = '{}_{}_{}'.format(today.day, today.month, today.year)

In [491]:
# Save the ranked predictions under resources/predictions/, named after today's date.
final.to_csv('resources/predictions/{}.csv'.format(filename))