In [37]:
import pprint
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from json_shot_scraper import flatten_shot, flatten_goal, flatten_complete_pass, flatten_incomplete_pass, flatten_corner
from  player_scraper import flatten_player, flatten_sub
from dataframe_cleaner import (pass_to_shot, corner_to_shot, transpose_coordinates, coord_to_yards, 
                               shot_distance_angle, dummy_columns, drop_own_goals, goal_dummy, minutes_played)

In [2]:
from html_scraper import db
from mongo_to_db import game_to_cleaned_df, create_frame, create_master_df

In [3]:
pd.set_option('display.max_columns', 50)

In [4]:
with open ('data/test.json') as f:
    data = json.load(f)
    game_id = 448628

# Players

In [38]:
players = list(data['players'].items())
player_list_dicts = [flatten_player(player, game_id) for player in players]
player_df = pd.DataFrame(player_list_dicts)

In [39]:
player_df.head()

Unnamed: 0,game_id,name,player_id,position_id,squad_number,substitute,team_id
0,448628,Luciano Pocrnjic,382.0,1,1.0,False,122
1,448628,Fabián Assmann,8724.0,1,35.0,True,122
2,448628,Emanuel Iñíguez,205120.0,2,21.0,False,122
3,448628,Emiliano Amor,137579.0,2,32.0,False,122
4,448628,Leonel Galeano,47808.0,2,6.0,False,122


In [16]:
starters = player_df[(player_df['substitute'] == False) & player_df['squad_number'].notnull()].copy()

In [17]:
starters

Unnamed: 0,game_id,name,player_id,position_id,squad_number,substitute,team_id
0,448628,Luciano Pocrnjic,382.0,1,1.0,False,122
2,448628,Emanuel Iñíguez,205120.0,2,21.0,False,122
3,448628,Emiliano Amor,137579.0,2,32.0,False,122
4,448628,Leonel Galeano,47808.0,2,6.0,False,122
5,448628,Lucas Villalba,89096.0,2,23.0,False,122
8,448628,Nahuel Yeri,55627.0,3,25.0,False,122
9,448628,Fernando Godoy,45616.0,3,15.0,False,122
10,448628,Iván Colman,114498.0,3,28.0,False,122
13,448628,Facundo Castillón,20478.0,4,19.0,False,122
14,448628,Fernando Telechea,29345.0,4,9.0,False,122


# Subs

In [8]:
subs = list(data['incidences']['substitutions'].items())
subs_dicts = [flatten_sub(sub, game_id) for sub in subs]
subs_df = pd.DataFrame(subs_dicts)

In [9]:
subs_df

Unnamed: 0,game_id,player_off,player_on,sub_id,sub_type,team_id,time_of_event(min)
0,448628,56334.0,186030.0,23171005,7,490,40.183333
1,448628,126700.0,15393.0,23171619,7,490,45.0
2,448628,114498.0,63356.0,23172011,7,122,65.05
3,448628,20478.0,163289.0,23172324,7,122,75.233333
4,448628,145031.0,19308.0,23172334,7,490,75.566667
5,448628,29345.0,10752.0,23172547,7,122,81.75


# Merge

In [19]:
player_df.head()

Unnamed: 0,game_id,name,player_id,position_id,squad_number,substitute,team_id
0,448628,Luciano Pocrnjic,382.0,1,1.0,False,122
1,448628,Fabián Assmann,8724.0,1,35.0,True,122
2,448628,Emanuel Iñíguez,205120.0,2,21.0,False,122
3,448628,Emiliano Amor,137579.0,2,32.0,False,122
4,448628,Leonel Galeano,47808.0,2,6.0,False,122


In [20]:
player_df['minutes'] = player_df['substitute'] * 90

In [21]:
player_df.head()

Unnamed: 0,game_id,name,player_id,position_id,squad_number,substitute,team_id,minutes
0,448628,Luciano Pocrnjic,382.0,1,1.0,False,122,0
1,448628,Fabián Assmann,8724.0,1,35.0,True,122,90
2,448628,Emanuel Iñíguez,205120.0,2,21.0,False,122,0
3,448628,Emiliano Amor,137579.0,2,32.0,False,122,0
4,448628,Leonel Galeano,47808.0,2,6.0,False,122,0


In [22]:
subs_df
    

Unnamed: 0,game_id,player_off,player_on,sub_id,sub_type,team_id,time_of_event(min)
0,448628,56334.0,186030.0,23171005,7,490,40.183333
1,448628,126700.0,15393.0,23171619,7,490,45.0
2,448628,114498.0,63356.0,23172011,7,122,65.05
3,448628,20478.0,163289.0,23172324,7,122,75.233333
4,448628,145031.0,19308.0,23172334,7,490,75.566667
5,448628,29345.0,10752.0,23172547,7,122,81.75


In [35]:
for indx, row1 in subs_df.iterrows():
    player_off = row1['player_off']
    player_on = row1['player_on']
    minute = row1['time_of_event(min)']
    for indx, row2 in player_df.iterrows():
        if player_off == row2['player_id']:
            player_df.loc[indx, 'minutes'] = minute
        elif player_on == row2['player_id']:
            player_df.loc[indx, 'minutes'] = 90 - minute

In [40]:
player_df.head()

Unnamed: 0,game_id,name,player_id,position_id,squad_number,substitute,team_id
0,448628,Luciano Pocrnjic,382.0,1,1.0,False,122
1,448628,Fabián Assmann,8724.0,1,35.0,True,122
2,448628,Emanuel Iñíguez,205120.0,2,21.0,False,122
3,448628,Emiliano Amor,137579.0,2,32.0,False,122
4,448628,Leonel Galeano,47808.0,2,6.0,False,122


In [48]:
player_minutes_df = minutes_played(subs_df, player_df)

In [50]:
player_minutes_df.head()

Unnamed: 0,game_id,name,player_id,position_id,squad_number,substitute,team_id,minutes,minutes_played
0,448628,Luciano Pocrnjic,382.0,1,1.0,False,122,0.0,0.0
1,448628,Fabián Assmann,8724.0,1,35.0,True,122,90.0,90.0
2,448628,Emanuel Iñíguez,205120.0,2,21.0,False,122,0.0,0.0
3,448628,Emiliano Amor,137579.0,2,32.0,False,122,0.0,0.0
4,448628,Leonel Galeano,47808.0,2,6.0,False,122,0.0,0.0
