In [1]:
import requests
import pandas as pd
from tqdm import tqdm

import matplotlib.pyplot as plt

In [2]:
## Root of the StatsBomb open-data files served as raw GitHub content ##
base_url = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/"

## URL templates; the empty {} placeholders are filled in later via str.format ##
comp_url = f"{base_url}matches/{{}}/{{}}.json"
match_url = f"{base_url}events/{{}}.json"

In [3]:
def _shot_records(events, match_id):
    '''
    Builds one flat record per shot event found in a match's event list.

    Parameters:
            events (list): Event dicts of a single match
            match_id (int): Id of the match the events belong to

    Returns:
            records (list): One dict per shot with the keys match_id, team,
            player, x, y and outcome
    '''
    records = []
    for event in events:
        if event['type']['name'] != "Shot":
            continue

        ## Attribute the shot to the team recorded on the event itself; the
        ## team in possession is only a fallback when no "team" entry exists ##
        team = event.get("team") or event["possession_team"]

        records.append({
            "match_id": match_id,
            "team": team["name"],
            "player": event['player']['name'],
            "x": event['location'][0],
            "y": event['location'][1],
            "outcome": event['shot']['outcome']['name'],
        })
    return records


def parse_data(comp_id, season_id, timeout=30):
    '''
    Downloads every match of a competition season and collects its shots.

    Parameters:
            comp_id (int): Competition id for Stats_Bomb database
            season_id (int): Season id for Stats_Bomb database
            timeout (float): Seconds to wait for each HTTP request

    Returns:
            events_df (DataFrame): One row per shot with the columns
            match_id, team, player, x, y and outcome. The columns are
            present even when no shot was found.

    Raises:
            requests.exceptions.HTTPError: A request returned an error status
            requests.exceptions.Timeout: A request exceeded the timeout
    '''
    columns = ["match_id", "team", "player", "x", "y", "outcome"]
    all_events = []

    ## One session for all requests so the connection gets reused ##
    with requests.Session() as session:

        ## Get matches from url ##
        response = session.get(url=comp_url.format(comp_id, season_id), timeout=timeout)
        ## Fail loudly on 4xx/5xx instead of trying to decode an error page ##
        response.raise_for_status()

        ## Obtain match_ids ##
        match_ids = [m['match_id'] for m in response.json()]

        ## Iterate through, show progress bar ##
        for match_id in tqdm(match_ids):

            ## Get events ##
            response = session.get(url=match_url.format(match_id), timeout=timeout)
            response.raise_for_status()

            all_events.extend(_shot_records(response.json(), match_id))

    ## Explicit columns keep the schema stable for an empty result ##
    events_df = pd.DataFrame(all_events, columns=columns)

    return events_df
    

In [4]:
## Stats_Bomb ids: competition of the 2018 World Cup, and its 2018 season ##
comp_id, season_id = 43, 3


In [5]:
## Pull the shots of every match in the selected competition and season ##
df = parse_data(comp_id=comp_id, season_id=season_id)

100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [00:33<00:00,  1.91it/s]


In [6]:
## Exploration ##

## Compare two prolific strikers to see who converted more of their shots ##
## into goals; normalizing by total shots keeps the comparison unbiased ##

## Shots taken by Harry Kane ##
HK = df[df["player"].eq("Harry Kane")]

## Shots taken by Antoine Griezmann ##
AG = df[df["player"].eq("Antoine Griezmann")]


In [7]:
## Create Stacked Bar Chart Visualising the results ## 

## First, some plotting aesthetics ##
