In [1]:
import os

# Retrieve local copies of the raw and prepared data from DVC.
# Check out the README for instructions on how to gain access.
# The pull command below is left commented out.

# os.system("dvc pull")

# Make the local checkout the working directory for the rest of the notebook.
# NOTE(review): the checkout location is hard-coded to ~/transfermarkt-datasets;
# adjust it if the repository lives elsewhere.
home = os.path.expanduser("~/transfermarkt-datasets") 
os.chdir(home)

In [2]:
# Initialize the asset runner.

import pandas as pd
from prep.asset_runner import AssetRunner

# 'data/raw' is a relative path; presumably it is resolved against the current
# working directory, which the first cell sets to the local checkout - TODO confirm.
runner = AssetRunner('data/raw')

In [3]:
# Kick off processing. This loads and post-processes the data.
# After processing, the raw and prepared datasets are available as pandas DataFrames.


runner.process_assets()

Name          Path      Seasons
------------  --------  ------------------------------------------------
games         data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
clubs         data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
players       data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
appearances   data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
competitions  data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]

--- Loading 5 assets ---
---- Processing games
metric      season    home_club_goals    away_club_goals
--------  --------  -----------------  -----------------
count     43161.00           43161.00           43161.00
mean       2017.24               1.59               1.33
std           9.97               1.44               1.37
min          -1.00               0.00               0.00
25%        2015.00               1.00               0.00
50%        2017.00               1.00               1.00
75%        2019.00        

In [4]:
# List the available assets (printed as a table of name, path and seasons).

print(runner.prettify_asset_processors())

Name          Path      Seasons
------------  --------  ------------------------------------------------
games         data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
clubs         data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
players       data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
appearances   data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]
competitions  data/raw  [2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021]


In [5]:
# Get a reference to one asset: here, the first entry of runner.assets.
# The entry is indexed by string keys; 'name' gives the asset's name.

asset = runner.assets[0]
asset['name']

'games'

In [6]:
# Get a reference to the pandas DataFrames containing the raw and prepared data.

# NOTE(review): `asset` is runner.assets[0], whose name was displayed as 'games',
# so despite its name `clubs` holds the processor of the games asset.
# Consider renaming it once no other cell depends on `clubs`.
clubs = asset['processor']

# raw_dfs holds the raw data, prep_df the prepared data.
raw = clubs.raw_dfs
prep = clubs.prep_df

# Summary statistics of the prepared frame's numeric columns.
prep.describe()

Unnamed: 0,season,home_club_goals,away_club_goals
count,43161.0,43161.0,43161.0
mean,2017.241607,1.589861,1.325317
std,9.973919,1.436884,1.371118
min,-1.0,0.0,0.0
25%,2015.0,1.0,0.0
50%,2017.0,1.0,1.0
75%,2019.0,2.0,2.0
max,2021.0,16.0,16.0


In [7]:
# If you only need the prepared dataset, you can fetch it by asset name like this.
# NOTE(review): a bare assignment displays nothing; end the cell with an
# expression such as `players.head()` if the frame should be previewed.

players = runner.get_asset_df("players")

Unnamed: 0,player_id,last_season,current_club_id,name,pretty_name,country_of_birth,country_of_citizenship,date_of_birth,position,sub_position,foot,height_in_cm,market_value_in_gbp,highest_market_value_in_gbp,url
26,38790,2014,28095,dmitri-golubov,Dmitri Golubov,UdSSR,Russia,1985-06-24,Attack,Centre-Forward,Both,178,,675000.0,https://www.transfermarkt.co.uk/dmitri-golubov...
34,106539,2014,28095,aleksandr-vasiljev,Aleksandr Vasiljev,Russia,Russia,1992-01-23,Attack,Right Winger,Right,182,,225000.0,https://www.transfermarkt.co.uk/aleksandr-vasi...
46,164389,2014,2288,rory-donnelly,Rory Donnelly,Northern Ireland,Northern Ireland,1992-02-18,Attack,Centre-Forward,Right,188,113000.0,225000.0,https://www.transfermarkt.co.uk/rory-donnelly/...
50,45247,2014,1162,damien-perquis,Damien Perquis,France,France,1986-03-08,Goalkeeper,Goalkeeper,Right,186,,900000.0,https://www.transfermarkt.co.uk/damien-perquis...
51,97205,2014,28095,nikita-bezlikhotnov,Nikita Bezlikhotnov,UdSSR,Russia,1990-08-19,Attack,Right Winger,Right,179,,675000.0,https://www.transfermarkt.co.uk/nikita-bezlikh...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6577,334802,2021,932,jan-kuchta,Jan Kuchta,,Czech Republic,1997-01-08,Attack,Centre-Forward,Right,184,4500000.0,4500000.0,https://www.transfermarkt.co.uk/jan-kuchta/pro...
6578,268112,2021,932,gyrano-kerk,Gyrano Kerk,Netherlands,Netherlands,1995-12-02,Attack,Right Winger,Right,184,5400000.0,5850000.0,https://www.transfermarkt.co.uk/gyrano-kerk/pr...
6579,333802,2021,932,francois-kamano,Francois Kamano,Guinea,Guinea,1996-05-01,Attack,Left Winger,Right,182,5850000.0,13500000.0,https://www.transfermarkt.co.uk/francois-kaman...
6580,539065,2021,932,alexis-beka-beka,Alexis Beka Beka,France,France,2001-03-29,Midfield,Defensive Midfield,Right,178,3150000.0,3150000.0,https://www.transfermarkt.co.uk/alexis-beka-be...
