### Loading scraped data into the database (initial filling of tables or adding recently scraped data)

In [1]:
import numpy as np
import pandas as pd

import os
import sys

# Absolute path of the project root, resolved from the current working directory.
# Adjust the relative part so it always points at the root project dir
# (e.g. use '../..' if this file sits two folders deep).
root_path = os.path.abspath('..')

# Register the project root on the import path exactly once, so re-running
# this cell does not pile up duplicate entries.
if sys.path.count(root_path) == 0:
    sys.path.append(root_path)

import database_server.db_utilities as dbu 
import database_server.db_inserts as inserts

### Table COUNTRIES

In [4]:
# Inspect the full contents of the countries table.
# NOTE: per the execution counts, this cell was last run after the insert cell
# below, so the displayed rows reflect the state after insertion.
df = dbu.select_query("SELECT * FROM countries;")
df

Unnamed: 0,code,name
0,GER,Germany
1,FRA,France
2,ITA,Italy
3,ESP,Spain
4,ENG,England


In [3]:
# Fill the countries table. The helper prints progress per processed row and a
# summary line; the returned tuple appears to be (rows inserted, rows rejected),
# matching the printed summary -- confirm in database_server.db_inserts.
inserts.insert_countries()

INSERT 0 1
rows processed: 1/5
INSERT 0 1
rows processed: 2/5
INSERT 0 1
rows processed: 3/5
INSERT 0 1
rows processed: 4/5
INSERT 0 1
rows processed: 5/5
------------------------------------------------------------
Inserted 5 new rows into countries table. 0 inserts rejected.


(5, 0)

### Table LEAGUES

In [7]:
# Inspect the full contents of the leagues table.
# NOTE: per the execution counts, this cell was last run after the insert cell
# below, so the displayed rows reflect the state after insertion.
result_df = dbu.select_query("Select * from leagues;")
result_df

Unnamed: 0,id,fbref_id,name,country
0,1,9,Premier League,ENG
1,2,12,La Liga,ESP
2,3,13,Ligue 1,FRA
3,4,20,Bundesliga,GER
4,5,11,Serie A,ITA


In [6]:
# Fill the leagues table. The helper prints progress per processed row and a
# summary line; the returned tuple appears to be (rows inserted, rows rejected),
# matching the printed summary -- confirm in database_server.db_inserts.
inserts.insert_leagues()

INSERT 0 1
rows processed: 1/5
INSERT 0 1
rows processed: 2/5
INSERT 0 1
rows processed: 3/5
INSERT 0 1
rows processed: 4/5
INSERT 0 1
rows processed: 5/5
------------------------------------------------------------
Inserted 5 new rows into leagues table. 0 inserts rejected.


(5, 0)

### Tables TEAMS, MATCHES & MATCHSTATS

In [2]:
# Inspect the teams table: print its (rows, columns) shape, then preview the
# first five rows. Note that `result_df` is reassigned by each check cell.
result_df = dbu.select_query("Select * from teams;")
print(result_df.shape)
result_df.head()

(143, 4)


Unnamed: 0,id,fbref_id,name,country
0,1,822bd0ba,Liverpool,ENG
1,2,60c6b05f,West Brom,ENG
2,3,b2b47a98,Newcastle Utd,ENG
3,4,a2d435b3,Leicester City,ENG
4,5,943e8050,Burnley,ENG


In [3]:
# Inspect the matches table: print its (rows, columns) shape, then preview the
# first five rows. The query has no LIMIT, so the whole table is loaded in
# order to report the total row count.
result_df = dbu.select_query("Select * from matches;")
print(result_df.shape)
result_df.head()

(10760, 9)


Unnamed: 0,id,fbref_id,league_id,home_team_id,away_team_id,schedule_date,schedule_time,schedule_round,schedule_day
0,1,a68e623d,3,89,109,2017-08-04,20:45:00,Matchweek 1,Fri
1,2,37f2c25f,3,92,111,2017-08-05,17:15:00,Matchweek 1,Sat
2,3,4d28b63b,3,100,105,2017-08-05,20:00:00,Matchweek 1,Sat
3,4,68b9eea2,3,103,97,2017-08-05,20:00:00,Matchweek 1,Sat
4,5,b2829d08,3,108,110,2017-08-05,20:00:00,Matchweek 1,Sat


In [8]:
# Inspect a sample of the matchstats table. The query is capped at 10 rows, so
# the printed shape shows the sample size and column count, not the size of the
# full table; head() then previews the first five of those rows.
result_df = dbu.select_query("Select * from matchstats limit 10;")
print(result_df.shape)
result_df.head()

(10, 153)


Unnamed: 0,venue,result,gf,ga,xg,xga,attendance,captain,formation,referee,...,misc_performance_crdr,misc_performance_2crdy,misc_performance_fls,misc_performance_fld,misc_performance_off,misc_performance_og,misc_performance_recov,misc_aerialduels_won,misc_aerialduels_lost,misc_aerialduels_won_perc
0,Home,W,3,0,1.1,1.4,39226,Gianluigi Buffon,4-2-3-1,Fabio Maresca,...,0,0,13,10,2,0,57,8,9,47.1
1,Away,W,4,2,2.1,1.2,26296,Gianluigi Buffon,4-2-3-1,Luca Banti,...,0,0,8,12,0,1,43,10,8,55.6
2,Home,W,3,0,1.3,0.6,39457,Stephan Lichtsteiner,4-3-3,Michael Fabbri,...,0,0,15,6,4,0,50,10,10,50.0
3,Away,W,3,1,1.0,1.4,21584,Gianluigi Buffon,4-2-3-1,Davide Massa,...,0,0,14,15,1,0,56,13,7,65.0
4,Home,W,1,0,0.9,0.4,35652,Andrea Barzagli,4-2-3-1,Daniele Doveri,...,0,0,10,23,3,0,54,9,8,52.9


In [5]:
# Load match data into the database. In the recorded run the helper processes
# the teams table first and then a larger batch of rows for a further table.
# NOTE(review): include_archive / include_new presumably choose between
# archived and newly scraped data -- confirm in database_server.db_inserts.
inserts.insert_match_data(include_archive=False, include_new=True)

INSERT 0 0
rows processed: 10/99
INSERT 0 0
rows processed: 20/99
INSERT 0 0
rows processed: 30/99
INSERT 0 0
rows processed: 40/99
INSERT 0 0
rows processed: 50/99
INSERT 0 0
rows processed: 60/99
INSERT 0 0
rows processed: 70/99
INSERT 0 0
rows processed: 80/99
INSERT 0 0
rows processed: 90/99
INSERT 0 0
rows processed: 99/99
------------------------------------------------------------
Inserted 0 new rows into teams table. 99 inserts rejected.
INSERT 0 0
rows processed: 50/1827
INSERT 0 0
rows processed: 100/1827
INSERT 0 0
rows processed: 150/1827
INSERT 0 0
rows processed: 200/1827
INSERT 0 0
rows processed: 250/1827
INSERT 0 0
rows processed: 300/1827
INSERT 0 0
rows processed: 350/1827
INSERT 0 0
rows processed: 400/1827
INSERT 0 0
rows processed: 450/1827
INSERT 0 0
rows processed: 500/1827
INSERT 0 0
rows processed: 550/1827
INSERT 0 0
rows processed: 600/1827
INSERT 0 0
rows processed: 650/1827
INSERT 0 0
rows processed: 700/1827
INSERT 0 0
rows processed: 750/1827
INSERT 0 0


### Table TEAMWAGES

In [9]:
# Inspect the teamwages table: load all rows and preview the first five.
teamwages_df = dbu.select_query("SELECT * FROM teamwages;")
teamwages_df.head()

Unnamed: 0,team_id,season_str,n_players,pct_estimated,weekly_wages_eur,weekly_wages_gbp,weekly_wages_usd,annual_wages_eur,annual_wages_gbp,annual_wages_usd
0,28,2017-2018,35,100.0,3810955,3195596,3883356,198169670,166171000,201934520
1,29,2017-2018,45,100.0,3629433,3043385,3698385,188730521,158256000,192316043
2,18,2017-2018,32,100.0,3474446,2913423,3540453,180671167,151498000,184103578
3,27,2017-2018,45,100.0,2790189,2339654,2843197,145089806,121662000,147846240
4,1,2017-2018,34,100.0,2194158,1839865,2235843,114096241,95673000,116263857


In [13]:
# Fill the teamwages table. The helper prints progress per batch and a summary
# line; the returned tuple appears to be (rows inserted, rows rejected),
# matching the printed summary -- confirm in database_server.db_inserts.
inserts.insert_teamwages()

INSERT 0 25
rows processed: 25/584
INSERT 0 25
rows processed: 50/584
INSERT 0 25
rows processed: 75/584
INSERT 0 25
rows processed: 100/584
INSERT 0 25
rows processed: 125/584
INSERT 0 25
rows processed: 150/584
INSERT 0 25
rows processed: 175/584
INSERT 0 25
rows processed: 200/584
INSERT 0 25
rows processed: 225/584
INSERT 0 25
rows processed: 250/584
INSERT 0 25
rows processed: 275/584
INSERT 0 25
rows processed: 300/584
INSERT 0 25
rows processed: 325/584
INSERT 0 25
rows processed: 350/584
INSERT 0 25
rows processed: 375/584
INSERT 0 25
rows processed: 400/584
INSERT 0 25
rows processed: 425/584
INSERT 0 25
rows processed: 450/584
INSERT 0 25
rows processed: 475/584
INSERT 0 25
rows processed: 500/584
INSERT 0 25
rows processed: 525/584
INSERT 0 25
rows processed: 550/584
INSERT 0 25
rows processed: 575/584
INSERT 0 9
rows processed: 584/584
------------------------------------------------------------
Inserted 584 new rows into teamwages table. 0 inserts rejected.


(584, 0)