In [4]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import pandas as pd
import sqlite3

PATH = 'chromedriver'

In [5]:
driver = webdriver.Chrome(PATH)

# Scraping Function

In [6]:
#takes a link, the table id, and the index of the header row, returns the data as a df
def get_data(url, findId, headerIndex):
    driver.get(url)
    res = driver.find_element(By.ID, findId)
    rows = res.find_elements(By.TAG_NAME, "tr")
    #get column names from first row
    cols = []
    for ele in rows[headerIndex].find_elements(By.TAG_NAME, "th"):
        cols.append(ele.text)
    cols[1:]
    #create dataframe and append table rows to it
    stats = pd.DataFrame(columns = cols[1:])
    for row in rows:
        player = []
        for td in row.find_elements(By.TAG_NAME, "td"):
            player.append(td.text)
        #try except is needed here so that header rows don't throw an error when added to the df and are instead skipped
        try:
            stats.loc[len(stats)] = player
        except:
            continue
    return(stats)

In [7]:
#Get Player data for all major stats
gsc_stats  = get_data('https://fbref.com/en/comps/22/gca/Major-League-Soccer-Stats', 'stats_gca', 1)
defense_stats = get_data('https://fbref.com/en/comps/22/defense/Major-League-Soccer-Stats', 'stats_defense', 1)
keeper_stats = get_data('https://fbref.com/en/comps/22/keepers/Major-League-Soccer-Stats', 'stats_keeper', 1)
misc_stats = get_data('https://fbref.com/en/comps/22/misc/Major-League-Soccer-Stats', 'stats_misc', 1)
pass_stats = get_data('https://fbref.com/en/comps/22/passing/Major-League-Soccer-Stats', 'stats_passing', 1)
possession_stats = get_data('https://fbref.com/en/comps/22/possession/Major-League-Soccer-Stats', 'stats_possession', 1)
shooting_stats = get_data('https://fbref.com/en/comps/22/shooting/Major-League-Soccer-Stats', 'stats_shooting', 1)
time_stats = get_data('https://fbref.com/en/comps/22/playingtime/Major-League-Soccer-Stats', 'stats_playing_time', 1)

In [8]:
#Get Team data for all major stats
east_squads = get_data('https://fbref.com/en/comps/22/Major-League-Soccer-Stats#all_stats_squads_standard', 'results2022221Eastern-Conference_overall', 0)
west_squads = get_data('https://fbref.com/en/comps/22/Major-League-Soccer-Stats#all_stats_squads_standard', 'results2022221Western-Conference_overall', 0)

squad_goalkeeping = get_data('https://fbref.com/en/comps/22/keepers/Major-League-Soccer-Stats', 'stats_squads_keeper_for', 1)
squad_gsc  = get_data('https://fbref.com/en/comps/22/gca/Major-League-Soccer-Stats', 'stats_squads_gca_for', 1)
squad_defense = get_data('https://fbref.com/en/comps/22/defense/Major-League-Soccer-Stats', 'stats_squads_defense_for', 1)
squad_misc = get_data('https://fbref.com/en/comps/22/misc/Major-League-Soccer-Stats', 'stats_squads_misc_for', 1)
squad_pass = get_data('https://fbref.com/en/comps/22/passing/Major-League-Soccer-Stats', 'stats_squads_passing_for', 1)
squad_possession = get_data('https://fbref.com/en/comps/22/possession/Major-League-Soccer-Stats', 'stats_squads_possession_for', 1)
squad_shooting = get_data('https://fbref.com/en/comps/22/shooting/Major-League-Soccer-Stats', 'stats_squads_shooting_for', 1)
squad_time = get_data('https://fbref.com/en/comps/22/playingtime/Major-League-Soccer-Stats', 'stats_squads_playing_time_for', 1)


In [9]:
#creates function to add squad name to each squad df instead of an id number, since there will never be duplicate team names
squad_dict = {0:'Atlanta Utd', 1: 'Austin FC', 2:'CF Montréal', 3:'Charlotte FC', 4: 'Chicago Fire', 5:'Colorado Rapids', 6:'Columbus Crew', 7:'D.C. United', 8:'FC Cincinnati', 9:'FC Dallas', 10:'Houston Dynamo', 11:'Inter Miami', 12:'LA Galaxy', 13:'Los Angeles FC', 14:'Minnesota Utd', 15:'Nashville', 16:'New England', 17:'NY Red Bulls', 18:'NYCFC', 19:'Orlando City', 20:'Philadelphia', 21:'Portland Timbers', 22:'Real Salt Lake', 23:'San Jose', 24:'Seattle', 25:'Sporting KC', 26:'Toronto FC', 27:'Vancouver'}

def add_squads(df):
    df.reset_index(inplace = True)
    df['squad'] = df['index'].apply(lambda x: squad_dict[x])
    df.drop('index', inplace = True, axis = 1)

In [10]:
add_squads(squad_goalkeeping)
add_squads(squad_gsc)
add_squads(squad_defense)
add_squads(squad_misc)
add_squads(squad_pass)
add_squads(squad_possession)
add_squads(squad_shooting)
add_squads(squad_time)

In [11]:
#Combine east and west team dfs to one big team df
squads = pd.concat([east_squads, west_squads]).reset_index(drop=True)

In [12]:
squads.head()

Unnamed: 0,Squad,MP,W,D,L,GF,GA,GD,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Last 5,Attendance,Top Team Scorer,Goalkeeper,Notes
0,Philadelphia,33,18,10,5,68,26,42,64,1.94,59.1,38.7,20.5,0.62,W W W D L,18057,Dániel Gazdag - 19,Andre Blake,
1,CF Montréal,33,19,5,9,60,49,11,62,1.88,49.6,35.2,14.5,0.44,W D W W W,15090,Romell Quioto - 15,Sebastian Breza,
2,NYCFC,33,15,7,11,55,40,15,52,1.58,58.4,37.9,20.5,0.62,L D L W W,17180,Valentín Castellanos - 13,Sean Johnson,
3,NY Red Bulls,33,14,8,11,48,41,7,50,1.52,44.8,38.5,6.3,0.19,W L W L L,15447,Lewis Morgan - 14,Carlos Coronel,
4,FC Cincinnati,33,11,13,9,59,54,5,46,1.39,52.8,47.7,5.1,0.15,D W W D L,22487,Brandon Vazquez - 17,Roman Celentano,


# Player Database prep and creation

In [13]:
defense_stats.reset_index()

#Create dictionary of player ids so id can be used as a unique identifier for players, as 2 players could have the same name
player_ids = dict(zip(time_stats.Player, time_stats.index))

In [14]:
def set_id(df):
    df['player_id'] = df['Player'].apply(lambda x: player_ids[x])
    df.drop('Player', axis=1, inplace = True)



In [15]:
#Add Player ID to each table instead of player name
set_id(gsc_stats)
set_id(defense_stats)
set_id(keeper_stats)
set_id(pass_stats)
set_id(possession_stats)
set_id(shooting_stats)
set_id(misc_stats)
time_stats['player_id'] = time_stats['Player'].apply(lambda x: player_ids[x])
players = time_stats[['player_id', 'Player', 'Nation', 'Pos', 'Squad', 'Age', 'Born', '90s']]
time_stats.drop('Player', axis=1, inplace = True)

In [16]:
#Remove calculated and redundant fields from dataframes
gsc_stats.drop(['Nation', 'Pos', 'Squad', 'Age', 'Born', '90s', 'Matches', 'SCA', 'SCA90', 'GCA', 'GCA90'], axis =1, inplace = True)
defense_stats.drop(['Nation', 'Pos', 'Squad', 'Age', 'Born', '90s', 'Tkl%', '%', 'Matches', 'Tkl+Int'], axis=1, inplace = True)
keeper_stats.drop(['Nation', 'Pos', 'Squad', 'Age', 'Born', 'MP', 'Min', '90s', 'GA90', 'Save%', 'CS%', 'Matches', 'W', 'D', 'L'], axis=1, inplace = True)
pass_stats.drop(['Nation', 'Pos', 'Squad', 'Age', 'Born', '90s', 'Cmp%', 'Cmp%', 'Cmp%', 'Cmp%', 'A-xA', '1/3', 'Matches'], axis=1, inplace = True)
possession_stats.drop(['Nation', 'Pos', 'Squad', 'Age', 'Born', '90s', 'Touches', 'Succ%', '1/3', 'Rec%', 'Matches'], axis=1, inplace = True)
shooting_stats.drop(['Nation', 'Pos', 'Squad', 'Age', 'Born', '90s', 'SoT%', 'Sh/90', 'SoT/90', 'G/Sh', 'G/SoT', 'npxG/Sh', 'G-xG', 'np:G-xG', 'Matches'], axis=1, inplace = True)
misc_stats.drop(['Nation', 'Pos', 'Squad', 'Age', 'Born', '90s', 'Int', 'TklW', 'Won%', 'Matches'], axis=1, inplace = True)
time_stats.drop(['Nation', 'Pos', 'Squad', 'Age', 'Born', 'Mn/MP', 'Min%', '90s', '+/-', '+/-90', 'xG+/-', 'xG+/-90', 'Matches'], axis=1, inplace = True)

In [17]:
# Rename columns for database upload, drop calculated columns that weren't dropped earlier due to column name overlap
keeper_stats.rename(columns = {'Starts':'starts', 'GA':'ga', 'SoTA':'sota', 'Saves':'saves', 'CS':'cs', 'PKatt':'pkAtt', 'PKA':'pka', 'PKsv':'pksv', 'PKm':'pkm', 'player_id':'player_id'}, inplace = True)
gsc_stats.set_axis(['scPassLive', 'scPassDead', 'scDrib', 'scSh', 'scFld', 'scDef', 'gcPassLive', 'gcPassDead', 'gcDrib', 'gcSh', 'gcFld', 'gcDef', 'player_id' ], axis=1, inplace=True)
defense_stats.set_axis(['tkl', 'tklW', 'tklDefThird', 'tklMidThird', 'tklAttThird', 'tklVsDrib', 'tklAttVsDrib', 'past', 'press', 'pressW', 'pressDefThird', 'pressMidThird', 'pressAttThird', 'blk', 'blkSh', 'blkSot', 'blkPass', 'int', 'clr', 'err', 'player_id'], axis=1, inplace = True)
pass_stats.set_axis(['totCmp', 'totAtt', 'totDist', 'prgDist', 'shortCmp', 'shortAtt', 'medCmp', 'medAtt', 'lngCmp', 'lngAtt', 'ast', 'xA', 'kp', 'ppa', 'crsPa', 'progPasses', 'player_id' ], axis=1, inplace=True)
pass_stats.drop(['totCmp', 'totAtt'], axis = 1, inplace = True)
possession_stats.set_axis(['defPen', 'defThird', 'midThird', 'attThird', 'attPen', 'liveTouches', 'succDrib', 'attDrib', 'playersBeat', 'megs', 'carries', 'totDist', 'prgDist', 'progCarries', 'cpa', 'mis', 'tackled', 'passTarg', 'passRec', 'progPassRec', 'player_id'], axis=1, inplace=True)
shooting_stats.set_axis(['gls', 'sh', 'sot', 'avgDist', 'fk', 'pk', 'pkAtt', 'xG', 'npxG', 'player_id'], axis=1, inplace=True)
misc_stats.set_axis(['crdY', 'crdR', 'crdY2', 'fls', 'fld', 'off', 'crs', 'pkWon', 'PKcon', 'og', 'recov', 'airW', 'airL', 'player_id'], axis=1, inplace=True)
time_stats.set_axis(['mp', 'min', 'starts', 'mn/start', 'compl', 'subs', 'mn/sub', 'unSub', 'ppm', 'onG', 'onGa', 'on-off', 'onxG', 'onxGa', 'xon-off', 'player_id'], axis=1, inplace=True)
players.set_axis(['id', 'name', 'nation', 'pos', 'squad', 'age', 'born', '90s'], axis = 1, inplace = True)

In [18]:
#remove days from age, make it an int field
players['age'] = players['age'].apply(lambda x: x[0:2])
avg_age = int(players[players['age'] != ''].age.astype('int').mean())
players['age'] = players['age'].apply(lambda x: avg_age if x == '' else x)
players['age'] = players['age'].astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players['age'] = players['age'].apply(lambda x: x[0:2])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players['age'] = players['age'].apply(lambda x: avg_age if x == '' else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players['age'] = players['age'].astype('int')


In [19]:
#For players without an age impute the avg age
avg_born = players[players['born'] != ''].born.astype('int').mean()
players['born'] = players['born'].apply(lambda x: avg_born if x == '' else x)
players['born'] = players['born'].astype('int')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players['born'] = players['born'].apply(lambda x: avg_born if x == '' else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players['born'] = players['born'].astype('int')


In [20]:
#convert on-offs to floats
time_stats['on-off'] = time_stats['on-off'].apply(lambda x: 0 if x == '' else x)
time_stats['on-off'] = time_stats['on-off'].astype('float')

time_stats['xon-off'] = time_stats['xon-off'].apply(lambda x: 0 if x == '' else x)
time_stats['xon-off'] = time_stats['xon-off'].astype('float')

time_stats.head()

Unnamed: 0,mp,min,starts,mn/start,compl,subs,mn/sub,unSub,ppm,onG,onGa,on-off,onxG,onxGa,xon-off,player_id
0,23,432,2,69.0,0,21,14,6,2.13,8,5,-0.76,7.5,6.7,-0.53,0
1,31,2432,28,85.0,23,3,19,0,1.26,35,40,0.82,38.9,34.2,0.78,1
2,30,2174,26,79.0,10,4,33,1,1.7,37,27,-0.15,40.9,24.7,0.18,2
3,15,382,0,,0,15,25,13,0.67,7,6,0.72,6.0,6.9,0.34,3
4,25,1810,21,82.0,12,4,20,4,1.48,32,32,0.85,30.1,27.6,0.24,4


In [21]:
#add to db next
#connect to mlsdb and create/update players table
with sqlite3.connect('mlsdb.sqlite') as conn:
    players.to_sql('players', conn, if_exists='replace', index = False, dtype = {'id':'INTEGER', 'name':'TEXT', 'nation':'TEXT', 'pos':'TEXT', 'squad':'TEXT', 'age':'INTEGER', 'born':'INTEGER', '90s':'REAL'})
    keeper_stats.to_sql('playerKeepers', conn, if_exists='replace', index = False, dtype={'starts':'INTEGER', 'ga':'INTEGER', 'sota':'INTEGER', 'saves':'INTEGER', 'cs':'INTEGER', 'ppkAtt':'INTEGER', 'pka':'INTEGER', 'pksv':'INTEGER', 'pkm':'INTEGER', 'player_id':'INTEGER'})
    defense_stats.to_sql('playerDefense', conn, if_exists='replace', index = False, dtype={'tkl':'INTEGER', 'tklW':'INTEGER', 'tklDefThird':'INTEGER', 'tklMidThird':'INTEGER', 'tklAttThird':'INTEGER', 'tklVsDrib':'INTEGER', 'tklAttVsDrib':'INTEGER', 'past':'INTEGER', 'press':'INTEGER', 'pressW':'INTEGER', 'pressDefThird':'INTEGER', 'pressMidThird':'INTEGER', 'pressAttThird':'INTEGER', 'blk':'INTEGER', 'blkSh':'INTEGER', 'blkSot':'INTEGER', 'blkPass':'INTEGER', 'int':'INTEGER', 'clr':'INTEGER', 'err':'INTEGER', 'player_id':'INTEGER'}) 
    gsc_stats.to_sql('playerGsc', conn, if_exists='replace', index = False, dtype = {'scPassLive':'INTEGER', 'scPassDead':'INTEGER', 'scDrib':'INTEGER', 'scSh':'INTEGER', 'scFld':'INTEGER', 'scDef':'INTEGER','gcPassLive':'INTEGER', 'gcPassDead':'INTEGER', 'gcDrib':'INTEGER', 'gcSh':'INTEGER', 'gcFld':'INTEGER', 'gcDef':'INTEGER', 'player_id':'INTEGER'})
    misc_stats.to_sql('playerMisc', conn, if_exists='replace', index = False, dtype={'crdY':'INTEGER', 'crdR':'INTEGER', 'crdY2':'INTEGER', 'fls':'INTEGER', 'fld':'INTEGER', 'off':'INTEGER', 'crs':'INTEGER', 'pkWon':'INTEGER', 'PKcon':'INTEGER','og':'INTEGER', 'recov':'INTEGER', 'airW':'INTEGER', 'airL':'INTEGER', 'player_id':'INTEGER'})  
    pass_stats.to_sql('playerPassing', conn, if_exists='replace', index = False, dtype={'totDist':'INTEGER', 'prgDist':'INTEGER', 'shortCmp':'INTEGER', 'shortAtt':'INTEGER', 'medCmp':'INTEGER', 'medAtt':'INTEGER','lngCmp':'INTEGER', 'lngAtt':'INTEGER', 'ast':'INTEGER', 'xA':'INTEGER', 'kp':'INTEGER', 'ppa':'INTEGER', 'crsPa':'INTEGER', 'progPasses':'INTEGER','player_id':'INTEGER'})
    possession_stats.to_sql('playerPossession', conn, if_exists='replace', index = False, dtype={'defPen':'INTEGER', 'defThird':'INTEGER', 'midThird':'INTEGER', 'attThird':'INTEGER', 'attPen':'INTEGER', 'liveTouches':'INTEGER', 'succDrib':'INTEGER', 'attDrib':'INTEGER', 'playersBeat':'INTEGER', 'megs':'INTEGER', 'carries':'INTEGER', 'totDist':'INTEGER', 'prgDist':'INTEGER', 'progCarries':'INTEGER', 'cpa':'INTEGER', 'mis':'INTEGER', 'tackled':'INTEGER', 'passTarg':'INTEGER', 'passRec':'INTEGER', 'progPassRec':'INTEGER', 'player_id':'INTEGER'})
    shooting_stats.to_sql('playerShooting', conn, if_exists='replace', index = False, dtype={'gls':'INTEGER', 'sh':'INTEGER', 'sot':'INTEGER', 'avgDist':'INTEGER', 'fk':'INTEGER', 'pk':'INTEGER', 'pkAtt':'INTEGER', 'xG':'INTEGER', 'npxG':'INTEGER', 'player_id':'INTEGER'})    
    time_stats.to_sql('playerTime', conn, if_exists='replace', index = False, dtype = {'mp':'INTEGER', 'min':'INTEGER', 'starts':'INTEGER', 'mn/start':'INTEGER', 'compl':'INTEGER', 'subs':'INTEGER', 'mn/sub':'INTEGER', 'unSub':'INTEGER','ppm':'REAL', 'onG':'INTEGER', 'onGa':'INTEGER', 'on-off':'REAL', 'onxG':'REAL', 'onxGa':'REAL', 'xon-off':'REAL', 'player_id':'INTEGER'}) 

# Squad Database Prep and Creation

In [22]:
#we don't need to set ID here as squad name is an adaquate primary key since it will never be duplicated
squads.drop(['Pts/MP', 'Pts', 'GD', 'GF', 'xGD/90', 'xGD', 'Top Team Scorer', 'Notes', 'xG'], axis=1, inplace = True)
squad_goalkeeping.drop(['# Pl', 'MP', 'Starts', 'Min', '90s', 'GA', 'GA90', 'Save%', 'W', 'D', 'L', 'CS%'], axis = 1, inplace = True)
squad_gsc.drop(['# Pl', '90s', 'SCA90', 'GCA90', 'SCA', 'GCA'], axis = 1, inplace = True)
#drop first tkl after renaming
squad_defense.drop(['# Pl', '90s', 'Tkl%', '%', 'Tkl+Int'], axis = 1, inplace = True)
squad_misc.drop(['# Pl', '90s', 'Int', 'TklW', 'Won%'], axis = 1, inplace = True)
#drop first cmp after renaming
squad_pass.drop(['# Pl', '90s', 'Cmp%', 'A-xA', '1/3'], axis = 1, inplace = True)
#drop first # pl after renaming
squad_possession.drop([ '90s', 'Touches', 'Succ%', '1/3', 'Rec%'], axis = 1, inplace = True)
squad_shooting.drop(['# Pl', '90s', 'SoT%', 'Sh/90', 'SoT/90', 'G/Sh', 'G/SoT', 'npxG/Sh', 'G-xG', 'np:G-xG'], axis = 1, inplace = True)
squad_time.drop(['# Pl', 'MP', 'Mn/MP', 'Min%', '90s', 'Starts', 'Mn/Start', 'Compl', 'PPM', '+/-', '+/-90', 'xG+/-', 'xG+/-90', 'onG', 'onGA', 'onxG', 'onxGA'], axis = 1, inplace = True)

In [23]:
squads.set_axis(['squad', 'mp', 'w', 'd', 'l', 'ga', 'xGA', 'last5', 'attendence', 'goalkeeper'], axis=1, inplace=True)
squad_goalkeeping.set_axis(['sota', 'saves', 'cs', 'pkAtt', 'pkA', 'pkSv', 'pkM', 'squad'], axis=1, inplace=True)
squad_defense.set_axis(['tkl', 'tklW', 'tklDefThird', 'tklMidThird', 'tklAttThird', 'tklVsDrib', 'attVsDrib', 'past', 'press', 'pressW', 'pressDefThird', 'pressMidThird', 'pressAttThird', 'blk', 'blkSh', 'blkSot', 'blkPass', 'int', 'clr', 'err', 'squad'], axis=1, inplace=True)
squad_misc.set_axis(['crdY', 'crdR', 'crdY2', 'fls', 'fld', 'off', 'crs', 'pkWon', 'pkCon', 'og', 'recov', 'airW', 'airL', 'squad'], axis=1, inplace=True)
squad_pass.set_axis(['totCmp', 'totAtt', 'totDist', 'prgDist', 'shortCmp', 'shortAtt', 'medCmp', 'medAtt', 'lngCmp', 'lngAtt', 'ast', 'xA', 'kp', 'ppa', 'crsPa', 'progPasses', 'squad'], axis=1, inplace=True)
squad_possession.set_axis(['# Pl', 'poss', 'defPen', 'defThird', 'midThird', 'attThird', 'attPen', 'live', 'dribW', 'dribAtt', 'playersBeat', 'megs', 'carries', 'totDist', 'prgDist', 'progCarries', 'cpa', 'mis', 'dis', 'targ', 'rec', 'progRec', 'squad' ], axis=1, inplace=True)
squad_possession.drop('# Pl', axis = 1, inplace = True)
squad_shooting.set_axis(['gls', 'sh', 'sot', 'avgDist', 'fk', 'pkW', 'pkAtt', 'xG', 'npxG', 'squad'], axis=1, inplace=True)
squad_time.set_axis(['age','min','subs','mn/sub','unsub', 'squad'], axis=1, inplace=True)
squad_gsc.set_axis(['scPassLive', 'scPassDead', 'scDrib', 'scSh', 'scFld', 'scDef', 'gcPassLive', 'gcPassDead', 'gcDrib', 'gcSh', 'gcFld', 'gcDef', 'squad' ], axis=1, inplace=True)


In [24]:
#add to db next
#connect to mlsdb and create/update players table
with sqlite3.connect('mlsdb.sqlite') as conn:
    squads.to_sql('squads', conn, if_exists='replace', index = False, dtype={'squad':'TEXT', 'mp':'INTEGER', 'w':'INTEGER', 'd':'INTEGER', 'l':'INTEGER', 'ga':'INTEGER', 'xGA':'REAL', 'last5':'TEXT', 'attendence':'INTEGER', 'goalkeeper':'TEXT'})
    squad_goalkeeping.to_sql('squadKeepers', conn, if_exists='replace', index = False, dtype={'sota':'INTEGER', 'saves':'INTEGER', 'cs':'INTEGER', 'pkAtt':'INTEGER', 'pkA':'INTEGER', 'pkSv':'INTEGER', 'pkM':'INTEGER', 'squad':'TEXT'})
    squad_defense.to_sql('squadDefense', conn, if_exists='replace', index = False, dtype= {'tkl':'INTEGER', 'tklW':'INTEGER', 'tklDefThird':'INTEGER', 'tklMidThird':'INTEGER', 'tklAttThird':'INTEGER', 'tklVsDrib':'INTEGER', 'attVsDrib':'INTEGER', 'past':'INTEGER', 'press':'INTEGER', 'pressW':'INTEGER', 'pressDefThird':'INTEGER', 'pressMidThird':'INTEGER', 'pressAttThird':'INTEGER', 'blk':'INTEGER', 'blkSh':'INTEGER', 'blkSot':'INTEGER', 'blkPass':'INTEGER', 'int':'INTEGER', 'clr':'INTEGER', 'err':'INTEGER', 'squad':'TEXT'}) 
    squad_gsc.to_sql('squadGsc', conn, if_exists='replace', index = False, dtype = {'scPassLive':'INTEGER', 'scPassDead':'INTEGER', 'scDrib':'INTEGER', 'scSh':'INTEGER', 'scFld':'INTEGER', 'scDef':'INTEGER','gcPassLive':'INTEGER', 'gcPassDead':'INTEGER', 'gcDrib':'INTEGER', 'gcSh':'INTEGER', 'gcFld':'INTEGER', 'gcDef':'INTEGER', 'squad':'TEXT'})
    squad_misc.to_sql('squadMisc', conn, if_exists='replace', index = False, dtype={'crdY':'INTEGER', 'crdR':'INTEGER', 'crdY2':'INTEGER', 'fls':'INTEGER', 'fld':'INTEGER', 'off':'INTEGER', 'crs':'INTEGER', 'pkWon':'INTEGER', 'pkCon':'INTEGER', 'og':'INTEGER', 'recov':'INTEGER', 'airW':'INTEGER', 'airL':'INTEGER', 'squad':'REAL'})  
    squad_pass.to_sql('squadPassing', conn, if_exists='replace', index = False, dtype = {'totCmp':'INTEGER', 'totAtt':'INTEGER', 'totDist':'INTEGER', 'prgDist':'INTEGER', 'shortCmp':'INTEGER', 'shortAtt':'INTEGER', 'medCmp':'INTEGER', 'medAtt':'INTEGER', 'lngCmp':'INTEGER', 'lngAtt':'INTEGER', 'ast':'INTEGER', 'xA':'INTEGER', 'kp':'INTEGER', 'ppa':'INTEGER', 'crsPa':'INTEGER', 'progPasses':'INTEGER', 'squad':'TEXT'})
    squad_possession.to_sql('squadPossession', conn, if_exists='replace', index = False, dtype={'poss':'INTEGER', 'defPen':'INTEGER', 'defThird':'INTEGER', 'midThird':'INTEGER', 'attThird':'INTEGER', 'attPen':'INTEGER', 'live':'INTEGER', 'dribW':'INTEGER', 'dribAtt':'INTEGER', 'playersBeat':'INTEGER', 'megs':'INTEGER', 'carries':'INTEGER', 'totDist':'INTEGER','prgDist':'INTEGER', 'progCarries':'INTEGER', 'cpa':'INTEGER', 'mis':'INTEGER', 'dis':'INTEGER', 'targ':'INTEGER', 'rec':'INTEGER', 'progRec':'INTEGER','squad':'TEXT'})
    squad_shooting.to_sql('squadShooting', conn, if_exists='replace', index = False, dtype={'gls':'INTEGER', 'sh':'INTEGER', 'sot':'INTEGER', 'avgDist':'REAL', 'fk':'INTEGER', 'pkW':'INTEGER', 'pkAtt':'INTEGER', 'xG':'REAL', 'npxG':'REAL', 'squad':'TEXT'})    
    squad_time.to_sql('squadTime', conn, if_exists='replace', index = False, dtype={'age':'REAL', 'min':'INTEGER', 'subs':'INTEGER', 'mn/sub':'INTEGER', 'unsub':'INTEGER', 'squad':'TEXT'}) 