In [1]:
# Dependencies
import requests
import os
from os.path  import basename
from bs4 import BeautifulSoup
import pandas as pd
from glob import glob
import datetime
from datetime import datetime
import time
from random import randint
import unicodedata


In [2]:
leagues_df = pd.read_csv("../data/leagues_data.csv")

In [3]:
leagues_df

Unnamed: 0,league_ID,country,tier,league_name,league_link
0,1,Brazil,1,Brazil Serie A,https://www.transfermarkt.com/campeonato-brasi...
1,2,United States,1,MLS (US),https://www.transfermarkt.com/major-league-soc...
2,3,Mexico,1,Liga MX (MEX),https://www.transfermarkt.com/liga-mx-clausura...
3,4,Argentina,1,Superliga (ARG),https://www.transfermarkt.com/primera-division...


In [4]:
# to be able to scrape this website we need to use 'User Agents'
# more info about user agents in 'https://webscraping.com/blog/User-agents/'
# You can find your User-Agent at 'http://whatsmyuseragent.com/'
headers = {'User-Agent': 
           'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'}

In [5]:
convert_url = "https://www.google.com/search?q=euro+to+dollar&oq=eur&aqs=chrome.1.69i57j35i39j0j46j0l2j69i61l2.2373j1j4&sourceid=chrome&ie=UTF-8"
html = requests.get(convert_url, headers=headers)
soup = BeautifulSoup(html.content, 'html.parser')
curr_value = soup.find('div', class_= "b1hJbf")
curr_value = round(float(curr_value["data-exchange-rate"]),2)

def get_value_us(x):
    value = []
    for char in x:
        value.append(char)
    if value[-1] == "m":
        float_value = "".join(value[1:-1])
        return round(float(float_value)*curr_value,2)
    else:
        float_value = "".join(value[1:-3])
        return round((float(float_value)*curr_value)/1000,2)


In [6]:
def strip_accents(text):

    try:
        text = unicode(text, 'utf-8')
    except NameError: # unicode is a default on python 3 
        pass

    text = unicodedata.normalize('NFD', text)\
           .encode('ascii', 'ignore')\
           .decode("utf-8")

    return str(text.strip())

In [7]:
def fix_heights(df):
    heights = []
    
    for index, row in df.iterrows():
        if pd.notnull(row["Height"]):
            heights.append(row["Height"])
        else:
            nat = row["Nat"]
            position = row["position"]
            field_position = row["field_position"]
            comp_players = df.loc[((df["Nat"] == nat) & (df["position"] == position))]
            avg_df = round(comp_players["Height"].mean(skipna = True),2)
            row["Height"] = avg_df
            if pd.isnull(row["Height"]):
                comp_players2 = df.loc[((df["Nat"] == nat) & (df["field_position"] == field_position))]
                avg_df2 = round(comp_players2["Height"].mean(skipna = True),2)
                row["Height"] = avg_df2
                if pd.isnull(row["Height"]):
                    comp_players3 = df.loc[((df["position"] == position))]
                    avg_df3 = round(comp_players3["Height"].mean(skipna = True),2)
                    row["Height"] = avg_df3
            heights.append(row["Height"])
    
    
    df.height = heights

In [8]:
def scrape_league_data(leagues_df):
    team_id = 1
    team_ID = []
    links = []
    names = []
    logos = []
    squads = []
    foreigners = []
    total_MVs = []
    avg_MVs = []
    league_ID = []
    
    for index, row in leagues_df.iterrows():
        url = row["league_link"]
        league_name = row["league_name"]
        tier = row["tier"]
        country = row["country"]
        league_id = row["league_ID"]
        time.sleep(1.1)
        print(f"scraping: {country}_{tier}_{league_name}")
        html = requests.get(url, headers=headers)
        soup = BeautifulSoup(html.content, 'html.parser')
        htmltable = soup.find('table', class_= "items")


        results = htmltable.findAll("tr", class_ =["odd","even"])


        for result in results:
            features = result.findAll("td")
            links.append(("https://www.transfermarkt.com"+result.find("a", href=True)\
                          ["href"]+"/plus/1").replace("startseite", "kader"))
            logo = result.find("img", src=True)["src"]
            logo = logo.split("?")[0]
            logo = logo.replace("tiny", "header")
            logos.append(logo)
            name = features[1].text
            names.append(strip_accents(name))
            squad = features[3].text
            squads.append(squad)
            foreigner = features[5].text
            foreigners.append(foreigner)
            total_MV = get_value_us(features[6].text)
            total_MVs.append(total_MV)
#             print(features[7])
            avg_MV = get_value_us(features[7].text)
            avg_MVs.append(avg_MV)
            team_ID.append(team_id)
            league_ID.append(league_id)
            team_id = team_id + 1

            
#         break


    # Create a Dataframe and export to a .csv file
    df = pd.DataFrame(list(zip(team_ID, league_ID, names, squads, foreigners,avg_MVs, total_MVs, logos,links)), \
columns =["team_ID", "league_ID","club","squad", "foreigners", "avg_market_value_m", "total_MV_m",'Logo_img', "link_page"]) 
    df['league_ID'] = df['league_ID'].astype(int)
    df['team_ID'] = df['team_ID'].astype(int)

    df.to_csv(f'../data/teams_trmk.csv',index=False)




In [64]:
def scrape_team_data(teams_df):
    player_id = 1
    name = []
    player_page = []
    position = []
    f_posi = []
    Age = []
    Nat = []
    Height = []
    foot = []
    dt_joined = []
    prev_team = []
    contract_expires = []
    market_value = []
    team_ID = []
    players_ID = []
    

    df_league = pd.read_csv(teams_df)
    for index, row in df_league.iterrows():
        team_id = row["team_ID"]
        team = row["link_page"]
        league_id = row["league_ID"]
        team_name = team.split('/')[-8]
        print(team_name)
        time.sleep(randint(1,2))
        html = requests.get(team, headers=headers)
        soup = BeautifulSoup(html.content, 'html.parser')
        htmltable = soup.find('table', class_= "items")

        results = htmltable.findAll("tr", class_ =["odd","even"])


        for result in results:
            features = result.findAll("td")

            name_1 = features[2].find("img", alt=True)["alt"]

            player_page.append("https://www.transfermarkt.com" + features[3].find("a", href=True)["href"])

            position_1 = features[4].text
            
            if position_1 in ("Centre-Back", "Left-Back", "Right-Back", "Defender"):
                field_posit = "DEF"
            elif position_1 in ("Defensive Midfield", "Central Midfield" , "Right Midfield" , "Left Midfield" , \
            "Attacking Midfield" , "Midfield"):
                field_posit = "MID"
            elif position_1 in ("Left Winger" , "Right Winger" , "Centre-Forward" , "Second Striker" , "Forward"):
                field_posit = "ATT"
            else:
                field_posit = "GLK"

            try:
                age_1 = int((features[5].text.split("(",)[-1])[:-1])
            except:
                age_1 = ""

            nat = features[6].img["alt"]
            
            if league_id == 3 or   league_id == 4:
                try:
                    Height_1 = float((features[8].text.split(" ")[0]).replace(",", "."))
                except:
                    Height_1 = ""

                foot_1 = features[9].text
                dt_joined_1 = features[10].text
                try:
                    dt_joined_1 = datetime.strptime(dt_joined_1, '%b %d, %Y').date()
                except:
                    dt_joined_1 = ""
                try:
                    prev_team_1 = features[11].img["alt"]
                except:
                    prev_team_1 = "N.A."

                contract_expires_1 = features[12].text
                try:
                    contract_expires_1 = datetime.strptime(contract_expires_1, '%d.%m.%Y').date()
                except:
                    contract_expires_1 = ""

                try:
                    market_value_1 = get_value_us(features[13].text[:-2])
                except:
                    market_value_1 = 0
            else:
                try:
                    Height_1 = float((features[7].text.split(" ")[0]).replace(",", "."))
                except:
                    Height_1 = ""

                foot_1 = features[8].text

                dt_joined_1 = features[9].text
                try:
                    dt_joined_1 = datetime.strptime(dt_joined_1, '%b %d, %Y').date()
                except:
                    dt_joined_1 = ""

                try:
                    prev_team_1 = features[10].img["alt"]
                except:
                    prev_team_1 = "N.A."


                contract_expires_1 = features[11].text
                try:
                    contract_expires_1 = datetime.strptime(contract_expires_1, '%d.%m.%Y').date()
                except:
                    contract_expires_1 = ""

                try:
                    market_value_1 = get_value_us(features[12].text[:-2])
                except:
                    market_value_1 = 0

            name.append(strip_accents(name_1))
            position.append(position_1)
            f_posi.append(field_posit)
            Age.append(age_1)
            Nat.append(nat)
            Height.append(Height_1)
            foot.append(foot_1)
            dt_joined.append(dt_joined_1)
            prev_team.append(strip_accents(prev_team_1))
            contract_expires.append(contract_expires_1)
            market_value.append(market_value_1)
            team_ID.append(team_id)
            players_ID.append(player_id)
            player_id = player_id+1

#         time.sleep(randint(3,5))
        

    df = pd.DataFrame(list(zip(players_ID, team_ID, name, position,f_posi, Age,Nat, Height, foot,dt_joined,prev_team, contract_expires,\
                               market_value,player_page)), 
                      columns =["players_ID", "team_ID","name","position","field_position", "Age", "Nat","Height","foot",'dt_joined',"prev_team", \
                                "contract_expires", "market_value","player_page" ])
    df['players_ID'] = df['players_ID'].astype(int)
    df['team_ID'] = df['team_ID'].astype(int)
    df['dt_joined'] = pd.to_datetime(df['dt_joined'])
    df['contract_expires'] = pd.to_datetime(df['contract_expires'])
    
    fix_heights(df)

    df.to_csv(f"../data/players_trmk.csv" ,index=False)


In [61]:
scrape_league_data(leagues_df)

scraping: Brazil_1_Brazil Serie A
scraping: United States_1_MLS (US)
scraping: Mexico_1_Liga MX (MEX)
scraping: Argentina_1_Superliga (ARG)


In [65]:
teams_df = "../data/teams_trmk.csv"

scrape_team_data(teams_df)

flamengo-rio-de-janeiro
se-palmeiras-sao-paulo
gremio-foot-ball-porto-alegrense
corinthians-sao-paulo
sc-internacional-porto-alegre
atletico-mineiro
fc-sao-paulo
fc-santos
fluminense-football-club
clube-atletico-paranaense
vasco-da-gama-rio-de-janeiro
clube-atletico-bragantino-sp-
ec-bahia
botafogo-fr-rio-de-janeiro
goias-esporte-clube
coritiba-fc
fortaleza-esporte-clube
ceara-sporting-club-ce-
sport-club-do-recife
atletico-goianiense
los-angeles-football-club
atlanta-united-fc
inter-miami-cf
new-york-city-fc
toronto-fc
los-angeles-galaxy
seattle-sounders-fc
columbus-crew
sporting-kansas-city
fc-dallas
portland-timbers
minnesota-united-fc
d-c-united
fc-cincinnati
chicago-fire
orlando-city-sc
colorado-rapids
new-england-revolution
houston-dynamo
new-york-red-bulls
philadelphia-union
montreal-impact
vancouver-whitecaps
nashville-mls-team
san-jose-earthquakes
real-salt-lake-city
cf-monterrey
cf-america
tigres-uanl
cd-cruz-azul
cf-pachuca
deportivo-guadalajara
santos-laguna
atlas-guadalaja



In [12]:
# players_df = pd.read_csv("../data/players_trmk.csv")
# fix_heights(players_df)

## Save scraped data on SQL Database

In [66]:
from sqlalchemy import *
from sqlalchemy.schema import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.sql import text
import contextlib 
import json

In [67]:
rds_connection_string = "postgres:PostgreSQL@localhost:5432/trfmk_scrape_db"
engine = create_engine(f'postgresql://{rds_connection_string}')
metadata = MetaData()


In [68]:
leagues_df = pd.read_csv("../data/leagues_data.csv")
teams_df = pd.read_csv("../data/teams_trmk.csv")
players_df = pd.read_csv("../data/players_trmk.csv")

In [69]:
engine.execute(DropSchema('public', cascade = True))
engine.execute('CREATE SCHEMA IF NOT EXISTS public;')

players_df.to_sql(name = 'players',index_label= None, index= False, con=engine, if_exists='replace')
teams_df.to_sql(name = 'teams', index_label= None, index= False, con=engine, if_exists='replace')
leagues_df.to_sql(name = 'leagues', index_label= None, index= False, con=engine, if_exists='replace')

with engine.connect() as con:
    con.execute('ALTER TABLE leagues ADD CONSTRAINT pk_Leagues PRIMARY KEY ("league_ID")')
    con.execute('ALTER TABLE teams ADD CONSTRAINT pk_Teams PRIMARY KEY ("team_ID")')
    con.execute('ALTER TABLE players ADD CONSTRAINT pk_Players PRIMARY KEY ("players_ID")')
    con.execute('ALTER TABLE Teams ADD CONSTRAINT fk_Teams_league_ID FOREIGN KEY("league_ID") REFERENCES Leagues ("league_ID");')
    con.execute('ALTER TABLE Players ADD CONSTRAINT fk_Players_team_ID FOREIGN KEY("team_ID") REFERENCES Teams ("team_ID");')

In [70]:
# read database
pd.read_sql_query('SELECT * FROM  leagues JOIN (teams JOIN players USING("team_ID")) USING ("league_ID")', con=engine)

Unnamed: 0,league_ID,country,tier,league_name,league_link,team_ID,club,squad,foreigners,avg_market_value_m,...,field_position,Age,Nat,Height,foot,dt_joined,prev_team,contract_expires,market_value,player_page
0,1,Brazil,1,Brazil Serie A,https://www.transfermarkt.com/campeonato-brasi...,1,Clube de Regatas do Flamengo,29,3,4.81,...,GLK,35,Brazil,1.87,left,2017-07-16,Valencia CF,2020-12-31,3.30,https://www.transfermarkt.com/diego-alves/prof...
1,1,Brazil,1,Brazil Serie A,https://www.transfermarkt.com/campeonato-brasi...,1,Clube de Regatas do Flamengo,29,3,4.81,...,GLK,21,Brazil,1.96,both,2019-08-31,Clube de Regatas do Flamengo U20,2023-09-30,0.53,https://www.transfermarkt.com/hugo-souza/profi...
2,1,Brazil,1,Brazil Serie A,https://www.transfermarkt.com/campeonato-brasi...,1,Clube de Regatas do Flamengo,29,3,4.81,...,GLK,28,Brazil,1.94,right,2013-01-01,Clube de Regatas do Flamengo U20,2022-04-30,0.47,https://www.transfermarkt.com/cesar/profil/spi...
3,1,Brazil,1,Brazil Serie A,https://www.transfermarkt.com/campeonato-brasi...,1,Clube de Regatas do Flamengo,29,3,4.81,...,GLK,22,Brazil,1.88,right,2017-01-01,Clube de Regatas do Flamengo U20,2022-12-31,0.06,https://www.transfermarkt.com/gabriel-batista/...
4,1,Brazil,1,Brazil Serie A,https://www.transfermarkt.com/campeonato-brasi...,1,Clube de Regatas do Flamengo,29,3,4.81,...,DEF,27,Brazil,1.83,right,2019-01-01,Sao Paulo Futebol Clube,2023-12-31,5.66,https://www.transfermarkt.com/rodrigo-caio/pro...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2964,4,Argentina,1,Superliga (ARG),https://www.transfermarkt.com/primera-division...,88,Arsenal Futbol Club,29,2,0.29,...,ATT,23,Argentina,1.95,right,2018-07-01,Arsenal Futbol Club II,2020-06-30,0.47,https://www.transfermarkt.com/facundo-pons/pro...
2965,4,Argentina,1,Superliga (ARG),https://www.transfermarkt.com/primera-division...,88,Arsenal Futbol Club,29,2,0.29,...,ATT,26,Argentina,1.83,right,2018-07-11,Club Atletico Banfield,2020-06-30,0.38,https://www.transfermarkt.com/juan-manuel-garc...
2966,4,Argentina,1,Superliga (ARG),https://www.transfermarkt.com/primera-division...,88,Arsenal Futbol Club,29,2,0.29,...,ATT,25,Argentina,1.78,right,2019-07-01,Club Social y Atletico Guillermo Brown,2020-06-30,0.38,https://www.transfermarkt.com/lautaro-parisi/p...
2967,4,Argentina,1,Superliga (ARG),https://www.transfermarkt.com/primera-division...,88,Arsenal Futbol Club,29,2,0.29,...,ATT,24,Uruguay,1.71,right,2020-01-21,Deportivo Villa Espanola,2021-06-30,0.24,https://www.transfermarkt.com/jhonatan-candia/...


### Save query as JSON file

In [71]:
pd.read_sql_query('SELECT * FROM  leagues JOIN (teams JOIN players USING("team_ID")) USING ("league_ID")', con=engine)\
.to_json(r'../dashboard/static/data/data.json', orient='records')

# TEST

In [45]:
teams_df.tail(50)

Unnamed: 0,team_ID,league_ID,club,squad,foreigners,avg_market_value_m,total_MV_m,Logo_img,link_page
38,39,2,Houston Dynamo,29,21,0.92,26.73,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/houston-dynamo/k...
39,40,2,New York Red Bulls,27,13,0.98,26.41,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/new-york-red-bul...
40,41,2,Philadelphia Union,27,15,0.97,26.08,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/philadelphia-uni...
41,42,2,Montreal Impact,29,17,0.9,26.0,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/montreal-impact/...
42,43,2,Vancouver Whitecaps FC,28,14,0.92,25.82,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/vancouver-whitec...
43,44,2,Nashville SC,28,12,0.69,19.23,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/nashville-mls-te...
44,45,2,San Jose Earthquakes,29,13,0.66,19.09,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/san-jose-earthqu...
45,46,2,Real Salt Lake City,31,16,0.6,18.64,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/real-salt-lake-c...
46,47,3,CF Monterrey,38,14,2.88,109.27,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/cf-monterrey/kad...
47,48,3,CF America,40,18,2.54,101.3,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/cf-america/kader...


In [46]:
teams_test = teams_df.iloc[[2, 38, 63, 64]]
teams_test

Unnamed: 0,team_ID,league_ID,club,squad,foreigners,avg_market_value_m,total_MV_m,Logo_img,link_page
2,3,1,Gremio Foot-Ball Porto Alegrense,36,2,2.28,81.93,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/gremio-foot-ball...
38,39,2,Houston Dynamo,29,21,0.92,26.73,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/houston-dynamo/k...
63,64,3,Atletico de San Luis,36,15,0.72,25.96,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/atletico-san-lui...
64,65,4,Club Atletico River Plate,33,6,4.59,151.51,https://tmssl.akamaized.net/images/wappen/head...,https://www.transfermarkt.com/club-atletico-ri...


In [None]:
https://www.transfermarkt.com/club-atletico-river-plate/kader/verein/209/saison_id/2019/plus/1

In [None]:
https://www.transfermarkt.com/club-atletico-river-plate/kader/verein/209/saison_id/2019/plus/1

In [58]:
def scrape_test_data(teams_df):
    player_id = 1
    name = []
    player_page = []
    position = []
    f_posi = []
    Age = []
    Nat = []
    Height = []
    foot = []
    dt_joined = []
    prev_team = []
    contract_expires = []
    market_value = []
    team_ID = []
    players_ID = []
    

    df_league = teams_df
    for index, row in df_league.iterrows():
        team_id = row["team_ID"]
        team = row["link_page"]
        league_id = row["league_ID"]
        print(league_id)
        team_name = team.split('/')[-8]
        print(team_name)
        time.sleep(randint(1,2))
        html = requests.get(team, headers=headers)
        soup = BeautifulSoup(html.content, 'html.parser')
        htmltable = soup.find('table', class_= "items")

        results = htmltable.findAll("tr", class_ =["odd","even"])


        for result in results:
            features = result.findAll("td")

            name_1 = features[2].find("img", alt=True)["alt"]

            player_page.append("https://www.transfermarkt.com" + features[3].find("a", href=True)["href"])

            position_1 = features[4].text
            
            if position_1 in ("Centre-Back", "Left-Back", "Right-Back", "Defender"):
                field_posit = "DEF"
            elif position_1 in ("Defensive Midfield", "Central Midfield" , "Right Midfield" , "Left Midfield" , \
            "Attacking Midfield" , "Midfield"):
                field_posit = "MID"
            elif position_1 in ("Left Winger" , "Right Winger" , "Centre-Forward" , "Second Striker" , "Forward"):
                field_posit = "ATT"
            else:
                field_posit = "GLK"

            try:
                age_1 = int((features[5].text.split("(",)[-1])[:-1])
            except:
                age_1 = ""

            nat = features[6].img["alt"]
            
            if league_id == 3 or   league_id == 4:
                print("test1")
                try:
                    Height_1 = float((features[8].text.split(" ")[0]).replace(",", "."))
                except:
                    Height_1 = ""

                foot_1 = features[9].text
                dt_joined_1 = features[10].text
                try:
                    dt_joined_1 = datetime.strptime(dt_joined_1, '%b %d, %Y').date()
                except:
                    dt_joined_1 = ""
                try:
                    prev_team_1 = features[11].img["alt"]
                except:
                    prev_team_1 = "N.A."

                contract_expires_1 = features[12].text
                try:
                    contract_expires_1 = datetime.strptime(contract_expires_1, '%d.%m.%Y').date()
                except:
                    contract_expires_1 = ""

                try:
                    market_value_1 = get_value_us(features[13].text[:-2])
                except:
                    market_value_1 = 0
            else:
                print("test2")
                try:
                    Height_1 = float((features[7].text.split(" ")[0]).replace(",", "."))
                except:
                    Height_1 = ""

                foot_1 = features[8].text

                dt_joined_1 = features[9].text
                try:
                    dt_joined_1 = datetime.strptime(dt_joined_1, '%b %d, %Y').date()
                except:
                    dt_joined_1 = ""

                try:
                    prev_team_1 = features[10].img["alt"]
                except:
                    prev_team_1 = "N.A."


                contract_expires_1 = features[11].text
                try:
                    contract_expires_1 = datetime.strptime(contract_expires_1, '%d.%m.%Y').date()
                except:
                    contract_expires_1 = ""

                try:
                    market_value_1 = get_value_us(features[12].text[:-2])
                except:
                    market_value_1 = 0

            name.append(strip_accents(name_1))
            position.append(position_1)
            f_posi.append(field_posit)
            Age.append(age_1)
            Nat.append(nat)
            Height.append(Height_1)
            foot.append(foot_1)
            dt_joined.append(dt_joined_1)
            prev_team.append(strip_accents(prev_team_1))
            contract_expires.append(contract_expires_1)
            market_value.append(market_value_1)
            team_ID.append(team_id)
            players_ID.append(player_id)
            player_id = player_id+1

#         time.sleep(randint(3,5))
        
    df = pd.DataFrame(list(zip(players_ID, team_ID, name, position,f_posi, Age,Nat, Height, foot,dt_joined,prev_team, contract_expires,\
                               market_value,player_page)), 
                      columns =["players_ID", "team_ID","name","position","field_position", "Age", "Nat","Height","foot",'dt_joined',"prev_team", \
                                "contract_expires", "market_value","player_page" ])
    df['players_ID'] = df['players_ID'].astype(int)
    df['team_ID'] = df['team_ID'].astype(int)
    df['dt_joined'] = pd.to_datetime(df['dt_joined'])
    df['contract_expires'] = pd.to_datetime(df['contract_expires'])
    
    fix_heights(df)
    
    return df

In [59]:
scrape_test_data(teams_test)

1
gremio-foot-ball-porto-alegrense
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
2
houston-dynamo
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
test2
3
atletico-san-luis
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
4
club-atletico-river-plate
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1
test1




Unnamed: 0,players_ID,team_ID,name,position,field_position,Age,Nat,Height,foot,dt_joined,prev_team,contract_expires,market_value,player_page
0,1,3,Paulo Victor,Goalkeeper,GLK,33,Brazil,1.87,left,2017-07-13,Clube de Regatas do Flamengo,2022-12-31,1.42,https://www.transfermarkt.com/paulo-victor/pro...
1,2,3,Julio Cesar,Goalkeeper,GLK,34,Brazil,1.91,right,2019-01-03,Fluminense Football Club,2020-12-31,0.94,https://www.transfermarkt.com/julio-cesar/prof...
2,3,3,Vanderlei,Goalkeeper,GLK,36,Brazil,1.95,right,2020-01-18,Santos FC,2021-12-31,0.94,https://www.transfermarkt.com/vanderlei/profil...
3,4,3,Brenno,Goalkeeper,GLK,21,Brazil,1.9,right,2019-01-01,Gremio Foot-Ball Porto Alegrense B,2021-12-31,0.06,https://www.transfermarkt.com/brenno/profil/sp...
4,5,3,Walter Kannemann,Centre-Back,DEF,29,Argentina,1.84,left,2016-07-15,Atlas Guadalajara,2022-12-31,5.66,https://www.transfermarkt.com/walter-kannemann...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129,130,65,Matias Suarez,Centre-Forward,ATT,31,Argentina,1.83,both,2019-01-26,Club Atletico Belgrano,2022-06-30,3.30,https://www.transfermarkt.com/matias-suarez/pr...
130,131,65,Lucas Pratto,Centre-Forward,ATT,31,Argentina,1.88,right,2018-01-08,Sao Paulo Futebol Clube,2022-06-30,2.83,https://www.transfermarkt.com/lucas-pratto/pro...
131,132,65,Ignacio Scocco,Centre-Forward,ATT,34,Argentina,1.76,right,2017-07-01,Club Atletico Newell's Old Boys,2020-06-30,1.42,https://www.transfermarkt.com/ignacio-scocco/p...
132,133,65,Federico Girotti,Centre-Forward,ATT,20,Argentina,,-,2020-01-01,Club Atletico River Plate II,2022-12-31,0.00,https://www.transfermarkt.com/federico-girotti...
