**This notebook scrapes information and data from FotMob player profiles, using Leroy Sané's profile as the worked example.**

In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
import pandas as pd
import json

In [3]:
# Download the public FotMob profile page and parse its HTML.
url = 'https://www.fotmob.com/players/530859/leroy-sane'
page = requests.get(url, timeout=30)  # a timeout keeps the kernel from hanging if the site never answers
page.raise_for_status()  # stop on 4xx/5xx instead of silently parsing an error page
soup = BeautifulSoup(page.text, 'html.parser')
# All <script> tags of the page; the next cell picks the one that carries the page's JSON payload.
scripts = soup.find_all('script')

In [4]:
# The page data is read below as data['props']['pageProps'][...], which is the layout of the
# Next.js payload stored in <script id="__NEXT_DATA__">. Select that tag by its id rather than by
# position (scripts[58]): a positional index silently points at a different script as soon as the
# site adds or removes a tag.
next_data_tag = soup.find('script', id='__NEXT_DATA__')
if next_data_tag is None:
    raise ValueError('No <script id="__NEXT_DATA__"> tag found; the page layout may have changed')
json_script = next_data_tag.string

In [5]:
# Parse the JSON text taken from the page's script tag into Python objects (nested dicts/lists).
data = json.loads(json_script)

In [6]:
# Keep only the player payload. It is rebuilt from `json_script` (rather than from `data` itself)
# so this cell can be re-run safely: `data = data['props'][...]` raises KeyError on a second run
# because `data` has already been narrowed and no longer has a 'props' key.
data = json.loads(json_script)['props']['pageProps']['data']

In [7]:
# Player's display name.
data['name']

'Leroy Sané'

In [8]:
# Birth date, given as a UTC timestamp string.
data['birthDate']['utcTime']

'1996-01-11T00:00:00.000Z'

In [9]:
# Name of the player's primary team.
data['primaryTeam']['teamName']

'Bayern München'

In [10]:
# Primary team's colours: a dict of hex colour strings (main, alternate, away, away alternate).
data['primaryTeam']['teamColors']

{'color': '#C60428',
 'colorAlternate': '#DC052D',
 'colorAway': '#292229',
 'colorAwayAlternate': '#FFFFFF'}

In [11]:
# Primary playing position, as a dict with a readable 'label' and a machine 'key'.
data['positionDescription']['primaryPosition']

{'label': 'Right Winger', 'key': 'rightwinger'}

In [12]:
# Other positions the player is listed for: a list of {'label', 'key'} dicts.
data['positionDescription']['nonPrimaryPositions']

[{'label': 'Attacking Midfielder', 'key': 'centerattackingmidfielder'},
 {'label': 'Left Winger', 'key': 'leftwinger'},
 {'label': 'Striker', 'key': 'striker'}]

In [13]:
# Flatten the list of position records (occurrences, pitch coordinates, labels) into a table.
positions = pd.json_normalize(data['positionDescription']['positions'])
positions

Unnamed: 0,occurences,position,isMainPosition,pitchPositionData,strPos.label,strPos.key,strPosShort.label,strPosShort.key,pitchPositionData.right,pitchPositionData.top,pitchPositionData.ratio
0,1,73,False,,Central Midfielder,centermidfielder,CM,centermidfielder_short,,,
1,27,82,True,,Right Winger,rightwinger,RW,rightwinger_short,0.83,0.26,
2,6,84,False,,Attacking Midfielder,centerattackingmidfielder,AM,centerattackingmidfielder_short,0.5,0.26,0.315385
3,16,87,False,,Left Winger,leftwinger,LW,leftwinger_short,0.17,0.26,0.507692
4,2,104,False,,Striker,striker,ST,striker_short,0.5,0.1,0.238462


In [14]:
# General facts about the player (height, shirt number, age, ...) as one row per fact.
player_info = pd.json_normalize(data['playerInformation'])
player_info

Unnamed: 0,title,translationKey,value.key,value.fallback,countryCode,icon.type,icon.id
0,Height,height_sentencecase,,183 cm,,,
1,Shirt,shirt,,10,,,
2,Age,age_sentencecase,,27,,,
3,Preferred foot,preferred_foot,left,Left,,,
4,Country,country_sentencecase,,Germany,GER,country,GER
5,Market value,transfer_value,,€68M,,,


In [15]:
# Name of the player's main league; the value also carries the season (see output).
mainLeague_name = data['mainLeague']['leagueName']
mainLeague_name

'Bundesliga 2023/2024'

In [16]:
# Raw 'mainLeague' record: a nested dict whose 'topStats' -> 'items' list holds the headline stats.
data['mainLeague']

{'topStats': {'id': 'top-stat-card',
  'type': 'stat-values',
  'display': 'top-stat-card',
  'items': [{'title': 'Goals',
    'localizedTitleId': 'goals',
    'statValue': '8',
    'per90': 0.547112462006079,
    'percentileRank': 100,
    'percentileRankPer90': 94.5945945945946,
    'statFormat': 'number'},
   {'title': 'Assists',
    'localizedTitleId': 'goal_assist_title',
    'statValue': '8',
    'per90': 0.547112462006079,
    'percentileRank': 100,
    'percentileRankPer90': 91.8918918918919,
    'statFormat': 'number'},
   {'title': 'Rating',
    'localizedTitleId': 'rating',
    'statValue': '8.31',
    'per90': 8.306,
    'percentileRank': 100,
    'percentileRankPer90': 100,
    'statFormat': 'fraction'},
   {'title': 'Matches',
    'localizedTitleId': 'matches_uppercase',
    'statValue': '15',
    'per90': 15,
    'percentileRank': 74.32432432432432,
    'percentileRankPer90': 74.32432432432432,
    'statFormat': 'number'},
   {'title': 'Started',
    'localizedTitleId': 

In [17]:
# One-row table of the main-league record; the nested 'topStats.items' list stays as a single cell.
mainLeague = pd.json_normalize(data['mainLeague'])
mainLeague

Unnamed: 0,leagueId,leagueName,topStats.id,topStats.type,topStats.display,topStats.items
0,54,Bundesliga 2023/2024,top-stat-card,stat-values,top-stat-card,"[{'title': 'Goals', 'localizedTitleId': 'goals..."


In [18]:
# One row per team the player won trophies with; each row's 'tournaments' cell is a nested list.
trophies = pd.json_normalize(data['trophies']['playerTrophies'])
trophies

Unnamed: 0,ccode,teamId,teamName,tournaments
0,GER,9823,Bayern München,"[{'ccode': 'GER', 'leagueId': 42, 'leagueName'..."
1,ENG,8456,Manchester City,"[{'ccode': 'ENG', 'leagueId': 132, 'leagueName..."
2,INT,8570,Germany,"[{'ccode': 'GER', 'leagueId': 43, 'leagueName'..."
3,GER,603864,Schalke 04 U19,"[{'ccode': 'GER', 'leagueId': -1, 'leagueName'..."


In [19]:
# Tournaments of the first team in the trophy list (index 0), one row per competition.
trophies_bayern = pd.json_normalize(data['trophies']['playerTrophies'][0]['tournaments'])
trophies_bayern

Unnamed: 0,ccode,leagueId,leagueName,seasonsWon,seasonsRunnerUp
0,GER,42,UEFA Champions League,[2019/2020],[]
1,GER,54,Bundesliga,"[2022/2023, 2021/2022, 2020/2021]",[]
2,GER,74,UEFA Super Cup,[2020/2021],[]
3,GER,8924,Super Cup,"[2022/2023, 2021/2022, 2020/2021]",[2023/2024]
4,GER,78,FIFA Club World Cup,[2020 Qatar],[]


In [20]:
# Tournaments of the second team in the trophy list (index 1), one row per competition.
trophies_mancity = pd.json_normalize(data['trophies']['playerTrophies'][1]['tournaments'])
trophies_mancity

Unnamed: 0,ccode,leagueId,leagueName,seasonsWon,seasonsRunnerUp
0,ENG,132,FA Cup,[2018/2019],[]
1,ENG,47,Premier League,"[2018/2019, 2017/2018]",[]
2,ENG,133,League Cup,"[2019/2020, 2018/2019, 2017/2018]",[]
3,ENG,247,Community Shield,"[2019/2020, 2018/2019]",[]


In [21]:
# The player's recent matches, one row per match, with nested fields flattened into dotted columns.
recentMatches = pd.json_normalize(data['recentMatches'])
recentMatches

Unnamed: 0,teamId,teamName,opponentTeamId,opponentTeamName,isHomeTeam,id,matchPageUrl,leagueId,leagueName,stage,...,minutesPlayed,goals,assists,yellowCards,redCards,playerOfTheMatch,onBench,matchDate.utcTime,ratingProps.num,ratingProps.bgcolor
0,9823,Bayern München,8721,Wolfsburg,False,4221879,/matches/wolfsburg-vs-bayern-munchen/2udogn#42...,54,Bundesliga,,...,90,0,0,0,0,False,False,2023-12-20T20:30:00.000Z,7.2,#1ec853
1,9823,Bayern München,10269,VfB Stuttgart,True,4221986,/matches/bayern-munchen-vs-vfb-stuttgart/3c6nc...,54,Bundesliga,,...,90,0,1,0,0,False,False,2023-12-17T19:30:00.000Z,8.1,#1ec853
2,9823,Bayern München,10260,Manchester United,False,4315006,/matches/bayern-munchen-vs-manchester-united/3...,42,Champions League Grp. A,,...,89,0,0,0,0,False,False,2023-12-12T21:00:00.000Z,7.1,#1ec853
3,9823,Bayern München,9810,Eintracht Frankfurt,False,4221979,/matches/eintracht-frankfurt-vs-bayern-munchen...,54,Bundesliga,,...,90,0,1,1,0,False,False,2023-12-09T15:30:00.000Z,8.1,#1ec853
4,9823,Bayern München,8391,FC København,True,4315005,/matches/fc-kobenhavn-vs-bayern-munchen/2qrohw...,42,Champions League Grp. A,,...,26,0,0,0,0,False,False,2023-11-29T21:00:00.000Z,6.6,#f08022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,9823,Bayern München,9905,Mainz 05,False,4053739,/matches/bayern-munchen-vs-mainz-05/37vb95#405...,209,DFB Pokal,1/8,...,71,1,0,0,0,False,False,2023-02-01T20:45:00.000Z,8.3,#1ec853
58,9823,Bayern München,9810,Eintracht Frankfurt,True,3903697,/matches/eintracht-frankfurt-vs-bayern-munchen...,54,Bundesliga,,...,70,1,0,0,0,False,False,2023-01-28T18:30:00.000Z,7.5,#1ec853
59,9823,Bayern München,8722,1. FC Köln,True,3903688,/matches/1-fc-koln-vs-bayern-munchen/2ue2rs#39...,54,Bundesliga,,...,90,0,0,0,0,False,False,2023-01-24T20:30:00.000Z,7.0,#1ec853
60,9823,Bayern München,178475,RB Leipzig,False,3903680,/matches/bayern-munchen-vs-rb-leipzig/856w9cm#...,54,Bundesliga,,...,90,0,0,0,0,False,False,2023-01-20T20:30:00.000Z,7.1,#1ec853


In [22]:
# Career totals per club: the senior team entries followed by the youth team entries.
career_byteam = pd.json_normalize(
    data['careerHistory']['careerItems']['senior']['teamEntries']
    + data['careerHistory']['careerItems']['youth']['teamEntries']
)
career_byteam

Unnamed: 0,participantId,teamId,team,teamGender,transferType,startDate,endDate,active,role,appearances,goals,assists,hasUncertainData
0,530859,9823,Bayern München,male,,2020-07-15T00:00:00,,True,,157,47,42.0,False
1,530859,8456,Manchester City,male,,2016-08-02T00:00:00,2020-07-15T00:00:00,False,,135,39,42.0,False
2,530859,10189,Schalke 04,male,,2014-03-01T00:00:00,2016-08-02T00:00:00,False,,57,13,7.0,False
3,530859,357215,Manchester City U23,male,,2020-02-27T00:00:00,2020-07-15T00:00:00,False,,1,0,0.0,False
4,530859,603864,Schalke 04 U19,male,,2012-11-23T00:00:00,2015-06-30T00:00:00,False,,44,16,,False


In [23]:
# Career totals per season: the senior season entries followed by the youth season entries.
career_byseason = pd.json_normalize(
    data['careerHistory']['careerItems']['senior']['seasonEntries']
    + data['careerHistory']['careerItems']['youth']['seasonEntries']
)
career_byseason

Unnamed: 0,seasonName,appearances,goals,assists,tournamentStats,teamId,team,teamGender,transferType,rating.num,rating.bgcolor
0,2023/2024,24,9,9.0,"[{'leagueId': 54, 'tournamentId': 20946, 'leag...",9823,Bayern München,male,,7.9,#1ec853
1,2022/2023,44,14,9.0,"[{'leagueId': 54, 'tournamentId': 17801, 'leag...",9823,Bayern München,male,,7.4,#1ec853
2,2021/2022,45,14,14.0,"[{'leagueId': 54, 'tournamentId': 16494, 'leag...",9823,Bayern München,male,,7.5,#1ec853
3,2020/2021,44,10,10.0,"[{'leagueId': 54, 'tournamentId': 15481, 'leag...",9823,Bayern München,male,,7.0,#1ec853
4,2019/2020,2,0,0.0,"[{'leagueId': 47, 'tournamentId': 14022, 'leag...",8456,Manchester City,male,,6.1,#f08022
5,2018/2019,47,16,17.0,"[{'leagueId': 47, 'tournamentId': 12776, 'leag...",8456,Manchester City,male,,7.4,#1ec853
6,2017/2018,49,14,19.0,"[{'leagueId': 47, 'tournamentId': 11522, 'leag...",8456,Manchester City,male,,7.6,#1ec853
7,2016/2017,37,9,6.0,"[{'leagueId': 47, 'tournamentId': 10418, 'leag...",8456,Manchester City,male,,7.4,#1ec853
8,2015/2016,42,9,7.0,"[{'leagueId': 54, 'tournamentId': 9556, 'leagu...",10189,Schalke 04,male,,-,
9,2014/2015,14,4,0.0,"[{'leagueId': 54, 'tournamentId': 8633, 'leagu...",10189,Schalke 04,male,,-,


In [24]:
# Each season row carries a list of per-tournament dicts in this column.
career_byseason['tournamentStats']

0     [{'leagueId': 54, 'tournamentId': 20946, 'leag...
1     [{'leagueId': 54, 'tournamentId': 17801, 'leag...
2     [{'leagueId': 54, 'tournamentId': 16494, 'leag...
3     [{'leagueId': 54, 'tournamentId': 15481, 'leag...
4     [{'leagueId': 47, 'tournamentId': 14022, 'leag...
5     [{'leagueId': 47, 'tournamentId': 12776, 'leag...
6     [{'leagueId': 47, 'tournamentId': 11522, 'leag...
7     [{'leagueId': 47, 'tournamentId': 10418, 'leag...
8     [{'leagueId': 54, 'tournamentId': 9556, 'leagu...
9     [{'leagueId': 54, 'tournamentId': 8633, 'leagu...
10    [{'leagueId': 54, 'tournamentId': 7771, 'leagu...
11    [{'tournamentId': -1, 'leagueName': 'Premier L...
12    [{'tournamentId': -1, 'leagueName': 'UEFA Yout...
13    [{'tournamentId': -1, 'leagueName': 'UEFA Yout...
Name: tournamentStats, dtype: object

In [25]:
# Per-tournament breakdown of the season stored in row 1 of `career_byseason` (2022/2023 here).
summary_22_23 = pd.json_normalize(career_byseason.loc[1, 'tournamentStats'])
summary_22_23

Unnamed: 0,leagueId,tournamentId,leagueName,seasonRating,isFriendly,seasonName,goals,assists,appearances,rating.num,rating.bgcolor
0,54,17801,Bundesliga,7.26,False,2022/2023,8,7,32,7.3,#1ec853
1,209,17692,DFB Pokal,7.83,False,2022/2023,1,1,3,7.8,#1ec853
2,8924,20738,Super Cup,6.61,False,2022/2023,1,0,1,6.6,#f08022
3,42,17697,Champions League,7.64,False,2022/2023,4,1,8,7.6,#1ec853


In [26]:
# Player trait scores (key, title, value), one row per trait.
traits = pd.json_normalize(data['traits']['items'])
traits

Unnamed: 0,key,title,value
0,chances_created,Chances created,0.856115
1,aerials_won,Aerial duels,0.064748
2,defensive_actions,Defensive actions,0.248201
3,goals,Goals,0.68705
4,shot_attempts,Shot attempts,0.852518
5,touches,Touches,0.827338


In [27]:
# Fetch the player's stats for one season/league from FotMob's JSON API.
#
# Changes from the earlier version of this cell:
#   * the request was sent twice and the first response thrown away; it is now sent once;
#   * 'Referer'/'Origin' named sofascore.com (presumably left over from a scraper for that site)
#     although the request goes to fotmob.com; they now name FotMob, and 'Sec-Fetch-Site' is
#     'same-origin' to match;
#   * a timeout and an explicit status check were added.
headers = {
    # A browser-like user agent is required: you'll be blocked if you don't send some type of user agent.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/116.0',
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'Prefer': 'safe',
    'Referer': 'https://www.fotmob.com/',
    'Origin': 'https://www.fotmob.com',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-GPC': '1',
    'Cache-Control': 'max-age=0',
}

response = requests.get(
    'https://www.fotmob.com/api/playerStats',
    # requests URL-encodes the values, producing ?playerId=530859&seasonId=2023%2F2024-54.
    # The seasonId looks like "<season>-<leagueId>" (54 is the leagueId shown for the Bundesliga
    # above) -- TODO confirm the format before reusing it for other leagues.
    params={'playerId': 530859, 'seasonId': '2023/2024-54'},
    headers=headers,
    timeout=30,  # fail instead of hanging the kernel if the API never answers
)
response.raise_for_status()  # surface 4xx/5xx here rather than as a JSON decode error in the next cell

In [28]:
# Decode the JSON body of the playerStats API response into Python objects.
playerStats = response.json()

In [29]:
# Headline stats from the API response's 'topStatCard', one row per stat.
top_stats = pd.json_normalize(playerStats['topStatCard']['items'])
top_stats

Unnamed: 0,title,localizedTitleId,statValue,per90,percentileRank,percentileRankPer90,statFormat
0,Goals,goals,8.0,0.547112,100.0,94.594595,number
1,Assists,goal_assist_title,8.0,0.547112,100.0,91.891892,number
2,Rating,rating,8.31,8.306,100.0,100.0,fraction
3,Matches,matches_uppercase,15.0,15.0,74.324324,74.324324,number
4,Started,started,15.0,15.0,89.189189,40.540541,number
5,Minutes,minutes_played,1316.0,90.0,97.297297,100.0,number


In [30]:
# Group 0 of 'statsSection': shooting numbers (goals, xG, shots, ...).
shooting_stats = pd.json_normalize(playerStats['statsSection']['items'][0]['items'])
shooting_stats

Unnamed: 0,title,localizedTitleId,statValue,per90,percentileRank,percentileRankPer90,statFormat
0,Goals,goals,8.0,0.547112,100,94.594595,number
1,xG,expected_goals,8.65,0.591538,100,94.594595,fraction
2,xGOT,expected_goals_on_target,9.47,0.64761,100,94.594595,fraction
3,xG excl. penalty,non_penalty_xg,8.65,0.591538,100,95.945946,fraction
4,Shots,shots,46.0,3.145897,100,82.432432,number
5,Shots on target,ShotsOnTarget,22.0,1.504559,100,89.189189,number


In [31]:
# Group 1 of 'statsSection': passing numbers (assists, xA, pass accuracy, crosses, ...).
passing_stats = pd.json_normalize(playerStats['statsSection']['items'][1]['items'])
passing_stats

Unnamed: 0,title,localizedTitleId,statValue,per90,percentileRank,percentileRankPer90,statFormat
0,Assists,assists,8.0,0.547112,100.0,91.891892,number
1,xA,expected_assists,6.84,0.468071,100.0,97.297297,fraction
2,Accurate passes,successful_passes,506.0,34.604863,94.594595,79.72973,number
3,Pass accuracy,successful_passes_accuracy,84.5,84.474124,83.783784,83.783784,percent
4,Accurate long balls,long_balls_accurate,14.0,0.957447,87.837838,62.162162,number
5,Long ball accuracy,long_ball_succeeeded_accuracy,73.7,73.684211,89.189189,89.189189,percent
6,Chances created,chances_created,49.0,3.351064,100.0,95.945946,number
7,Successful crosses,crosses_succeeeded,20.0,1.367781,93.243243,83.783784,number
8,Cross accuracy,crosses_succeeeded_accuracy,35.1,35.087719,82.432432,82.432432,percent


In [32]:
# Group 2 of 'statsSection': possession numbers (dribbles, touches, fouls won, ...).
possession_stats = pd.json_normalize(playerStats['statsSection']['items'][2]['items'])
possession_stats

Unnamed: 0,title,localizedTitleId,statValue,per90,percentileRank,percentileRankPer90,statFormat
0,Dribbles,dribbles_succeeded,63.0,4.308511,100.0,100.0,number
1,Dribbles success rate,won_contest_subtitle,58.9,58.878505,83.783784,83.783784,percent
2,Touches,touches,962.0,65.790274,95.945946,85.135135,number
3,Touches in opposition box,touches_opp_box,103.0,7.044073,100.0,91.891892,number
4,Dispossessed,dispossessed,29.0,1.983283,1.351351,25.675676,number
5,Fouls won,fouls_won,15.0,1.025836,77.027027,40.540541,number


In [33]:
# Group 3 of 'statsSection': defending numbers (tackles, duels, interceptions, recoveries, ...).
defending_stats = pd.json_normalize(playerStats['statsSection']['items'][3]['items'])
defending_stats

Unnamed: 0,title,localizedTitleId,statValue,per90,percentileRank,percentileRankPer90,statFormat
0,Tackles won,tackles_succeeded,5.0,0.341945,47.297297,18.918919,number
1,Tackles won %,tackles_succeeded_percent,50.0,50.0,28.378378,28.378378,percent
2,Duels won,duel_won,89.0,6.086626,100.0,66.216216,number
3,Duels won %,duel_won_percent,47.8,47.849462,74.324324,74.324324,percent
4,Aerials won,aerials_won,1.0,0.068389,10.810811,10.810811,number
5,Aerials won %,aerials_won_percent,33.3,33.333333,41.891892,41.891892,percent
6,Interceptions,interceptions,3.0,0.205167,48.648649,25.675676,number
7,Blocked scoring attempt,shot_blocked,13.0,0.889058,100.0,83.783784,number
8,Recoveries,recoveries,53.0,3.62462,81.081081,24.324324,number
9,Possession won final 3rd,poss_won_att_3rd_team_title,10.0,0.683891,86.486486,58.108108,number


In [34]:
# Group 4 of 'statsSection': discipline numbers (yellow and red cards).
discipline_stats = pd.json_normalize(playerStats['statsSection']['items'][4]['items'])
discipline_stats

Unnamed: 0,title,localizedTitleId,statValue,per90,percentileRank,percentileRankPer90,statFormat
0,Yellow cards,yellow_cards,2,0.136778,10.810811,55.405405,number
1,Red cards,red_cards,0,0.0,100.0,100.0,number


In [35]:
# Basically, everything extracted above is also contained in this single JSON endpoint: the same
# selections and DataFrames could have been built starting from `playerData`.
response2 = requests.get(
    'https://www.fotmob.com/api/playerData',
    params={'id': 530859},  # encoded by requests as ?id=530859
    headers=headers,
    timeout=30,  # fail instead of hanging the kernel if the API never answers
)
response2.raise_for_status()  # surface 4xx/5xx here rather than as a JSON decode error below
playerData = response2.json()
# playerData  (uncomment to see the JSON; it's obviously very long)