# Shot Heatmaps Top 10 goalscorers in EPL

## Libraries to import

In [1]:
# Import libraries:
import json
import pandas as pd
from copy import deepcopy
from bs4 import BeautifulSoup
from urllib.request import urlopen

# Import plotting libraries:
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
from matplotlib.patches import Arc


## Create field image

In [2]:
# Setting boundaries and midpoint:
x_lims = [0, 1.15]
y_lims = [0, 0.74]

x_mid = x_lims[1]/2
y_mid = y_lims[1]/2

# Setting color and linewidth:
background_color = "#1B1B1A"
line_color = "white"
line_width = 2.

NOTE: [Definition of a Function in Python](https://www.w3schools.com/python/python_functions.asp)

## Heatmap - Top goal goalscorer

### Scrape data for top goalscorer
Source --> [Understat website](https://understat.com/)

In [3]:
#selecting europe's Top 5 leagues
leagues = ['EPL/2022','La_liga/2022','Bundesliga/2022','Ligue_1/2022','Serie_A/2022']

In [4]:
#extracting all players id from the leagues
player_id_list =[]
final_jsonfinal_df= pd.DataFrame()
for i in leagues:
  scrape_url = "https://understat.com/league/{}".format(i)
  page_connect = urlopen(scrape_url)
  page_html = BeautifulSoup(page_connect, "html.parser")
  json_raw_string = page_html.findAll(name="script")[3].text
  start_ind = json_raw_string.index("\\")
  stop_ind = json_raw_string.index("')")
  json_data = json_raw_string[start_ind:stop_ind]
  json_data = json_data.encode("utf8").decode("unicode_escape")
  final_json_df = pd.json_normalize(json.loads(json_data))
  final_jsonfinal_df = pd.concat([final_jsonfinal_df, final_json_df], ignore_index=True)

player_id_list = final_jsonfinal_df["id"].to_list()
print(player_id_list)

['8260', '647', '998', '1250', '468', '556', '2517', '7752', '8865', '773', '7322', '2381', '6681', '482', '5543', '6055', '7420', '453', '5232', '6818', '7814', '8379', '8706', '239', '843', '6552', '10720', '10846', '314', '986', '1228', '7698', '10760', '204', '447', '522', '5220', '5786', '6034', '6756', '6854', '10806', '11296', '87', '501', '553', '618', '1679', '1776', '3585', '6049', '6122', '6345', '6853', '8291', '8941', '10177', '531', '620', '675', '700', '750', '1726', '2182', '2203', '3697', '6108', '6857', '7166', '7395', '7700', '8720', '10741', '10804', '10866', '343', '672', '762', '822', '922', '2248', '3635', '5221', '5553', '6063', '6482', '6827', '7365', '7892', '8150', '8327', '9492', '9738', '10405', '10408', '10716', '10743', '11058', '11094', '11297', '755', '757', '833', '910', '1654', '2328', '2335', '4456', '5613', '5956', '6253', '6492', '6630', '6665', '6912', '7768', '8845', '9040', '9678', '9680', '10552', '10715', '10746', '10750', '11317', '486', '500

In [5]:
player_shot_df_list = []
for p_id in player_id_list:
    # Scrape player stats:
    scrape_url = "https://understat.com/player/{}".format(p_id)
    page_connect = urlopen(scrape_url)
    page_html = BeautifulSoup(page_connect, "html.parser")

    json_raw_string = page_html.findAll(name="script")[3].text
    start_ind = json_raw_string.index("\\")
    stop_ind = json_raw_string.index("')")

    json_data = json_raw_string[start_ind:stop_ind]
    json_data = json_data.encode("utf8").decode("unicode_escape")

    shots_df = pd.json_normalize(json.loads(json_data))
    shots_df = shots_df.apply(pd.to_numeric, errors="ignore")

    full_pitch_shots_df = deepcopy(shots_df)

    player_shot_df_list.append(full_pitch_shots_df)

In [6]:
player_shots = pd.concat(player_shot_df_list, ignore_index=True)
player_shots2022_2023 =player_shots[player_shots['season']==2022]

In [7]:
player_shots2022_2023.columns

Index(['id', 'minute', 'result', 'X', 'Y', 'xG', 'player', 'h_a', 'player_id',
       'situation', 'season', 'shotType', 'match_id', 'h_team', 'a_team',
       'h_goals', 'a_goals', 'date', 'player_assisted', 'lastAction'],
      dtype='object')

In [8]:
player_shots2022_2023.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 47653 entries, 205 to 275330
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               47653 non-null  int64  
 1   minute           47653 non-null  int64  
 2   result           47653 non-null  object 
 3   X                47653 non-null  float64
 4   Y                47653 non-null  float64
 5   xG               47653 non-null  float64
 6   player           47653 non-null  object 
 7   h_a              47653 non-null  object 
 8   player_id        47653 non-null  int64  
 9   situation        47653 non-null  object 
 10  season           47653 non-null  int64  
 11  shotType         47653 non-null  object 
 12  match_id         47653 non-null  int64  
 13  h_team           47653 non-null  object 
 14  a_team           47653 non-null  object 
 15  h_goals          47653 non-null  int64  
 16  a_goals          47653 non-null  int64  
 17  date     

In [9]:
player_shots2022_2023.shape

(47653, 20)

In [10]:
player_shots2022_2023.apply(pd.to_numeric, errors="ignore").info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 47653 entries, 205 to 275330
Data columns (total 20 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   id               47653 non-null  int64  
 1   minute           47653 non-null  int64  
 2   result           47653 non-null  object 
 3   X                47653 non-null  float64
 4   Y                47653 non-null  float64
 5   xG               47653 non-null  float64
 6   player           47653 non-null  object 
 7   h_a              47653 non-null  object 
 8   player_id        47653 non-null  int64  
 9   situation        47653 non-null  object 
 10  season           47653 non-null  int64  
 11  shotType         47653 non-null  object 
 12  match_id         47653 non-null  int64  
 13  h_team           47653 non-null  object 
 14  a_team           47653 non-null  object 
 15  h_goals          47653 non-null  int64  
 16  a_goals          47653 non-null  int64  
 17  date     

In [11]:
player_shots2022_2023= player_shots2022_2023.apply(pd.to_numeric, errors="ignore")

In [12]:
player_shots2022_2023.describe().round(1)

Unnamed: 0,id,minute,X,Y,xG,player_id,season,match_id,h_goals,a_goals
count,47653.0,47653.0,47653.0,47653.0,47653.0,47653.0,47653.0,47653.0,47653.0,47653.0
mean,505495.7,48.9,0.9,0.5,0.1,5820.8,2022.0,19109.0,1.6,1.3
std,14753.7,26.7,0.1,0.1,0.2,3463.1,0.0,527.2,1.4,1.1
min,478469.0,0.0,0.0,0.0,0.0,3.0,2022.0,18202.0,0.0,0.0
25%,493134.0,27.0,0.8,0.4,0.0,2356.0,2022.0,18647.0,1.0,0.0
50%,505676.0,49.0,0.9,0.5,0.1,6421.0,2022.0,19108.0,1.0,1.0
75%,517991.0,71.0,0.9,0.6,0.1,8696.0,2022.0,19567.0,2.0,2.0
max,531291.0,105.0,1.0,1.0,1.0,11627.0,2022.0,20159.0,9.0,7.0


In [13]:
player_shots2022_2023.head(10)

Unnamed: 0,id,minute,result,X,Y,xG,player,h_a,player_id,situation,season,shotType,match_id,h_team,a_team,h_goals,a_goals,date,player_assisted,lastAction
205,479846,20,MissedShots,0.936,0.503,0.484085,Erling Haaland,a,8260,OpenPlay,2022,Head,18211,West Ham,Manchester City,0,2,2022-08-07 15:30:00,Phil Foden,Cross
206,479847,35,Goal,0.885,0.5,0.761169,Erling Haaland,a,8260,Penalty,2022,LeftFoot,18211,West Ham,Manchester City,0,2,2022-08-07 15:30:00,,Standard
207,479854,64,Goal,0.864,0.405,0.328053,Erling Haaland,a,8260,OpenPlay,2022,LeftFoot,18211,West Ham,Manchester City,0,2,2022-08-07 15:30:00,Kevin De Bruyne,Throughball
208,479855,69,MissedShots,0.953,0.552,0.526608,Erling Haaland,a,8260,OpenPlay,2022,Head,18211,West Ham,Manchester City,0,2,2022-08-07 15:30:00,Jack Grealish,Chipped
209,479856,75,BlockedShot,0.894,0.71,0.066443,Erling Haaland,a,8260,OpenPlay,2022,LeftFoot,18211,West Ham,Manchester City,0,2,2022-08-07 15:30:00,Ilkay Gündogan,Pass
210,480318,44,SavedShot,0.933,0.707,0.084546,Erling Haaland,h,8260,OpenPlay,2022,LeftFoot,18214,Manchester City,Bournemouth,4,0,2022-08-13 14:00:00,João Cancelo,Chipped
211,480325,72,MissedShots,0.905,0.575,0.136635,Erling Haaland,h,8260,OpenPlay,2022,LeftFoot,18214,Manchester City,Bournemouth,4,0,2022-08-13 14:00:00,Jack Grealish,Pass
212,482576,51,SavedShot,0.822,0.606,0.055626,Erling Haaland,a,8260,OpenPlay,2022,RightFoot,18230,Newcastle United,Manchester City,3,3,2022-08-21 15:30:00,Rúben Dias,TakeOn
213,482581,59,Goal,0.967,0.567,0.541526,Erling Haaland,a,8260,FromCorner,2022,LeftFoot,18230,Newcastle United,Manchester City,3,3,2022-08-21 15:30:00,Rodri,Pass
214,482582,61,SavedShot,0.866,0.443,0.339789,Erling Haaland,a,8260,OpenPlay,2022,LeftFoot,18230,Newcastle United,Manchester City,3,3,2022-08-21 15:30:00,Kevin De Bruyne,Throughball


### How to build a simple heatmap?

### Heatmap with football field integrated

In [22]:
player_shots2022_2023.to_csv("Shots.csv")