This notebook will focus on establishing initial investigation for:
1. Fetching data from the API.
2. Selecting and extracting relevant fields.
3. Preprocessing and cleaning the data (handling missing values, normalizing, etc.).

In [7]:
# Dependencies
import requests
import pandas as pd


# API Endpoint
url = 'https://fantasy.premierleague.com/api/bootstrap-static/'

# Send a GET request to the API
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()
    
    # Extract player data from the 'elements' key
    players = data['elements']
    
    # Create a DataFrame with the relevant defensive metrics
    df = pd.DataFrame(players)
    
    # Select relevant columns for defensive analysis
    defensive_metrics_df = df[['first_name', 'second_name', 'team', 'element_type', 'clean_sheets', 
                               'goals_conceded', 'yellow_cards', 'red_cards', 'minutes', 
                               'bps', 'starts', 'own_goals']]
    
    # Display the first few rows of the DataFrame
    print(defensive_metrics_df.head())
else:
    print(f"Failed to retrieve data. Status code: {response.status_code}")

  first_name           second_name  team  element_type  clean_sheets  \
0      Fábio       Ferreira Vieira     1             3             0   
1    Gabriel     Fernando de Jesus     1             4             0   
2    Gabriel  dos Santos Magalhães     1             2             2   
3        Kai               Havertz     1             4             2   
4       Karl                  Hein     1             1             0   

   goals_conceded  yellow_cards  red_cards  minutes  bps  starts  own_goals  
0               0             0          0        0    0       0          0  
1               0             1          0        5    1       0          0  
2               1             1          0      255   59       3          0  
3               1             0          0      255   76       3          0  
4               0             0          0        0    0       0          0  


In [8]:
# Mapping for element_type (position type)
position_mapping = {
    1: 'Goalkeeper',
    2: 'Defender',
    3: 'Midfielder',
    4: 'Forward'
}

# Apply the mapping to the 'element_type' column
defensive_metrics_df['element_type'] = defensive_metrics_df['element_type'].map(position_mapping)

# Filter the DataFrame to include only rows where 'element_type' is 'Defender'
defensive_metrics_df = defensive_metrics_df[defensive_metrics_df['element_type'] == 'Defender']

# Mapping team codes to team names
team_mapping = {team['id']: team['name'] for team in data['teams']}
defensive_metrics_df['team'] = defensive_metrics_df['team'].map(team_mapping)

# Display the updated DataFrame
print(defensive_metrics_df.head())

   first_name           second_name     team element_type  clean_sheets  \
2     Gabriel  dos Santos Magalhães  Arsenal     Defender             2   
5     Jurriën                Timber  Arsenal     Defender             1   
7       Jakub                Kiwior  Arsenal     Defender             0   
14    William                Saliba  Arsenal     Defender             2   
16     Kieran               Tierney  Arsenal     Defender             0   

    goals_conceded  yellow_cards  red_cards  minutes  bps  starts  own_goals  
2                1             1          0      255   59       3          0  
5                1             1          0      174   28       2          0  
7                0             0          0        0    0       0          0  
14               1             0          0      255   63       3          0  
16               0             0          0        0    0       0          0  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  defensive_metrics_df['element_type'] = defensive_metrics_df['element_type'].map(position_mapping)


Data Cleaning

In [9]:
# Check for missing values
print(defensive_metrics_df.isnull().sum())

first_name        0
second_name       0
team              0
element_type      0
clean_sheets      0
goals_conceded    0
yellow_cards      0
red_cards         0
minutes           0
bps               0
starts            0
own_goals         0
dtype: int64


In [10]:
defensive_metrics_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 208 entries, 2 to 636
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   first_name      208 non-null    object
 1   second_name     208 non-null    object
 2   team            208 non-null    object
 3   element_type    208 non-null    object
 4   clean_sheets    208 non-null    int64 
 5   goals_conceded  208 non-null    int64 
 6   yellow_cards    208 non-null    int64 
 7   red_cards       208 non-null    int64 
 8   minutes         208 non-null    int64 
 9   bps             208 non-null    int64 
 10  starts          208 non-null    int64 
 11  own_goals       208 non-null    int64 
dtypes: int64(8), object(4)
memory usage: 21.1+ KB


In [11]:
# Normalize the relevant metrics to account for playing time differences.

# Calculate per 90-minute statistics
defensive_metrics_df['clean_sheets_per_90'] = (defensive_metrics_df['clean_sheets'] / defensive_metrics_df['minutes']) * 90
defensive_metrics_df['goals_conceded_per_90'] = (defensive_metrics_df['goals_conceded'] / defensive_metrics_df['minutes']) * 90
defensive_metrics_df['bps_per_90'] = (defensive_metrics_df['bps'] / defensive_metrics_df['minutes']) * 90

# Fill NaN values resulting from normalization (e.g., players with zero minutes)
defensive_metrics_df.fillna(0, inplace=True)

# Display the updated DataFrame
print(defensive_metrics_df.head())

   first_name           second_name     team element_type  clean_sheets  \
2     Gabriel  dos Santos Magalhães  Arsenal     Defender             2   
5     Jurriën                Timber  Arsenal     Defender             1   
7       Jakub                Kiwior  Arsenal     Defender             0   
14    William                Saliba  Arsenal     Defender             2   
16     Kieran               Tierney  Arsenal     Defender             0   

    goals_conceded  yellow_cards  red_cards  minutes  bps  starts  own_goals  \
2                1             1          0      255   59       3          0   
5                1             1          0      174   28       2          0   
7                0             0          0        0    0       0          0   
14               1             0          0      255   63       3          0   
16               0             0          0        0    0       0          0   

    clean_sheets_per_90  goals_conceded_per_90  bps_per_90  
2      

Exploratory Data Analysis (EDA):

In [12]:
defensive_metrics_df[['starts']].max()

starts    3
dtype: int64