In [1]:
# Native_API_SQL_Analysis.ipynb

# 🧱 Step 1: Import dependencies
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

# 🔐 Step 2: Load environment variables
# Connection settings come from the process environment; load_dotenv() first
# merges in any values found in a local .env file.
load_dotenv()
pg_user = os.getenv("PG_USER")
pg_password = os.getenv("PG_PASSWORD")
pg_host = os.getenv("PG_HOST")
pg_db = os.getenv("PG_DB")

# Fail fast with a readable message: os.getenv() returns None for unset
# variables, which would otherwise be interpolated into the URL as the literal
# text "None" and only surface later as an opaque connection error.
required_settings = {"PG_USER": pg_user, "PG_HOST": pg_host, "PG_DB": pg_db}
missing_settings = [key for key, value in required_settings.items() if not value]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# 🛢️ Step 3: Connect to PostgreSQL
# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot be misread as URL delimiters.
# NOTE(review): this expects PG_USER / PG_PASSWORD to hold raw (not already
# percent-encoded) values — confirm against the .env file in use.
# PG_HOST is left untouched so a "host:port" value keeps working.
credentials = quote(pg_user, safe="")
if pg_password:
    credentials += ":" + quote(pg_password, safe="")
engine = create_engine(f"postgresql://{credentials}@{pg_host}/{pg_db}")

# 📋 Set pandas to display all rows
pd.set_option('display.max_rows', None)

In [2]:
# 🎯 Query 1 — Who are the most utilized players by minutes played?
# (This title must stay a comment: in a code cell, a bare line ending in "?" is
# executed by IPython as a help lookup on the last word.)
# =============================================

# 💼 Business Question:
# Descriptive Analytics:
# Which Manchester City players logged the most minutes during the season, and what are 
# their positions and contract details?

# minutes_played is stored as text, so non-digit characters are stripped before
# casting. NULLIF turns an all-non-digit value (which becomes '') into NULL
# instead of letting ''::INTEGER raise, and NULLS LAST keeps such rows from
# sorting ahead of real values (PostgreSQL puts NULLs first for DESC).
# ORDER BY refers to the cleaned integer output column, not the raw text column.
query_1 = '''
SELECT 
    name,
    position,
    contract_start,
    contract_end,
    NULLIF(REGEXP_REPLACE(minutes_played, '[^0-9]', '', 'g'), '')::INTEGER AS minutes_played
FROM raw.raw_web_players
ORDER BY minutes_played DESC NULLS LAST
LIMIT 10;
'''

# Run the query through the shared engine and show the (at most ten) rows.
players_df = pd.read_sql(query_1, engine)
players_df.head(10)


Object `played` not found.


Unnamed: 0,name,position,contract_start,contract_end,minutes_played
0,Joško Gvardiol,Defence,2023-08,2028-06,3009
1,Erling Haaland,Offence,2022-07,2034-06,2484
2,Bernardo Silva,Midfield,2020-07,2026-06,2400
3,Mateo Kovačić,Midfield,2023-07,2027-06,2053
4,Ilkay Gündogan,Midfield,2024-08,2026-06,2052
5,Ederson,Goalkeeper,2017-07,2026-06,2051
6,Rúben Dias,Defence,2020-09,2027-06,1998
7,Rico Lewis,Defence,2022-08,2028-06,1822
8,Manuel Akanji,Defence,2022-09,2027-06,1744
9,Sávio,Midfield,2024-07,2029-06,1742


In [None]:
### 📈 Insight

# The top ten players by minutes played are spread across defense, midfield, offense, and 
# goalkeeping. Notably, **Joško Gvardiol**, a defender with a long-term contract ending in 
# 2028, leads in total minutes, followed by **Erling Haaland** and **Bernardo Silva**. 
# This suggests strong physical availability and tactical importance in the squad’s rotation
# strategy.

# ---

### ✅ Recommendation

# Manchester City should consider implementing load management strategies for high-minute 
# players like Gvardiol and Silva to mitigate injury risk. Resting them during low-stakes
#  matches or early domestic cup rounds could help sustain their performance in key fixtures.

# ---

### 🔮 Prediction

# If these players continue to accumulate high minutes without injuries, they will remain 
# pivotal in the club’s success across all competitions. Management may prioritize contract 
# extensions or performance-based incentives for key contributors whose contracts are 
# nearing expiration.


In [5]:
# 📊 Business Question:
# Which player positions have the highest average minutes played?
# Understanding which roles are most heavily utilized can inform recruitment, training, 
# and workload management.

# minutes_played is stored as text, so non-digit characters are stripped before
# casting. NULLIF turns an all-non-digit value (which becomes '') into NULL
# instead of letting ''::INTEGER raise; AVG then simply skips those rows.
query_api_position_minutes = '''
SELECT 
    position,
    ROUND(AVG(
        NULLIF(REGEXP_REPLACE(minutes_played, '[^0-9]', '', 'g'), '')::INTEGER
    ), 2) AS avg_minutes_played
FROM raw.raw_web_players
WHERE minutes_played IS NOT NULL
GROUP BY position
ORDER BY avg_minutes_played DESC
LIMIT 5;
'''

In [6]:
# Execute the per-position average query and render the resulting frame.
position_minutes_df = pd.read_sql(
    sql=query_api_position_minutes,
    con=engine,
)
position_minutes_df

Unnamed: 0,position,avg_minutes_played
0,Midfield,1107.93
1,Offence,878.75
2,Defence,866.75
3,Goalkeeper,630.0


In [None]:
# 🔍 Insight
# Midfielders have the highest average minutes played (1,107.93), significantly ahead of 
# other positions, with goalkeepers at the bottom (630.00). This suggests a heavy reliance
#  on midfielders for consistent match participation, potentially due to their pivotal role 
# in both defense and attack.

# ✅ Recommendation
# Manchester City should evaluate the physical load and rotation strategies for midfielders
#  to prevent fatigue-related injuries. Additionally, reassess squad depth in midfield to 
# ensure adequate coverage during congested fixtures or injuries.

# 📈 Prediction
# If the current playtime trends persist without strategic rotation, the team risks decreased
#  midfield performance during high-stakes matches due to overuse. Conversely, optimizing 
# rotation could enhance midfield efficiency and extend player longevity.