### Part 3 – Individual Athlete Visualization  
**Contributor:** Jonathan Jafari  
**Athlete:** PLAYER_680  
**Metric:** Jump Height (m)  
**Figure file:** `screenshots/part3_player680_line_plot.png`

In [None]:
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
import pandas as pd


# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Database settings; each one is None when its variable is not set.
DB_USER, DB_PASSWORD, DB_HOST, DB_PORT, DB_NAME, DB_TABLE = map(
    os.getenv,
    ("DB_USER", "DB_PASSWORD", "DB_HOST", "DB_PORT", "DB_NAME", "DB_TABLE"),
)

connection_string = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
engine = create_engine(connection_string)

# Pull at most 50,000 rows of the table into memory for exploration.
sample_query = text(f"SELECT * FROM {DB_TABLE} LIMIT 50000")
df = pd.read_sql(sample_query, engine)
df.head()


In [None]:
# Rank metric names by row count and keep the 40 most frequent,
# to help choose which metric to visualise.
metric_counts = df["metric"].value_counts().iloc[:40]
metric_counts


In [None]:
# Set this to one of the metric names shown in the metric_counts output.
metric_of_interest = "Jump Height(m)"

# Rank players by how many rows they have for that metric (top 20).
has_metric = df["metric"] == metric_of_interest
players_with_metric = df.loc[has_metric, "playername"].value_counts().head(20)

players_with_metric


In [None]:
player_of_interest = "PLAYER_680"
metric_of_interest = "Jump Height(m)"

# Keep only this athlete's rows for the chosen metric.
subset = df[
    (df["playername"] == player_of_interest)
    & (df["metric"] == metric_of_interest)
].copy()

# Normalise dtypes before ordering and plotting (same cleanup as the df_team
# cell later in this notebook): string timestamps would sort lexically and
# plot as categories, and string values would not give a numeric y-axis.
# Both calls leave already-typed columns unchanged.
subset["timestamp"] = pd.to_datetime(subset["timestamp"])
subset["value"] = pd.to_numeric(subset["value"], errors="coerce")

# Chronological order so the line plot connects points left to right.
subset = subset.sort_values("timestamp")

subset.head()


In [None]:
import matplotlib.pyplot as plt

# savefig() does not create missing directories, so make sure the output
# folder exists before plotting (a no-op when it is already there).
figure_path = "screenshots/part3_player680_line_plot.png"
os.makedirs(os.path.dirname(figure_path), exist_ok=True)

plt.figure(figsize=(12, 5))

# One point per recorded test, joined in the order of `subset`.
plt.plot(subset["timestamp"], subset["value"], marker='o')
plt.xlabel("Date")
plt.ylabel(metric_of_interest)
plt.title(f"{metric_of_interest} Over Time for {player_of_interest}")
plt.xticks(rotation=45)
plt.tight_layout()

# Save before show(): the figure is written to disk while it is still current.
plt.savefig(figure_path, dpi=300)
plt.show()



# Xiao's Version 3.1 Individual Athlete Timeline (Pair Work)

In [None]:
%pip install scipy
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import linregress

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Database settings; each one is None when its variable is not set.
DB_USER = os.environ.get("DB_USER")
DB_PASSWORD = os.environ.get("DB_PASSWORD")
DB_HOST = os.environ.get("DB_HOST")
DB_PORT = os.environ.get("DB_PORT")
DB_NAME = os.environ.get("DB_NAME")
DB_TABLE = os.environ.get("DB_TABLE")

connection_string = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

# NOTE: create_engine() only configures the engine; the first real connection
# is opened by the read_sql() call below.
engine = create_engine(connection_string)
print("Database connection established successfully.")

# Read the whole table into memory.
full_table_query = text(f"SELECT * FROM {DB_TABLE}")
df = pd.read_sql(full_table_query, engine)
print("Data retrieved successfully.\n")
df.head()  # not the last expression of the cell, so the notebook does not render it

# Metric names used by the SQL filters and the plots in the following cells.
selected_metrics = [
    "Jump Height(m)",
    "Peak Propulsive Force(N)",
    "Peak Velocity(m/s)",
    "Propulsive Net Impulse(N.s)",
    "mRSI",
]


In [None]:
# Rank teams by how many of the selected metrics they have data for.
# Each metric name is wrapped in single quotes to form a SQL IN (...) list.
metrics_sql = ", ".join(f"'{m}'" for m in selected_metrics)

# Per team: the number of distinct selected metrics with at least one row.
query_team_coverage = f"""
SELECT 
    team,
    COUNT(DISTINCT metric) AS num_metrics_found
FROM {DB_TABLE}
WHERE metric IN ({metrics_sql})
GROUP BY team
ORDER BY num_metrics_found DESC;
"""

team_coverage = pd.read_sql(sql=text(query_team_coverage), con=engine)
team_coverage

In [None]:
# Find players on the chosen team who have data for every selected metric.
# Single quotes in the team name are doubled so the name can sit inside a
# quoted SQL string literal.
chosen_team = "Team: Stony Brook Men's Basketball".replace("'", "''")

# `team` is part of the GROUP BY so every selected column is either grouped or
# aggregated. The WHERE clause pins `team` to a single value, so ordering by
# it left the row order undefined; order by playername for a stable listing.
query_playersallmetrics = f"""
SELECT
    playername,
    COUNT(DISTINCT metric) AS num_metrics,team
FROM {DB_TABLE}
WHERE team = '{chosen_team}'
  AND metric IN ({metrics_sql})
GROUP BY playername, team
HAVING num_metrics = {len(selected_metrics)}
ORDER BY playername
"""

df_playersallmetrics = pd.read_sql(text(query_playersallmetrics), engine)
df_playersallmetrics

In [None]:
# PLAYER_999 and PLAYER_404 have been chosen for our metrics.
# Build the main DataFrame and clean up column types for the plotting function.
player_list = ["PLAYER_999", "PLAYER_404"]

# Double any single quotes *outside* the f-string: reusing the enclosing quote
# character inside an f-string expression is a SyntaxError before Python 3.12.
escaped_players = [p.replace("'", "''") for p in player_list]
player_sql = ", ".join(f"'{p}'" for p in escaped_players)

# All rows for the chosen players, restricted to the chosen team and metrics.
query_team_metrics = f"""
SELECT * 
FROM {DB_TABLE}
WHERE team = '{chosen_team}'
AND playername IN ({player_sql})
AND metric IN ({metrics_sql});
"""
df_team = pd.read_sql(text(query_team_metrics), engine)

# Normalise dtypes; values that cannot be parsed as numbers become NaN.
df_team["timestamp"] = pd.to_datetime(df_team["timestamp"])
df_team["value"] = pd.to_numeric(df_team["value"], errors="coerce")

print(df_team.shape)
df_team.head()

In [None]:
months_back = 12
PLAYER_999 = "PLAYER_999"
PLAYER_404 = "PLAYER_404"


# Plotting function for individual players by selected metrics
def plot_player_metrics(df_team, athlete, selected_metrics, months_back=12):
    """Plot each selected metric over time for one athlete and summarise it.

    Args:
        df_team: DataFrame with "playername", "metric", "timestamp" and
            "value" columns. "timestamp" is expected to be datetime-like and
            "value" numeric.
        athlete: Value of "playername" to plot.
        selected_metrics: Iterable of metric names; one figure is drawn per
            metric that has data.
        months_back: Size of the window, in months, counted back from the
            athlete's most recent record.

    Returns:
        A DataFrame with one row per plotted metric and the columns "Metric",
        "Best Value", "Best Date", "Worst Value", "Worst Date", "Trend Slope",
        "R-value" and "P-value". It is empty (same columns) when the athlete
        has no rows or none of the metrics has data.
    """
    summary_columns = [
        "Metric",
        "Best Value", "Best Date",
        "Worst Value", "Worst Date",
        "Trend Slope", "R-value", "P-value",
    ]

    player_data = df_team[df_team["playername"] == athlete].copy()
    if player_data.empty:
        print(f"No data found for {athlete}.")
        # Return an (empty) DataFrame instance so callers always get the same type.
        return pd.DataFrame(columns=summary_columns)

    # Sort chronologically so the lines are drawn left to right.
    player_data = player_data.sort_values("timestamp")

    # Restrict to the last `months_back` months before the latest record.
    max_date = player_data["timestamp"].max()
    min_date = max_date - pd.DateOffset(months=months_back)
    player_data = player_data[player_data["timestamp"] >= min_date]

    print(f"\n===== Player: {athlete} =====")
    print(f" Data range: {player_data['timestamp'].min()} to {player_data['timestamp'].max()}")
    print(f"Rows: {player_data.shape[0]}")
    summary_rows = []

    for metric in selected_metrics:
        metric_data = player_data[player_data["metric"] == metric]
        # Drop incomplete rows up front so the plot, the best/worst lookup and
        # the regression all work on the same, fully populated data.
        metric_data = metric_data.dropna(subset=["timestamp", "value"]).copy()
        if metric_data.empty:
            print(f" No data for metric: {metric}")
            continue

        # Plotting
        plt.figure(figsize=(10, 4))
        plt.plot(metric_data["timestamp"], metric_data["value"], marker="o")
        plt.title(f"{metric} over time - {athlete}")
        plt.xlabel("Date")
        plt.ylabel(metric)
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

        # Best and worst performances (highest and lowest recorded value).
        best_idx = metric_data["value"].idxmax()
        worst_idx = metric_data["value"].idxmin()

        # Linear trend: regress value on the proleptic Gregorian day number,
        # so the slope is expressed in metric units per day.
        metric_data["ts_num"] = metric_data["timestamp"].map(pd.Timestamp.toordinal)

        # linregress needs at least two distinct x values; otherwise report a
        # flat, non-significant trend instead of raising.
        if len(metric_data) > 1 and metric_data["ts_num"].nunique() > 1:
            fit = linregress(metric_data["ts_num"], metric_data["value"])
            slope, r_value, p_value = fit.slope, fit.rvalue, fit.pvalue
        else:
            slope, r_value, p_value = 0, 0, 1

        # A dict keyed by column name keeps every field in its own column
        # (a set would lose order and collapse equal values).
        summary_rows.append({
            "Metric": metric,
            "Best Value": metric_data.loc[best_idx, "value"],
            "Best Date": metric_data.loc[best_idx, "timestamp"],
            "Worst Value": metric_data.loc[worst_idx, "value"],
            "Worst Date": metric_data.loc[worst_idx, "timestamp"],
            "Trend Slope": slope,
            "R-value": r_value,
            "P-value": p_value,
        })

    # Built once, after the loop, so it exists even when every metric was skipped.
    return pd.DataFrame(summary_rows, columns=summary_columns)

In [None]:
# Draw the per-metric timelines for PLAYER_999 and keep the summary table.
summary_PLAYER_999 = plot_player_metrics(df_team, PLAYER_999, selected_metrics, months_back=12)

summary_PLAYER_999

In [None]:
# Draw the per-metric timelines for PLAYER_404 and keep the summary table.
summary_PLAYER_404 = plot_player_metrics(df_team, PLAYER_404, selected_metrics, months_back=12)

summary_PLAYER_404

# 3.1 Individual Athlete Timeline (Pair Work)
## Contributor: Anthony Mapuyan 

In [None]:
# Establish database connection and import libraries
import os
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Credentials and location have no fallback (None when unset);
# the port and table name fall back to the defaults given here.
DB_USER = os.environ.get("DB_USER")
DB_PASSWORD = os.environ.get("DB_PASSWORD")
DB_HOST = os.environ.get("DB_HOST")
DB_PORT = os.environ.get("DB_PORT", "3306")
DB_NAME = os.environ.get("DB_NAME")
DB_TABLE = os.environ.get("DB_TABLE", "research_experiment_refractor_test")

# Assemble the SQLAlchemy URL for the PyMySQL driver.
connection_string = f"mysql+pymysql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"

# NOTE: create_engine() only configures the engine; no connection is opened
# until the first query is executed.
engine = create_engine(connection_string)

print("Database connection established successfully.")

In [None]:
# Metric names used in this section. The same five names appear in the
# lower-case `selected_metrics` list defined earlier in this notebook.
SELECTED_METRICS = [
    "Jump Height(m)",
    "Peak Propulsive Force(N)",
    "Peak Velocity(m/s)",
    "Propulsive Net Impulse(N.s)",
    "mRSI",
]
