# Analysis on new signing Paddy Lane

- Paddy is a Right winger who can also play on the Left Wing
- Played in Portsmouth's 23/24 promotion season
- Using his 22/23, 23/24 & 24/25 performance data, I will forecast what we can expect from him if he were to play a full season

In [18]:
import pandas as pd

# Season-by-season stats for Paddy Lane, read from the project data folder
csv_path = '../../data/paddy_lane_analysis.csv'
df = pd.read_csv(csv_path)

# Preview the first rows to sanity-check the load
df.head()


Unnamed: 0,Name,Age,Rating,League,Club,Season,Appearances,Mins,Goals,Assists,Shots_Per_Game,Xg/90,Avg_Passes,Key_Passes,Pass_Percentage,Dribbles
0,Paddy Lane,24,6.32,Championship,Portsmouth,24/25,22,1072,1,1,0.92,0.13,19.39,1.09,72.41,1.09
1,Paddy Lane,23,6.97,League one,Portsmouth,23/24,42,3015,12,7,1.64,0.36,26.03,1.1,73.28,1.55
2,Paddy Lane,22,6.68,League one,Portsmouth,22/23,35,1661,2,1,0.87,0.11,25.51,1.28,74.03,2.09


In [19]:
# List the column labels available for the analysis
df.columns

Index(['Name', 'Age', 'Rating', 'League', 'Club', 'Season', 'Appearances',
       'Mins', 'Goals', 'Assists', 'Shots_Per_Game', 'Xg/90', 'Avg_Passes',
       'Key_Passes', 'Pass_Percentage', 'Dribbles'],
      dtype='object')

In [20]:
# Project goals and assists onto a full 46-game league season.
# Scaling is by minutes rather than appearances: two players can feature in the
# same number of games but play very different minutes (e.g. as a substitute).

# Minutes available in a full season (46 games × 90 minutes)
full_season_mins = 46 * 90  # 4140 minutes

# Counting stats to project onto a full season
counting_stats = ['Goals', 'Assists']

# Blank out zero-minute rows so the division below cannot divide by zero.
# Series.mask fills with NaN, which keeps 'Mins' numeric; replacing with pd.NA
# would turn an integer column into object dtype, and the projected columns
# derived from it would inherit that dtype.
df['Mins'] = df['Mins'].mask(df['Mins'] == 0)

# projected value = stat × (full-season minutes / minutes actually played)
for col in counting_stats:
    df[f'{col}_full_season'] = df[col] * (full_season_mins / df['Mins'])

# Names of the projected columns created above
normalized_cols = [f'{stat}_full_season' for stat in counting_stats]

# Show each row's player name next to the projected goals and assists
print(df[['Name'] + normalized_cols])

         Name  Goals_full_season  Assists_full_season
0  Paddy Lane           3.861940             3.861940
1  Paddy Lane          16.477612             9.611940
2  Paddy Lane           4.984949             2.492474


In [21]:
# Side-by-side view of Paddy Lane's seasons in League One and the Championship:
# the per-game / per-90 stats from the data plus the full-season projections.

# Stats taken straight from the source data
other_cols = [
    'Shots_Per_Game',
    'Xg/90',
    'Avg_Passes',
    'Key_Passes',
    'Pass_Percentage',
    'Dribbles',
]

# Full-season projections for goals and assists
normalized_cols = ['Goals_full_season', 'Assists_full_season']

# Keep only Paddy Lane's rows and print the chosen columns
is_paddy = df['Name'] == 'Paddy Lane'
print(df.loc[is_paddy, other_cols + normalized_cols])

   Shots_Per_Game  Xg/90  Avg_Passes  Key_Passes  Pass_Percentage  Dribbles  \
0            0.92   0.13       19.39        1.09            72.41      1.09   
1            1.64   0.36       26.03        1.10            73.28      1.55   
2            0.87   0.11       25.51        1.28            74.03      2.09   

   Goals_full_season  Assists_full_season  
0           3.861940             3.861940  
1          16.477612             9.611940  
2           4.984949             2.492474  


In [23]:
# Boost the Championship season's numbers: each stat is multiplied by the
# inverse of a discount factor (1 / 0.90 or 1 / 0.95) rather than scaled down.
inverse_090 = 1 / 0.90   # ~1.11
inverse_095 = 1 / 0.95   # ~1.05

multipliers = {
    'Goals_full_season': inverse_090,
    'Assists_full_season': inverse_090,
    'Shots_Per_Game': inverse_095,
    'Xg/90': inverse_090,
    'Avg_Passes': inverse_095,
    'Key_Passes': inverse_090,
    'Pass_Percentage': 1.00,   # left unchanged
    'Dribbles': inverse_090,
}

# Only Paddy Lane's Championship rows receive an adjusted value
mask = (df['Name'] == 'Paddy Lane') & (df['League'] == 'Championship')

# Write each boosted stat to a sibling '<stat>_L1_adj' column; rows outside the
# mask are not assigned a value in that column.
for stat, factor in multipliers.items():
    df.loc[mask, f'{stat}_L1_adj'] = df.loc[mask, stat].mul(factor)



In [25]:
# Stats that get a single, consistent '<stat>_L1_equiv' column
base_stats = [
    'Goals_full_season', 'Assists_full_season', 'Shots_Per_Game', 'Xg/90',
    'Avg_Passes', 'Key_Passes', 'Pass_Percentage', 'Dribbles'
]

# Matching '<stat>_L1_adj' columns that hold the adjusted Championship values
adjusted_stats = [stat + '_L1_adj' for stat in base_stats]

# Rows whose adjusted value should replace the original one
championship_rows = (df['Name'] == 'Paddy Lane') & (df['League'] == 'Championship')

# For every stat: take the adjusted value on Championship rows and the
# original value everywhere else.
for orig, adj in zip(base_stats, adjusted_stats):
    df[f'{orig}_L1_equiv'] = df[adj].where(championship_rows, df[orig])


In [26]:
# Table columns: season identifiers followed by every '_L1_equiv' stat
view_cols = ['Season', 'League'] + [f'{stat}_L1_equiv' for stat in base_stats]

# Paddy Lane's rows, restricted to his name plus the columns above
is_paddy = df['Name'] == 'Paddy Lane'
paddy_lane_l1_equiv = df.loc[is_paddy, ['Name'] + view_cols]

# Print the table
print(paddy_lane_l1_equiv)


         Name Season        League  Goals_full_season_L1_equiv  \
0  Paddy Lane  24/25  Championship                    4.291045   
1  Paddy Lane  23/24    League one                   16.477612   
2  Paddy Lane  22/23    League one                    4.984949   

   Assists_full_season_L1_equiv  Shots_Per_Game_L1_equiv  Xg/90_L1_equiv  \
0                      4.291045                 0.968421        0.144444   
1                      9.611940                 1.640000        0.360000   
2                      2.492474                 0.870000        0.110000   

   Avg_Passes_L1_equiv  Key_Passes_L1_equiv  Pass_Percentage_L1_equiv  \
0            20.410526             1.211111                     72.41   
1            26.030000             1.100000                     73.28   
2            25.510000             1.280000                     74.03   

   Dribbles_L1_equiv  
0           1.211111  
1           1.550000  
2           2.090000  


In [27]:
# Columns to forecast: the '_L1_equiv' version of each tracked stat
forecast_stats = [
    f'{stem}_L1_equiv'
    for stem in (
        'Goals_full_season',
        'Assists_full_season',
        'Shots_Per_Game',
        'Xg/90',
        'Avg_Passes',
        'Key_Passes',
        'Pass_Percentage',
        'Dribbles',
    )
]


In [28]:
# Numeric season index for modelling: each season label is mapped to the
# year it ends in (e.g. 22/23 → 2023)
season_map = {
    '22/23': 2023,
    '23/24': 2024,
    '24/25': 2025,
}
season_numbers = df['Season'].map(season_map)
df['Season_Num'] = season_numbers


In [29]:
# Rows for Paddy Lane only; this is the input to the forecast
paddy_data = df.loc[df['Name'] == 'Paddy Lane']


In [30]:
from sklearn.linear_model import LinearRegression
import numpy as np

# Fit one straight-line trend per stat over the season index and read off
# the value at the target season.
target_season = 2026

# The regression inputs are the same for every stat, so build them once
season_axis = paddy_data[['Season_Num']].values
next_season = np.array([[target_season]])

forecast = {}
for stat in forecast_stats:
    # Independent model per stat: season number -> stat value
    trend = LinearRegression().fit(season_axis, paddy_data[stat].values)

    # Result key: stat name without the '_L1_equiv' suffix, tagged '_25_26'
    # (hard-coded label; keep it in sync with target_season)
    label = stat.replace('_L1_equiv', '') + '_25_26'
    forecast[label] = trend.predict(next_season)[0]


In [31]:
import pandas as pd

# One-row summary of the forecast, led by the player name and season label
forecast_df = pd.DataFrame([{'Name': 'Paddy Lane', 'Season': '25/26', **forecast}])

# Transpose so each forecast stat prints on its own line
print(forecast_df.T)


                                    0
Name                       Paddy Lane
Season                          25/26
Goals_full_season_25_26      7.890631
Assists_full_season_25_26    7.263724
Shots_Per_Game_25_26         1.257895
Xg/90_25_26                  0.239259
Avg_Passes_25_26            18.884035
Key_Passes_25_26             1.128148
Pass_Percentage_25_26           71.62
Dribbles_25_26               0.738148
