# Player Recommendation System Model

## Position Recommendation

### Import Required Libraries

In [48]:
import numpy as np
import pandas as pd  
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly as px 

### Read CSV

In [49]:
df = pd.read_csv("../data/processed_data/cleaned_player_recommendation_dataset_final.csv")
df

Unnamed: 0,player_id,league_id,team_id,footballer_id,footballer_name_footballer,age,club,league_name,position,position_acronym,...,num_players,avg_age,total_squad_value,country,num_legionnaires,avg_marketing_val,num_teams,players,avg_marketing_val_league,avg_age_league
0,1,tr1,1,1,Fernando Muslera,38,Galatasaray,Süper Lig,Goalkeeper,GK,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
1,2,tr1,1,2,Günay Güvenç,33,Galatasaray,Süper Lig,Goalkeeper,GK,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
2,3,tr1,1,3,Batuhan Şen,25,Galatasaray,Süper Lig,Goalkeeper,GK,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
3,4,tr1,1,4,Atakan Ordu,19,Galatasaray,Süper Lig,Goalkeeper,GK,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
4,5,tr1,1,5,Davinson Sánchez,28,Galatasaray,Süper Lig,Centre-Back,CB,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580,1581,tr2,57,1581,Mehmet Emin Taştan,20,Yeni Malatyaspor,1.Lig,Right Winger,RW,...,27,21.8,950000.0,Türkiye,0,3.518519e+04,20.0,563,255000.0,26.6
1581,1582,tr2,57,1582,Cengizhan Akgün,26,Yeni Malatyaspor,1.Lig,Right Winger,RW,...,27,21.8,950000.0,Türkiye,0,3.518519e+04,20.0,563,255000.0,26.6
1582,1583,tr2,57,1583,Enes Savucu,20,Yeni Malatyaspor,1.Lig,Centre-Forward,CF,...,27,21.8,950000.0,Türkiye,0,3.518519e+04,20.0,563,255000.0,26.6
1583,1584,tr2,57,1584,Halil Atcı,18,Yeni Malatyaspor,1.Lig,Centre-Forward,CF,...,27,21.8,950000.0,Türkiye,0,3.518519e+04,20.0,563,255000.0,26.6


### Position Mapping Dictionary

In [50]:
position_mapping = {
    'Goalkeeper': 'GK',
    'Centre-Back': 'CB',
    'Left-Back': 'LB',
    'Right-Back': 'RB',
    'Defensive Midfield': 'DM',
    'Central Midfield': 'CM',
    'Left Winger': 'LW',
    'Right Winger': 'RW',
    'Second Striker': 'SS',
    'Centre-Forward': 'CF',
    'Attacking Midfield': 'AM',
    'Left Midfield': 'LM',
    'Right Midfield': 'RM',
    'Midfielder': 'MID',
    'Defender': 'DEF',
    'Striker': 'ST'
}

### Function to Shorten Position Names

In [51]:
def shorten_position(position):
    return position_mapping.get(position, position)

### Handle Multiple Positions Separated by Commas

In [52]:
df['position_acronym'] = df['position'].apply(lambda x: [shorten_position(pos.strip()) for pos in x.split(',')])

### Explode the DataFrame to Handle Multiple Positions per Player

In [53]:
df = df.explode('position_acronym')

### Calculate Position Statistics Per Club

In [54]:
position_stats = df.groupby(['club', 'position_acronym']).agg(
    player_count=('position_acronym', 'count'),
    total_market_value=('market_value', 'sum'),
    average_market_value=('market_value', 'mean'),
    total_age=('age', 'sum')
).reset_index()


### Define Ideal Squad Distribution Per Position

In [None]:
ideal_position_distribution = {
    'GK': 2,
    'CB': 4,
    'LB': 2,
    'RB': 2,
    'DM': 2,
    'CM': 4,
    'AM': 2,
    'LW': 2,
    'RW': 2,
    'OFF': 3  # Combined offensive group: CF, SS, ST
}

### Function to Find Top 3 Critical Missing Positions Based on Squad Statistics

In [None]:
def find_critical_positions(club_df):
    club_name = club_df['club'].iloc[0]
    missing = {}

    # Calculate combined offensive players count (CF, SS, ST)
    offensive_positions = ['CF', 'SS', 'ST']
    offensive_data = club_df[club_df['position_acronym'].isin(offensive_positions)]
    offensive_count = offensive_data['player_count'].sum() if not offensive_data.empty else 0
    total_offensive_age = offensive_data['total_age'].sum() if not offensive_data.empty else 0
    average_offensive_market_value = offensive_data['average_market_value'].mean() if not offensive_data.empty else 0

    # Evaluate offensive group as a whole
    if offensive_count < ideal_position_distribution['OFF']:
        age_factor = (total_offensive_age / offensive_count) if offensive_count > 0 else 100
        need_score = (ideal_position_distribution['OFF'] - offensive_count) * 1.5
        need_score += (age_factor / 30)
        need_score += (1 / (average_offensive_market_value + 1))
        missing['OFF'] = need_score

    # Evaluate other positions individually
    for position, ideal_count in ideal_position_distribution.items():
        if position == 'OFF':
            continue  # Skip offensive group here since it's already handled

        pos_data = club_df[club_df['position_acronym'] == position]

        current_count = pos_data['player_count'].sum() if not pos_data.empty else 0
        total_age = pos_data['total_age'].sum() if not pos_data.empty else 0
        average_market_value = pos_data['average_market_value'].mean() if not pos_data.empty else 0

        if current_count < ideal_count:
            age_factor = (total_age / current_count) if current_count > 0 else 100
            need_score = (ideal_count - current_count) * 1.5
            need_score += (age_factor / 30)
            need_score += (1 / (average_market_value + 1))
            missing[position] = need_score

    # Return the top 3 most critical missing positions
    top_3_missing = dict(sorted(missing.items(), key=lambda item: item[1], reverse=True)[:3])
    return pd.Series({'club': club_name, 'top_missing_positions': top_3_missing})

### Apply Function to Find Critical Positions for Each Club

In [None]:
critical_positions_df = position_stats.groupby('club').apply(find_critical_positions).reset_index(drop=True)

### Display Top 3 Critical Missing Positions for Each Club

In [None]:
for _, row in critical_positions_df.iterrows():
    club = row['club']
    top_positions = row['top_missing_positions']
    print(f"Top 3 critical missing positions for {club}:")
    for position, score in top_positions.items():
        position_label = "Offensive Group (CF, SS, ST)" if position == 'OFF' else position
        print(f" - {position_label}: Priority Score = {round(score, 2)}")
    print("-" * 40)

Top 3 critical missing positions for Adana Demirspor:
 - LB: Priority Score = 7.33
 - ST: Priority Score = 7.33
 - SS: Priority Score = 5.83
----------------------------------------
Top 3 critical missing positions for Adanaspor:
 - ST: Priority Score = 7.33
 - SS: Priority Score = 5.83
 - LB: Priority Score = 2.33
----------------------------------------
Top 3 critical missing positions for Alanyaspor:
 - ST: Priority Score = 7.33
 - SS: Priority Score = 5.83
 - DM: Priority Score = 2.5
----------------------------------------
Top 3 critical missing positions for Amed SK:
 - ST: Priority Score = 7.33
 - SS: Priority Score = 5.83
 - CM: Priority Score = 2.44
----------------------------------------
Top 3 critical missing positions for Ankara Keçiörengücü:
 - ST: Priority Score = 7.33
 - SS: Priority Score = 5.83
 - RW: Priority Score = 2.43
----------------------------------------
Top 3 critical missing positions for Ankaragücü:
 - ST: Priority Score = 7.33
 - SS: Priority Score = 5.83

### Define Key Features for Each Position

In [59]:
position_features = {
    'GK': ['goalkeeping', 'reflexes', 'handling', 'communication'],
    'CB': ['tackling', 'marking', 'heading', 'strength'],
    'LB': ['crossing', 'pace', 'tackling', 'positioning'],
    'RB': ['crossing', 'pace', 'tackling', 'positioning'],
    'DM': ['tackling', 'positioning', 'passing', 'stamina'],
    'CM': ['passing', 'vision', 'technique', 'stamina'],
    'AM': ['dribbling', 'vision', 'passing', 'flair'],
    'LW': ['dribbling', 'pace', 'crossing', 'finishing'],
    'RW': ['dribbling', 'pace', 'crossing', 'finishing'],
    'SS': ['finishing', 'off-the-ball', 'dribbling', 'technique'],
    'CF': ['finishing', 'heading', 'pace', 'off-the-ball'],
    'ST': ['finishing', 'heading', 'pace', 'off-the-ball']
}

### Build Player Recommendation System

In [60]:
def recommend_players(missing_positions, last_df):
    recommendations = {}
    for team, positions in missing_positions.items():
        team_recommendations = {}
        for position, count_needed in positions.items():
            if count_needed > 0:
                features = position_features.get(position, [])
                last_df['score'] = last_df[features].mean(axis=1)
                available_players = last_df[
                    (last_df['position_acronym'] == position) &
                    (last_df['team_name'] != team)
                ].sort_values(by='score', ascending=False)
                top_players = available_players.head(count_needed)[['footballer_name_player', 'score']]
                team_recommendations[position] = top_players
        recommendations[team] = team_recommendations
    return recommendations

# Generate Player Recommendations
recommendations = recommend_players(missing_positions, df)

NameError: name 'missing_positions' is not defined

### Display Recommendations for a Specific Team


In [None]:
sample_team = list(recommendations.keys())[0]  # Example: First team in the list
print(f"Recommendations for {sample_team}:")
for position, players in recommendations[sample_team].items():
    print(f"\nPosition: {position}")
    print(players)

Recommendations for Adana Demirspor:

Position: LB
   footballer_name_player  score
37           Filip Kostić  68.75
36     Jayden Oosterwolde  68.75

Position: RB
   footballer_name_player  score
98         Pedro Malheiro   67.5

Position: DM
   footballer_name_player  score
69         Ali Al Musrati   75.0

Position: CM
    footballer_name_player  score
44                    Fred  76.25
213          Jonjo Shelvey  75.00

Position: RW
    footballer_name_player  score
111             Edin Visca   70.0

Position: SS
   footballer_name_player  score
83                   Rafa   75.0

Position: ST
     footballer_name_player  score
1529              Ali Kılıç   42.5
