# Player Recommendation System Model

## Position Recommendation

### Import Required Libraries

In [1]:
import numpy as np
import pandas as pd  
import matplotlib.pyplot as plt 
import seaborn as sns 
import plotly as px
from sklearn.tree import DecisionTreeClassifier

### Read CSV

In [2]:
# Load the player dataset from the processed-data folder (path is relative to the notebook's working directory).
df = pd.read_csv("../data/processed_data/cleaned_player_recommendation_dataset_final.csv")
# Bare expression so the notebook renders the frame as this cell's output.
df

Unnamed: 0,player_id,league_id,team_id,footballer_id,footballer_name_footballer,age,club,league_name,position,position_acronym,...,num_players,avg_age,total_squad_value,country,num_legionnaires,avg_marketing_val,num_teams,players,avg_marketing_val_league,avg_age_league
0,1,tr1,1,1,Fernando Muslera,38,Galatasaray,Süper Lig,Goalkeeper,GK,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
1,2,tr1,1,2,Günay Güvenç,33,Galatasaray,Süper Lig,Goalkeeper,GK,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
2,3,tr1,1,3,Batuhan Şen,25,Galatasaray,Süper Lig,Goalkeeper,GK,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
3,4,tr1,1,4,Atakan Ordu,19,Galatasaray,Süper Lig,Goalkeeper,GK,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
4,5,tr1,1,5,Davinson Sánchez,28,Galatasaray,Süper Lig,Centre-Back,CB,...,28,26.7,262050000.0,Türkiye,16,9.360000e+06,19.0,563,2040000.0,26.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1580,1581,tr2,57,1581,Mehmet Emin Taştan,20,Yeni Malatyaspor,1.Lig,Right Winger,RW,...,27,21.8,950000.0,Türkiye,0,3.518519e+04,20.0,563,255000.0,26.6
1581,1582,tr2,57,1582,Cengizhan Akgün,26,Yeni Malatyaspor,1.Lig,Right Winger,RW,...,27,21.8,950000.0,Türkiye,0,3.518519e+04,20.0,563,255000.0,26.6
1582,1583,tr2,57,1583,Enes Savucu,20,Yeni Malatyaspor,1.Lig,Centre-Forward,CF,...,27,21.8,950000.0,Türkiye,0,3.518519e+04,20.0,563,255000.0,26.6
1583,1584,tr2,57,1584,Halil Atcı,18,Yeni Malatyaspor,1.Lig,Centre-Forward,CF,...,27,21.8,950000.0,Türkiye,0,3.518519e+04,20.0,563,255000.0,26.6


### Position Mapping Dictionary

In [3]:
# Lookup table from full position names to short acronyms.
# shorten_position() falls back to the input string for any name that is not a key here.
position_mapping = {
    'Goalkeeper': 'GK',
    'Centre-Back': 'CB',
    'Left-Back': 'LB',
    'Right-Back': 'RB',
    'Defensive Midfield': 'DM',
    'Central Midfield': 'CM',
    'Left Winger': 'LW',
    'Right Winger': 'RW',
    'Second Striker': 'SS',
    'Centre-Forward': 'CF',
    'Attacking Midfield': 'AM',
    'Left Midfield': 'LM',
    'Right Midfield': 'RM',
    'Midfielder': 'MID',
    'Defender': 'DEF',
    'Striker': 'ST'
}

### Function to Shorten Position Names

In [4]:
def shorten_position(position):
    """Return the acronym for a full position name.

    Names that are not keys of ``position_mapping`` are handed back unchanged.
    """
    if position in position_mapping:
        return position_mapping[position]
    return position

### Handle Multiple Positions Separated by Commas

In [5]:
# Split each comma-separated position string, trim the pieces and abbreviate them,
# leaving one list of acronyms per row in 'position_acronym'.
df['position_acronym'] = df['position'].apply(
    lambda raw_positions: [
        shorten_position(name.strip())
        for name in raw_positions.split(',')
    ]
)

### Explode the DataFrame to Handle Multiple Positions per Player

In [6]:
# Turn each entry of the 'position_acronym' lists into its own row; the other columns are
# repeated, and rows that come from the same player keep the same index label.
df = df.explode('position_acronym')

### Calculate Position Statistics Per Club

In [7]:
# Per (club, position) summary: head count, market-value total and mean, and summed age.
# Each output column is declared as (source column, aggregation).
position_stats = (
    df.groupby(['club', 'position_acronym'])
    .agg(**{
        'player_count': ('position_acronym', 'count'),
        'total_market_value': ('market_value', 'sum'),
        'average_market_value': ('market_value', 'mean'),
        'total_age': ('age', 'sum'),
    })
    .reset_index()
)


### Define Ideal Squad Distribution Per Position

In [8]:
# Target number of players per position; find_critical_positions() compares each club's
# actual head count against these quotas. 'OFF' is not a real acronym in the data: it is a
# pooled bucket that find_critical_positions() fills from the CF, SS and ST rows.
ideal_position_distribution = {
    'GK': 2,
    'CB': 4,
    'LB': 2,
    'RB': 2,
    'DM': 2,
    'CM': 4,
    'AM': 2,
    'LW': 2,
    'RW': 2,
    'OFF': 3  # Combined offensive group: CF, SS, ST
}

### Function to Find Top 3 Critical Missing Positions Based on Squad Statistics

In [9]:
def _summarize_position_rows(rows):
    """Collapse per-position stat rows into (player count, summed age, mean of the average market values)."""
    if rows.empty:
        return 0, 0, 0
    return (
        rows['player_count'].sum(),
        rows['total_age'].sum(),
        rows['average_market_value'].mean(),
    )


def _need_score(player_count, total_age, average_market_value, ideal_count):
    """Return the priority score of an under-staffed position, or None when the quota is met."""
    if player_count >= ideal_count:
        return None
    # A completely unfilled position gets a fixed age factor of 100 instead of a mean age.
    age_factor = (total_age / player_count) if player_count > 0 else 100
    # The mean is NaN when every market value in the rows is missing. Treat that like the
    # "no rows" case (value 0) so NaN never reaches the ranking, where it would make the
    # sort order unreliable.
    if pd.isna(average_market_value):
        average_market_value = 0
    need_score = (ideal_count - player_count) * 1.5
    need_score += age_factor / 30
    need_score += 1 / (average_market_value + 1)
    return need_score


def find_critical_positions(club_df, ideal_distribution=None, top_n=3):
    """Rank the positions in which one club is short of players.

    Parameters
    ----------
    club_df : pandas.DataFrame
        Rows of the per-position statistics for a single club. The columns read are
        'club', 'position_acronym', 'player_count', 'total_age' and
        'average_market_value'.
    ideal_distribution : dict, optional
        Mapping of position acronym to target head count. Defaults to the notebook-level
        ``ideal_position_distribution``. The key 'OFF' is evaluated against the combined
        CF, SS and ST rows.
    top_n : int, optional
        How many of the highest-scoring positions to keep (default 3).

    Returns
    -------
    pandas.Series
        'club' holds the club name and 'top_missing_positions' a dict of
        position -> need score, ordered from most to least critical. Positions that
        meet their quota are left out.
    """
    if ideal_distribution is None:
        ideal_distribution = ideal_position_distribution

    club_name = club_df['club'].iloc[0]
    offensive_positions = ['CF', 'SS', 'ST']

    # Evaluate the pooled offensive group first so that, on equal scores, it keeps its
    # place ahead of the individual positions (the sort below is stable).
    evaluation_order = [pos for pos in ideal_distribution if pos == 'OFF']
    evaluation_order += [pos for pos in ideal_distribution if pos != 'OFF']

    missing = {}
    for position in evaluation_order:
        if position == 'OFF':
            rows = club_df[club_df['position_acronym'].isin(offensive_positions)]
        else:
            rows = club_df[club_df['position_acronym'] == position]

        player_count, total_age, average_market_value = _summarize_position_rows(rows)
        score = _need_score(player_count, total_age, average_market_value, ideal_distribution[position])
        if score is not None:
            missing[position] = score

    # Keep only the most critical positions, highest score first.
    ranked = sorted(missing.items(), key=lambda item: item[1], reverse=True)
    top_missing = dict(ranked[:top_n])
    return pd.Series({'club': club_name, 'top_missing_positions': top_missing})

### Apply Function to Find Critical Positions for Each Club

In [10]:
# Build one row per club with its ranked shortages.
# The groups are iterated directly instead of going through GroupBy.apply: iteration always
# yields frames that still contain the 'club' column, which find_critical_positions reads,
# whereas apply() on the grouping column is deprecated in recent pandas releases.
critical_positions_df = pd.DataFrame(
    [find_critical_positions(club_stats) for _, club_stats in position_stats.groupby('club')],
    columns=['club', 'top_missing_positions'],
)

### Display Top 3 Critical Missing Positions for Each Club

In [11]:
# Print every club's ranked shortages; the pooled 'OFF' key is shown with a descriptive label.
separator = "-" * 40
for club, top_positions in zip(critical_positions_df['club'], critical_positions_df['top_missing_positions']):
    print(f"Top 3 critical missing positions for {club}:")
    for position, score in top_positions.items():
        if position == 'OFF':
            position_label = "Offensive Group (CF, SS, ST)"
        else:
            position_label = position
        print(f" - {position_label}: Priority Score = {round(score, 2)}")
    print(separator)

Top 3 critical missing positions for Adana Demirspor:
 - LB: Priority Score = 7.33
 - CM: Priority Score = 3.88
 - RW: Priority Score = 2.37
----------------------------------------
Top 3 critical missing positions for Adanaspor:
 - LB: Priority Score = 2.33
 - LW: Priority Score = 2.33
 - RW: Priority Score = 2.3
----------------------------------------
Top 3 critical missing positions for Alanyaspor:
 - DM: Priority Score = 2.5
 - RB: Priority Score = 2.5
 - CM: Priority Score = 2.33
----------------------------------------
Top 3 critical missing positions for Amed SK:
 - CM: Priority Score = 2.44
 - Offensive Group (CF, SS, ST): Priority Score = 2.42
 - RW: Priority Score = 2.37
----------------------------------------
Top 3 critical missing positions for Ankara Keçiörengücü:
 - RW: Priority Score = 2.43
 - CM: Priority Score = 2.34
 - RB: Priority Score = 2.23
----------------------------------------
Top 3 critical missing positions for Ankaragücü:
 - Offensive Group (CF, SS, ST): 

## Player Recommendation

### Feature Groups by Position

In [12]:
# Attribute names that feed each position's score in calculate_player_score().
# That function skips any attribute that is absent from a player row or null, and uses an
# empty attribute list for positions that have no entry here (there is none for 'OFF').
position_features = {
    'GK': ['reflexes', 'handling', 'kicking', 'one-on-ones', 'throwing'],
    'CB': ['marking', 'tackling', 'positioning', 'jumping-reach', 'strength'],
    'LB': ['crossing', 'tackling', 'acceleration', 'pace', 'stamina'],
    'RB': ['crossing', 'tackling', 'acceleration', 'pace', 'stamina'],
    'DM': ['passing', 'positioning', 'tackling', 'work-rate', 'aggression'],
    'CM': ['passing', 'vision', 'dribbling', 'work-rate', 'teamwork'],
    'AM': ['passing', 'dribbling', 'flair', 'long-shots', 'vision'],
    'LW': ['dribbling', 'acceleration', 'flair', 'pace', 'crossing'],
    'RW': ['dribbling', 'acceleration', 'flair', 'pace', 'crossing'],
    'CF': ['finishing', 'off-the-ball', 'composure', 'heading', 'long-shots'],
    'SS': ['passing', 'finishing', 'off-the-ball', 'dribbling', 'vision'],
    'ST': ['finishing', 'composure', 'off-the-ball', 'pace', 'strength']
}


### Calculate Player Score

In [13]:
# Calculate Player Score
def calculate_player_score(player, position):
    """Score one player row for a given position.

    The score adds up 0.15 times every position-specific attribute that is present and
    non-null on the row, 0.3 / (age + 1), 0.3 times 'rating' and 0.4 times 'potential'.
    Positions without an entry in ``position_features`` contribute no attribute points.
    """
    attribute_points = 0
    for attribute in position_features.get(position, []):
        # Attributes that are missing from the row or null are simply left out.
        if attribute in player and not pd.isnull(player[attribute]):
            attribute_points += player[attribute] * 0.15

    age_points = 0.3 / (player['age'] + 1)
    rating_points = player['rating'] * 0.3
    potential_points = player['potential'] * 0.4
    return attribute_points + age_points + rating_points + potential_points

### Recommend Players

In [14]:
# Recommend Players
def recommend_players(club, position, club_avg_value, age_limit=25, value_multiplier=1.5,
                      rating_margin=15, top_n=10):
    """Return the best-scoring transfer candidates for one club and position.

    Parameters
    ----------
    club : str
        Club the recommendation is for; its own players are excluded.
    position : str
        Position acronym to fill. 'OFF' (the pooled offensive need) is expanded to
        CF, SS and ST, because no player row carries 'OFF' as its acronym.
    club_avg_value : float
        Reference market value of the club; candidates may cost at most
        ``value_multiplier`` times this value.
    age_limit : int, optional
        Candidates must be strictly younger than this (default 25).
    value_multiplier : float, optional
        Budget factor applied to ``club_avg_value`` (default 1.5).
    rating_margin : float, optional
        Candidates may be rated at most this much above the club's mean rating (default 15).
    top_n : int, optional
        Maximum number of candidates returned (default 10).

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows sorted by descending score, with the columns
        recommended_club, recommended_position, footballer_name_footballer, age, rating,
        potential, market_value, club and score. Empty when nobody qualifies.
    """
    position_groups = {'OFF': ['CF', 'SS', 'ST']}
    eligible_positions = position_groups.get(position, [position])

    max_market_value = club_avg_value * value_multiplier
    club_avg_rating = df[df['club'] == club]['rating'].mean()

    player_pool = df[
        df['position_acronym'].isin(eligible_positions) &          # plays the wanted position
        (df['market_value'] <= max_market_value) &                 # affordable for the club
        (df['age'] < age_limit) &                                  # young enough
        (df['club'] != club) &                                     # not already at the club
        (df['rating'] <= (club_avg_rating + rating_margin))        # not rated far above the club's level
    ].copy()

    # Score every candidate with the attributes of the position he actually plays, so that
    # members of a pooled group are judged on their own role. Building the column from a
    # list also works for an empty pool, where apply(axis=1) would return a DataFrame.
    player_pool['score'] = [
        calculate_player_score(player, player['position_acronym'])
        for _, player in player_pool.iterrows()
    ]

    ranked = player_pool.sort_values(by='score', ascending=False)
    # Rows of one player share an index label after the explode step; keep only his
    # best-scoring row so a pooled group cannot list the same player twice.
    ranked = ranked[~ranked.index.duplicated(keep='first')]

    # assign() returns a new frame, so no columns are written into a slice of `ranked`.
    top_players = ranked.head(top_n).assign(
        recommended_position=position,
        recommended_club=club,
    )
    return top_players[['recommended_club', 'recommended_position', 'footballer_name_footballer', 'age', 'rating', 'potential', 'market_value', 'club', 'score']]


### Recommendation Loop with CSV Export

In [15]:
# Rounded copy of every club's need scores, attached to the exported recommendations below.
critical_positions_df['priority_scores'] = critical_positions_df['top_missing_positions'].apply(lambda x: {k: round(v, 2) for k, v in x.items()})

# Recommendation loop: print the candidates and collect them, with their priority score, for the CSV export
all_recommendations = []

for _, row in critical_positions_df.iterrows():
    club = row['club']
    top_positions = row['top_missing_positions']
    priority_scores = row['priority_scores']  # rounded need score per missing position
    # Reference budget: mean of the club's per-position average market values.
    club_avg_value = position_stats[position_stats['club'] == club]['average_market_value'].mean()
    print(f"🏠 Club: {club} Player Recommendations:\n")

    if not top_positions:
        print("No missing position recommendations.")
        continue

    for position in top_positions.keys():
        print(f"🔑 Position: {position}")
        priority_score = priority_scores[position]  # priority score of this position
        recommendations = recommend_players(club, position, club_avg_value)

        if recommendations.empty:
            print("❌ No suitable player found.")
        else:
            # Tag every recommended player with the position's priority score. assign()
            # builds a new frame instead of writing into the column subset returned by
            # recommend_players, which avoids pandas' SettingWithCopyWarning.
            recommendations = recommendations.assign(priority_score=priority_score)
            print(recommendations)
            all_recommendations.append(recommendations)
        print("-" * 40)

🏠 Club: Adana Demirspor Player Recommendations:

🔑 Position: LB
     recommended_club recommended_position footballer_name_footballer  age  \
1168  Adana Demirspor                   LB           Batuhan Adıgüzel   23   
1537  Adana Demirspor                   LB           Nafican Yardımcı   23   
97    Adana Demirspor                   LB                Arif Boşluk   21   
430   Adana Demirspor                   LB              Cemali Sertel   24   
1425  Adana Demirspor                   LB               Furkan Doğan   23   
488   Adana Demirspor                   LB                   Cenk Şen   24   
1063  Adana Demirspor                   LB                Emir Tintiş   20   
456   Adana Demirspor                   LB            Godfrey Stephen   24   
1507  Adana Demirspor                   LB            Engin Can Aksoy   21   
66    Adana Demirspor                   LB              Emrecan Terzi   20   

      rating  potential  market_value              club      score  \
1168   

### Save to CSV

In [16]:
# Write the collected recommendations to disk, or report that there is nothing to write.
if not all_recommendations:
    print("❌ No recommendations to export.")
else:
    # Stack the per-position frames into one table; the row index is not written to the file.
    result_df = pd.concat(all_recommendations)
    result_df.to_csv('../data/processed_data/recommendations.csv', index=False)
    print("✅ Player recommendations exported successfully.")

✅ Player recommendations exported successfully.


### Recommendation Loop with Print Statements

In [17]:
# Print-only pass over the recommendations: nothing is collected or exported here.
for _, row in critical_positions_df.iterrows():
    club = row['club']
    top_positions = row['top_missing_positions']
    # Reference budget: mean of the club's per-position average market values.
    club_avg_value = position_stats[position_stats['club'] == club]['average_market_value'].mean()

    print(f"🏠 Club: {club} Player Recommendations:\n")

    # Same guard as the export loop: say so explicitly when a club has no shortages
    # instead of printing a bare header.
    if not top_positions:
        print("No missing position recommendations.")
        continue

    for position in top_positions.keys():
        print(f"🔑 Position: {position}")
        recommendations = recommend_players(club, position, club_avg_value)

        if recommendations.empty:
            print("❌ No suitable player found.")
        else:
            print(recommendations)
        print("-" * 40)


🏠 Club: Adana Demirspor Player Recommendations:

🔑 Position: LB
     recommended_club recommended_position footballer_name_footballer  age  \
1168  Adana Demirspor                   LB           Batuhan Adıgüzel   23   
1537  Adana Demirspor                   LB           Nafican Yardımcı   23   
97    Adana Demirspor                   LB                Arif Boşluk   21   
430   Adana Demirspor                   LB              Cemali Sertel   24   
1425  Adana Demirspor                   LB               Furkan Doğan   23   
488   Adana Demirspor                   LB                   Cenk Şen   24   
1063  Adana Demirspor                   LB                Emir Tintiş   20   
456   Adana Demirspor                   LB            Godfrey Stephen   24   
1507  Adana Demirspor                   LB            Engin Can Aksoy   21   
66    Adana Demirspor                   LB              Emrecan Terzi   20   

      rating  potential  market_value              club      score  
1168    