# Système de Recommandation des Joueurs


Ce notebook présente un système de recommandation des joueurs basé sur les recherches récentes des clubs.
Le système recommande des joueurs aux clubs en fonction de la similarité de leur position principale et de leur style de jeu.
Les étapes suivantes sont couvertes :
1. Chargement des données
2. Prétraitement des données
3. Calcul de la similarité entre les joueurs et les clubs
4. Filtrage des joueurs en fonction des besoins des clubs
5. Génération des recommandations pour chaque club


## 1. Chargement des données

In [14]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import os

# Load the three CSV datasets used by the recommender
def load_data(player_path=None, club_path=None, search_path=None):
    """Read the player, club, and club-search-history CSV files.

    Args:
        player_path: Path to the player profiles CSV. When falsy, the
            default ``/content/cleaned_player_profiles.csv`` is used.
        club_path: Path to the club profiles CSV. When falsy, the default
            ``/content/cleaned_club_profiles.csv`` is used.
        search_path: Path to the club search history CSV. When falsy, the
            default ``/content/club_search_history_with_features.csv`` is used.

    Returns:
        A tuple ``(player_profiles, club_profiles, club_search_history)``
        of DataFrames, in that order.

    Raises:
        FileNotFoundError: If any of the three files cannot be found. The
            original error is attached as ``__cause__``.
    """
    player_profiles_path = player_path or '/content/cleaned_player_profiles.csv'
    club_profiles_path = club_path or '/content/cleaned_club_profiles.csv'
    club_search_history_path = search_path or '/content/club_search_history_with_features.csv'

    try:
        player_profiles = pd.read_csv(player_profiles_path)
        club_profiles = pd.read_csv(club_profiles_path)
        club_search_history = pd.read_csv(club_search_history_path)
    except FileNotFoundError as e:
        # Chain explicitly so the traceback keeps the underlying OS error
        raise FileNotFoundError(f"Error loading data: {e}") from e

    return player_profiles, club_profiles, club_search_history


## 2. Prétraitement des données

In [15]:
# Convert a play-style vector stored as text into a NumPy array
def convert_to_array(vector_str):
    """Parse a bracketed string of integers into a NumPy array.

    Surrounding square brackets are stripped and commas are treated as
    whitespace. Only tokens made up entirely of digits are kept; any other
    token (for example a negative or decimal number) is skipped.

    Args:
        vector_str: Text such as ``"[1, 0, 1]"`` or ``"[1 0 1]"``.

    Returns:
        A NumPy array of the parsed integers. An empty array is returned,
        after printing a message, when the input cannot be processed
        (for example when it is not a string).
    """
    try:
        tokens = vector_str.strip('[]').replace(',', ' ').split()
        values = []
        for token in tokens:
            if token.isdigit():
                values.append(int(token))
        return np.array(values)
    except Exception as e:
        print(f"Conversion error for vector: {vector_str} -> {e}")
        return np.array([])

# Parse the play-style vector columns of both profile tables
def preprocess_data(player_profiles, club_profiles):
    """Convert the textual style vectors of players and clubs into arrays.

    The ``play_style_vector`` column of ``player_profiles`` and the
    ``style_of_play`` column of ``club_profiles`` are replaced in place by
    the result of ``convert_to_array``. Values that are already NumPy
    arrays are left untouched, so calling this function again on frames
    that were already preprocessed (e.g. re-running a notebook cell) does
    not wipe the vectors.

    Args:
        player_profiles: DataFrame with a ``play_style_vector`` column.
        club_profiles: DataFrame with a ``style_of_play`` column.

    Returns:
        The same two DataFrames, ``(player_profiles, club_profiles)``.
    """
    def _to_vector(value):
        # Already-parsed vectors pass through; everything else is parsed
        if isinstance(value, np.ndarray):
            return value
        return convert_to_array(value)

    player_profiles['play_style_vector'] = player_profiles['play_style_vector'].apply(_to_vector)
    club_profiles['style_of_play'] = club_profiles['style_of_play'].apply(_to_vector)
    return player_profiles, club_profiles


## 3. Calcul de la similarité

In [16]:

from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Compute the similarity between one player and one club
def calculate_similarity(player, club, position_weight=0.5, style_weight=0.5):
    """Score a player against a club as a weighted sum of position and style.

    Args:
        player: Mapping (dict or Series row) providing ``play_style_vector``
            and ``position_main``.
        club: Mapping providing ``style_of_play`` and ``needs_position``.
            ``needs_position`` may be an iterable of position names or its
            string form such as ``"['GK', 'CB']"`` or ``'["GK", "CB"]'``.
        position_weight: Weight applied to the position match (0 or 1).
        style_weight: Weight applied to the cosine similarity of the
            two style vectors.

    Returns:
        ``position_weight * position_match + style_weight * style_similarity``.
        The style term is 0 when either vector is empty; the position term
        is 0 when the club lists no positions or the player has no textual
        position.
    """
    player_vector = player['play_style_vector']
    club_vector = club['style_of_play']

    # Guard: an empty vector cannot be compared, so it contributes nothing
    if len(player_vector) == 0 or len(club_vector) == 0:
        style_similarity = 0
    else:
        # NOTE(review): vectors are passed through unchanged; confirm that
        # upstream data guarantees both have the same length.
        style_similarity = cosine_similarity([player_vector], [club_vector])[0][0]

    # Normalise 'needs_position' to a set of lower-cased names
    needs_position = club['needs_position']
    if isinstance(needs_position, str):
        # Accept both single- and double-quoted list representations
        needs_position = (
            needs_position.strip("[]").replace("'", "").replace('"', "").split(',')
        )
    elif needs_position is None or isinstance(needs_position, float):
        # Missing value (None / NaN): the club lists no positions
        needs_position = []

    wanted_positions = {str(pos).strip().lower() for pos in needs_position}
    # An empty list string ("[]") would otherwise yield a bogus '' position
    wanted_positions.discard('')

    raw_position = player['position_main']
    player_position = raw_position.strip().lower() if isinstance(raw_position, str) else None

    position_similarity = 1 if player_position in wanted_positions else 0

    # Weighted sum of the two components
    return position_weight * position_similarity + style_weight * style_similarity


## 4. Filtrage des joueurs

In [17]:
# Filter players by the club's needed positions and rank them by style
def filter_players_by_profile(club, player_profiles, top_n=50):
    """Select the players best matching a club's positions and play style.

    Players whose ``position_main`` (trimmed, case-insensitive) is among
    the club's ``needs_position`` are kept, scored by cosine similarity
    between their ``play_style_vector`` and the club's ``style_of_play``,
    and sorted by that score in descending order.

    Args:
        club: Mapping (dict or Series row) providing ``needs_position`` and
            ``style_of_play``. ``needs_position`` may be an iterable of
            names or its string form, single- or double-quoted.
        player_profiles: DataFrame with ``position_main`` and
            ``play_style_vector`` columns.
        top_n: Maximum number of players to return (default 50).

    Returns:
        A copy of the matching rows with an added ``style_similarity``
        column, limited to the ``top_n`` highest-scoring players.
    """
    # Normalise 'needs_position' to a collection of lower-cased names
    needs_position = club['needs_position']
    if isinstance(needs_position, str):
        needs_position = (
            needs_position.strip("[]").replace("'", "").replace('"', "").split(',')
        )
    elif needs_position is None or isinstance(needs_position, float):
        # Missing value (None / NaN): no position is requested
        needs_position = []

    wanted_positions = {str(pos).strip().lower() for pos in needs_position}
    # An empty list string ("[]") would otherwise yield a bogus '' position
    wanted_positions.discard('')

    # Strip as well as lower-case so the match agrees with calculate_similarity
    normalized_positions = player_profiles['position_main'].str.strip().str.lower()
    filtered_players = player_profiles[normalized_positions.isin(list(wanted_positions))].copy()

    club_vector = club['style_of_play']

    def safe_similarity(player_vector):
        # Empty vectors cannot be compared; score them as 0
        if len(player_vector) == 0 or len(club_vector) == 0:
            return 0
        return cosine_similarity([player_vector], [club_vector])[0][0]

    filtered_players['style_similarity'] = filtered_players['play_style_vector'].apply(safe_similarity)

    # Sort by style score and keep the best candidates
    top_players = filtered_players.sort_values(by='style_similarity', ascending=False).head(top_n)

    return top_players


## 5. Génération des recommandations

In [19]:
# Recommend players to a club based on its search history
def recommend_players_to_club_filtered(club_id, club_search_history, player_profiles, club_profiles):
    """Build the top-10 player recommendations for one club.

    For every search-history row of the club whose ``player_id`` exists in
    ``player_profiles``, the players returned by
    ``filter_players_by_profile`` are scored with ``calculate_similarity``
    and collected, each player at most once.

    Args:
        club_id: Identifier of the club to recommend for.
        club_search_history: DataFrame with ``club_id`` and ``player_id``.
        player_profiles: DataFrame of players (needs ``player_id``).
        club_profiles: DataFrame of clubs (needs ``club_id``).

    Returns:
        A DataFrame with columns ``club_id``, ``club_name``, ``player_id``,
        ``player_name`` and ``similarity``, sorted by ``similarity``
        descending and limited to 10 rows. When the club has no usable
        search or no candidate, an empty DataFrame with those columns is
        returned. When the club is unknown, a message is printed and an
        empty DataFrame is returned.
    """
    result_columns = ['club_id', 'club_name', 'player_id', 'player_name', 'similarity']

    club_rows = club_profiles[club_profiles['club_id'] == club_id]
    if club_rows.empty:
        print(f"Club ID {club_id} not found.")
        return pd.DataFrame()
    club = club_rows.iloc[0]

    club_searches = club_search_history[club_search_history['club_id'] == club_id]
    if club_searches.empty:
        # Nothing to base recommendations on; avoid sorting a column-less frame
        return pd.DataFrame(columns=result_columns)

    # The candidate pool depends only on the club, so compute it once
    filtered_players = filter_players_by_profile(club, player_profiles)

    recommendations = []
    recommended_player_ids = set()

    for player_id in club_searches['player_id']:
        # Ignore searches that reference a player missing from the profiles
        if not (player_profiles['player_id'] == player_id).any():
            continue

        # NOTE(review): a searched player is skipped only while processing
        # its own search row; a later row for a different player can still
        # add it. Confirm whether all searched players should be excluded.
        for _, other_player in filtered_players.iterrows():
            pid = other_player['player_id']
            if pid == player_id or pid in recommended_player_ids:
                continue
            recommendations.append({
                'club_id': club_id,
                'club_name': club.get('club_name', 'N/A'),
                'player_id': pid,
                'player_name': other_player.get('name', 'N/A'),
                'similarity': calculate_similarity(other_player, club),
            })
            recommended_player_ids.add(pid)

    if not recommendations:
        # sort_values(by='similarity') would raise KeyError on an empty frame
        return pd.DataFrame(columns=result_columns)

    recommendations_df = pd.DataFrame(recommendations)
    recommendations_df = recommendations_df.sort_values(by='similarity', ascending=False)

    return recommendations_df.head(10)


# Usage example
def main():
    """Load and preprocess the data, then print recommendations for the first club.

    Prints "No clubs loaded." instead when the club table is empty.
    """
    players, clubs, searches = load_data()
    players, clubs = preprocess_data(players, clubs)

    if clubs.empty:
        print("No clubs loaded.")
        return

    # Use the first club of the table as the demonstration target
    first_club_id = clubs.iloc[0]['club_id']
    recommendations = recommend_players_to_club_filtered(
        first_club_id, searches, players, clubs
    )
    print(recommendations)

# Run the example only when executed directly (script or notebook cell),
# so that importing this module has no side effects
if __name__ == "__main__":
    main()



   club_id    club_name  player_id              player_name  similarity
0     2001  Real Madrid      15274           Darren Oldaker    0.887298
1     2001  Real Madrid       5902             Nahuel Leiva    0.887298
2     2001  Real Madrid        970           Nabil Bentaleb    0.887298
3     2001  Real Madrid       1525            Adnan Januzaj    0.887298
4     2001  Real Madrid      10108            Michael Duffy    0.816228
5     2001  Real Madrid       1888            Morgan Rogers    0.816228
6     2001  Real Madrid       2318           Mohamed Daramy    0.816228
7     2001  Real Madrid       9009  Gustav Mendonca Wikheim    0.816228
8     2001  Real Madrid       5037           Juninho Bacuna    0.816228
9     2001  Real Madrid        222          Mattia Zaccagni    0.816228
