In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")
plt.rcParams["figure.figsize"] = (12, 6)

# CSV Dosyalarını Yükleme
csv1 = pd.read_csv(r"C:\Users\mbaki\Desktop\Proje\data\raw\23_24\23_24.csv")
csv2 = pd.read_csv(r"C:\Users\mbaki\Desktop\Proje\data\raw\23_24\23_24_teams_and_players.csv")
csv3 = pd.read_csv(r"C:\Users\mbaki\Desktop\Proje\data\raw\23_24\oyuncu_verileri.csv")

# Fonksiyon: Oyuncu Soyadını Çıkartma
def extract_surname(full_name):
    name = full_name.replace('(c)', '').strip()
    parts = name.split(' ')
    return parts[-1] if parts else name

# Fonksiyon: Takım İsimlerini Düzenleme
def modify_team_name(team_name):
    team_name_lower = team_name.lower()
    if team_name_lower == 'karagümrük':
        return 'fati'
    elif team_name_lower == 'mke ankaragücü':
        return 'anka'
    elif team_name_lower == 'çaykur rizespor':
        return 'rize'
    else:
        return team_name_lower[:4]

# Fonksiyon: Market Value'yi Temizleme
def clean_market_value(value):
    if pd.isna(value):
        return np.nan
    value = value.lower().replace('.', '').replace(',', '.').replace(' ', '').replace('€', '')
    if 'mil' in value:
        try:
            num = float(value.replace('mil', ''))
            return num
        except ValueError:
            return np.nan
    elif 'bin' in value:
        try:
            num = float(value.replace('bin', ''))
            return num / 1000
        except ValueError:
            return np.nan
    else:
        try:
            return float(value) / 1_000_000
        except ValueError:
            return np.nan

# Oyuncu İsimlerini Listeye Bölme
csv1['Home Players'] = csv1['Home Players'].str.split('; ')
csv1['Away Players'] = csv1['Away Players'].str.split('; ')

# Home Players ve Away Players Soyadını Çıkarma
csv1['Home Players'] = csv1['Home Players'].apply(lambda players: [extract_surname(player) for player in players])
csv1['Away Players'] = csv1['Away Players'].apply(lambda players: [extract_surname(player) for player in players])

MAX_OYUNCU = 11

# Oyuncu Listelerini Ayrı Sütunlara Bölme
def split_players(player_list, prefix):
    df = player_list.apply(lambda players: pd.Series(players[:MAX_OYUNCU]))
    df = df.rename(columns=lambda x: f'{prefix} Player {x + 1}')
    return df

home_players_df = split_players(csv1['Home Players'], 'Home')
away_players_df = split_players(csv1['Away Players'], 'Away')

# Orijinal Oyuncu Sütunlarını Kaldır ve Yeni Sütunları Ekleyin
csv1 = csv1.drop(['Home Players', 'Away Players'], axis=1)
csv1 = pd.concat([csv1, home_players_df, away_players_df], axis=1)

# Home Team ve Away Team Sütunlarını Düzenleme
csv1['Home Team'] = csv1['Home Team'].apply(modify_team_name).str.strip()
csv1['Away Team'] = csv1['Away Team'].apply(modify_team_name).str.strip()

# csv2 Veri Setini Düzenleme
csv2['Player Name'] = csv2['Player Name'].apply(extract_surname).str.lower().str.strip()
csv2['Team Name'] = csv2['Team Name'].apply(modify_team_name).str.strip()
player_rating_dict = csv2.set_index('Player Name')['Player Rating'].to_dict()

def get_player_rating(player):
    key = player.lower().strip()
    return player_rating_dict.get(key, np.nan)

home_player_cols = [f'Home Player {i}' for i in range(1, MAX_OYUNCU + 1)]
away_player_cols = [f'Away Player {i}' for i in range(1, MAX_OYUNCU + 1)]
all_player_cols = home_player_cols + away_player_cols

for player_col in all_player_cols:
    rating_col = f"{player_col} Rating"
    csv1[rating_col] = csv1[player_col].apply(get_player_rating)

csv3['Player Surname'] = csv3['Player Name'].apply(extract_surname).str.lower().str.strip()
csv3['Market Value (M€)'] = csv3['Market Value'].apply(clean_market_value)
csv3['Team Name'] = csv3['Team Name'].apply(modify_team_name).str.strip()

player_age_avg = csv3.groupby('Player Surname')['Age'].mean()
player_market_value_avg = csv3.groupby('Player Surname')['Market Value (M€)'].mean()

team_avg_age = csv3.groupby('Team Name')['Age'].mean().to_dict()
team_avg_market_value = csv3.groupby('Team Name')['Market Value (M€)'].mean().to_dict()

def get_player_age(player):
    key = player.lower().strip()
    return player_age_avg.get(key, np.nan)

def get_player_market_value(player):
    key = player.lower().strip()
    return player_market_value_avg.get(key, np.nan)

for player_col in all_player_cols:
    age_col = f"{player_col} Age"
    market_value_col = f"{player_col} Market Value (M€)"
    csv1[age_col] = csv1[player_col].apply(get_player_age)
    csv1[market_value_col] = csv1[player_col].apply(get_player_market_value)

overall_avg_age = csv3['Age'].mean()
overall_avg_market_value = csv3['Market Value (M€)'].mean()

for player_col in all_player_cols:
    age_col = f"{player_col} Age"
    market_value_col = f"{player_col} Market Value (M€)"
    csv1[age_col].fillna(overall_avg_age, inplace=True)
    csv1[market_value_col].fillna(overall_avg_market_value, inplace=True)

# "Match Date"e Göre Sıralama ve Kaydetme
csv1['Match Date'] = pd.to_datetime(csv1['Match Date'], format='%d/%m/%y', errors='coerce')
csv1 = csv1.sort_values(by='Match Date')

# İşlenmiş ve sıralanmış veriyi kaydetme
csv1.to_csv(r"C:\Users\mbaki\Desktop\Proje\data\processed\sorted_processed_23_24.csv", index=False)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  csv1[age_col].fillna(overall_avg_age, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  csv1[market_value_col].fillna(overall_avg_market_value, inplace=True)
