In [46]:
import pandas as pd
import numpy as np
import math
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the processed match data from CSV.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
INPUT_CSV_PATH = r"C:\Users\mbaki\Desktop\Proje\data\processed\sorted_processed_23_24.csv"
df = pd.read_csv(INPUT_CSV_PATH)

# Parse formation strings
def parse_formation(formation):
    """Split a dash-separated formation string into a list of integers.

    Parameters
    ----------
    formation : object
        Usually a string such as ``"4-2-3-1"``. Any other value (for
        example a NaN float from a missing cell) is treated as unparseable.

    Returns
    -------
    list
        One ``int`` per dash-separated segment, e.g. ``[4, 2, 3, 1]``.
        If the value has no ``split`` method or any segment is not an
        integer, a placeholder list of four NaN values is returned instead.
    """
    try:
        return [int(segment) for segment in formation.split('-')]
    except (AttributeError, ValueError):
        # AttributeError: the value is not a string (e.g. NaN).
        # ValueError: a segment is not an integer (empty or malformed text);
        # without this a single bad cell would abort the whole ``.apply``.
        return [np.nan, np.nan, np.nan, np.nan]

# Derive defender / midfielder / forward counts from the parsed formations.
def _first_line(parts):
    """First entry of a parsed formation (NaN for an empty list)."""
    if len(parts) > 0:
        return parts[0]
    return np.nan


def _middle_lines(parts):
    """Sum of every entry between the first and the last (NaN if fewer than three entries)."""
    if len(parts) > 2:
        return sum(parts[1:-1])
    return np.nan


def _last_line(parts):
    """Last entry of a parsed formation (NaN for an empty list)."""
    if len(parts) > 0:
        return parts[-1]
    return np.nan


home_formations = df['Home Formation'].apply(parse_formation)
away_formations = df['Away Formation'].apply(parse_formation)

for _parsed, (_def_col, _mid_col, _fwd_col) in (
    (home_formations, ('Home_Defenders', 'Home_Midfielders', 'Home_Forwards')),
    (away_formations, ('Away_Defenders', 'Away_Midfielders', 'Away_Forwards')),
):
    df[_def_col] = _parsed.apply(_first_line)
    df[_mid_col] = _parsed.apply(_middle_lines)
    df[_fwd_col] = _parsed.apply(_last_line)

# Number the matches 1..N in their current row order
# (presumably chronological, going by the input file name; confirm).
match_count = len(df)
df['Match_Number'] = np.arange(1, match_count + 1)

# Drop "Season", "Week" and "Match Date" (when present) and move
# "Match_Number" to the front of the frame.
calendar_columns = ['Season', 'Week', 'Match Date']
df = df.drop(columns=calendar_columns, errors='ignore')
trailing_columns = [name for name in df.columns if name != 'Match_Number']
df = df[['Match_Number', *trailing_columns]]

# Player column names: 11 numbered slots per side
def _player_columns(side, suffix=''):
    """Return the column names ``'<side> Player <n><suffix>'`` for n = 1..11."""
    return [f'{side} Player {slot}{suffix}' for slot in range(1, 12)]


home_player_columns = _player_columns('Home')
away_player_columns = _player_columns('Away')

home_age_cols = _player_columns('Home', ' Age')
away_age_cols = _player_columns('Away', ' Age')

home_value_cols = _player_columns('Home', ' Market Value (M€)')
away_value_cols = _player_columns('Away', ' Market Value (M€)')

home_rating_cols = _player_columns('Home', ' Rating')
away_rating_cols = _player_columns('Away', ' Rating')

# Aggregate per-player statistics over every match in the frame.
# NOTE(review): the aggregation uses all rows, including the row a score is
# later written back to. If these scores feed a pre-match prediction model,
# that leaks post-match ratings into the features; confirm this is intended.
player_stats = {}


def update_player_stats(player_name, age, value, rating):
    """Accumulate one appearance of ``player_name`` into ``player_stats``.

    Parameters
    ----------
    player_name : object
        Key identifying the player. Null names (NaN / None) are ignored so
        that empty lineup cells are not pooled into a single fake player.
    age, value, rating : number or NaN
        Values observed for this appearance. A null value is skipped and
        does not count towards that statistic's average.

    Side effects
    ------------
    Mutates the module-level ``player_stats`` dict. Each entry holds a
    ``<stat>_sum`` and ``<stat>_count`` for rating, value and age, plus
    ``count`` (the total number of appearances, including those with
    missing values).
    """
    if pd.isnull(player_name):
        return

    if player_name not in player_stats:
        player_stats[player_name] = {
            'rating_sum': 0.0, 'rating_count': 0,
            'value_sum': 0.0, 'value_count': 0,
            'age_sum': 0.0, 'age_count': 0,
            'count': 0,
        }
    entry = player_stats[player_name]

    for stat, observed in (('rating', rating), ('value', value), ('age', age)):
        if pd.notnull(observed):
            entry[f'{stat}_sum'] += observed
            # Count only real observations so a missing value does not drag
            # the average towards zero.
            entry[f'{stat}_count'] += 1
    entry['count'] += 1


def _optional_column(column):
    """Return ``column`` if ``df`` has it, else ``None`` (stat is then treated as missing)."""
    return column if column in df.columns else None


# Resolve which stat columns exist once, instead of once per row and slot.
# Slots are interleaved home/away so players are visited in the order
# home slot 1, away slot 1, home slot 2, ...
_home_slots = zip(home_player_columns, home_age_cols, home_value_cols, home_rating_cols)
_away_slots = zip(away_player_columns, away_age_cols, away_value_cols, away_rating_cols)
_player_slots = [
    (name_col, _optional_column(age_col), _optional_column(value_col), _optional_column(rating_col))
    for slot_pair in zip(_home_slots, _away_slots)
    for name_col, age_col, value_col, rating_col in slot_pair
]

for _, row in df.iterrows():
    for name_col, age_col, value_col, rating_col in _player_slots:
        update_player_stats(
            row[name_col],
            row[age_col] if age_col is not None else np.nan,
            row[value_col] if value_col is not None else np.nan,
            row[rating_col] if rating_col is not None else np.nan,
        )

# Compute one score per player from the averaged statistics.
_REFERENCE_AGE = 27        # age at which the age penalty is zero
_AGE_PENALTY_SCALE = 10.0  # years of distance from the reference age per penalty point


def _mean(total, observations):
    """Average of the accumulated values, or 0 when nothing was observed."""
    return total / observations if observations > 0 else 0


player_scores = {}
for player, stats in player_stats.items():
    avg_rating = _mean(stats['rating_sum'], stats['rating_count'])
    avg_value = _mean(stats['value_sum'], stats['value_count'])
    if stats['age_count'] > 0:
        avg_age = stats['age_sum'] / stats['age_count']
        age_penalty = abs(avg_age - _REFERENCE_AGE) / _AGE_PENALTY_SCALE
    else:
        # Unknown age: apply no penalty rather than treating the player as age 0.
        age_penalty = 0.0
    # Example formula (customise as needed): average rating plus the log of
    # the average market value, minus a penalty for distance from the
    # reference age.
    player_score = avg_rating + math.log(avg_value + 1) - age_penalty
    player_scores[player] = player_score

# Replace every player name with that player's score
# (names absent from player_scores become NaN).
for player_col in home_player_columns + away_player_columns:
    df[player_col] = df[player_col].map(player_scores)

# Drop the raw per-player columns (age, value, rating) that are no longer needed.
cols_to_drop = [
    *home_age_cols, *away_age_cols,
    *home_value_cols, *away_value_cols,
    *home_rating_cols, *away_rating_cols,
]
df = df.drop(columns=cols_to_drop, errors='ignore')

# Row-wise sum of the player scores for each side
home_total_score = df[home_player_columns].sum(axis=1)
away_total_score = df[away_player_columns].sum(axis=1)
df['Home_Players_Total_Score'] = home_total_score
df['Away_Players_Total_Score'] = away_total_score

# Home-minus-away difference of the summed player scores
df['Player_Score_Diff'] = home_total_score - away_total_score

# Home-minus-away difference of the performance columns
df['Performance_Diff'] = df['Home Performance'].sub(df['Away Performance'])

# Match result
def get_match_result(row):
    """Encode the outcome of one match from the home side's point of view.

    Parameters
    ----------
    row : mapping
        Anything indexable by ``'Home Goals'`` and ``'Away Goals'``
        (a DataFrame row when used with ``df.apply(..., axis=1)``).

    Returns
    -------
    int or float
        ``1`` if the home side scored more, ``0`` for equal goals, ``-1`` if
        the away side scored more, and NaN when either goal count is missing.
    """
    home_goals = row['Home Goals']
    away_goals = row['Away Goals']

    # NaN compares False against everything, so without this guard a match
    # with unknown goals would be labelled as an away win (-1).
    if pd.isnull(home_goals) or pd.isnull(away_goals):
        return np.nan

    if home_goals > away_goals:
        return 1
    if home_goals == away_goals:
        return 0
    return -1

# Label every row with the value get_match_result returns for it
df['Match_Result'] = df.apply(get_match_result, axis='columns')

# Goal difference (home minus away)
df['Goal_Difference'] = df['Home Goals'].sub(df['Away Goals'])

# Encode the categorical team columns
kategorik_sutunlar = ['Home Team', 'Away Team']
le = LabelEncoder()
# Fit a single encoder on the values of both columns so that a team gets the
# same integer code whether it appears as the home or the away side, and so
# that ``le`` can inverse-transform either column afterwards.
# Values are cast to str first, so a missing team becomes the class 'nan'.
le.fit(pd.concat([df[sutun].astype(str) for sutun in kategorik_sutunlar], ignore_index=True))
for sutun in kategorik_sutunlar:
    df[sutun] = le.transform(df[sutun].astype(str))

# Standardise the numeric columns.
# The list is assembled from groups; its order matches the scaler's fitted attributes.
match_stat_columns = [
    'Home Goals', 'Away Goals', 'Home Performance', 'Away Performance',
    'Performance_Diff',
]
formation_columns = [
    'Home_Defenders', 'Home_Midfielders', 'Home_Forwards',
    'Away_Defenders', 'Away_Midfielders', 'Away_Forwards',
]
aggregate_columns = [
    'Home_Players_Total_Score', 'Away_Players_Total_Score',
    'Player_Score_Diff', 'Goal_Difference',
]
sayisal_sutunlar = [
    *match_stat_columns,
    *formation_columns,
    *aggregate_columns,
    *home_player_columns,
    *away_player_columns,
]

scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[sayisal_sutunlar])
df[sayisal_sutunlar] = scaled_values

# Write the prepared data set to CSV (UTF-8 with BOM, no index column).
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
OUTPUT_CSV_PATH = r"C:\Users\mbaki\Desktop\Proje\data\processed\final_prepared_data.csv"
df.to_csv(OUTPUT_CSV_PATH, index=False, encoding='utf-8-sig')
print("Veri başarıyla işlenmiş ve kaydedilmiştir.")


Veri başarıyla işlenmiş ve kaydedilmiştir.


In [47]:
# Plain-text preview of the first five rows
print(df.head(n=5))

   Match_Number  Home Team  Away Team  Home Goals  Away Goals  \
0             1         20          3   -0.448701   -1.036494   
1             2         16         10   -0.448701    3.233409   
2             3         14          8   -1.221691   -1.036494   
3             4         15         11   -0.448701   -0.182513   
4             5         13          2    1.097278    0.671468   

   Home Performance  Away Performance Home Formation Away Formation  \
0          0.379067          0.028207        4-1-4-1        4-2-3-1   
1         -2.044110          0.457539        4-1-3-2        4-2-3-1   
2         -0.226727          0.758072        4-4-1-1        4-2-3-1   
3          0.425667         -1.173925        4-2-3-1        4-2-3-1   
4          1.404258         -0.658725        4-2-3-1          3-4-3   

   Home Player 1  ...  Home_Forwards  Away_Defenders  Away_Midfielders  \
0       1.169420  ...      -0.420841       -0.042261          0.363638   
1       0.217429  ...       1.2234

In [48]:
# Bare expression: the notebook renders the frame as this cell's output.
df

Unnamed: 0,Match_Number,Home Team,Away Team,Home Goals,Away Goals,Home Performance,Away Performance,Home Formation,Away Formation,Home Player 1,...,Home_Forwards,Away_Defenders,Away_Midfielders,Away_Forwards,Home_Players_Total_Score,Away_Players_Total_Score,Player_Score_Diff,Performance_Diff,Match_Result,Goal_Difference
0,1,20,3,-0.448701,-1.036494,0.379067,0.028207,4-1-4-1,4-2-3-1,1.169420,...,-0.420841,-0.042261,0.363638,-0.377860,1.219787,-1.580371,1.959775,0.184696,1,0.354109
1,2,16,10,-0.448701,3.233409,-2.044110,0.457539,4-1-3-2,4-2-3-1,0.217429,...,1.223476,-0.042261,0.363638,-0.377860,0.662880,-0.854704,1.062069,-1.346703,-1,-2.441874
2,3,14,8,-1.221691,-1.036494,-0.226727,0.758072,4-4-1-1,4-2-3-1,0.692704,...,-0.420841,-0.042261,0.363638,-0.377860,-1.864115,1.249595,-2.162775,-0.556304,0,-0.205088
3,4,15,11,-0.448701,-0.182513,0.425667,-1.173925,4-2-3-1,4-2-3-1,0.516972,...,-0.420841,-0.042261,0.363638,-0.377860,0.723391,-0.086743,0.554995,0.900996,0,-0.205088
4,5,13,2,1.097278,0.671468,1.404258,-0.658725,4-2-3-1,3-4-3,-1.518856,...,-0.420841,-3.245624,-1.220487,3.115042,-0.131336,0.889951,-0.724937,1.123295,1,0.354109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374,375,10,16,0.324289,-1.036494,0.938262,-0.916325,4-1-4-1,4-2-3-1,-1.494490,...,-0.420841,-0.042261,0.363638,-0.377860,-1.037618,-0.274932,-0.510937,1.024495,1,0.913305
375,376,7,11,3.416248,-1.036494,2.802245,-2.161390,4-4-2,5-4-1,0.560905,...,1.223476,3.161103,-1.220487,-0.377860,1.461820,-0.618994,1.438328,2.728794,1,3.150092
376,377,0,4,0.324289,4.087390,-2.743104,1.487938,5-4-1,4-1-4-1,-2.040729,...,-0.420841,-0.042261,0.363638,-0.377860,-0.319656,-0.868142,0.401970,-2.310003,-1,-2.441874
377,378,20,2,1.870268,0.671468,0.658665,-1.045125,4-2-3-1,4-2-3-1,-1.525127,...,-0.420841,-0.042261,0.363638,-0.377860,0.767370,0.901476,-0.120611,0.950395,1,0.913305
