# Imports

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the player table that all feature cells below operate on.
# The path is relative to the notebook's working directory; judging by the
# file name this is presumably the output of a preceding EDA step (TODO confirm).
players = pd.read_csv("../data/players_eda.csv")

# 1️⃣ Feature-Erstellung

## Torquote: Tore pro 90 Minuten

In [3]:
# Goals per 90 minutes played. Rows whose minutes are not strictly positive
# are assigned 0 rather than the inf/NaN the division would produce.
players["goals_per_90"] = np.where(
    players["total_minutes_played"].gt(0),
    players["total_goals"].div(players["total_minutes_played"].div(90)),
    0,
)

## Assistquote: Assists pro 90 Minuten

In [4]:
# Assists per 90 minutes played. Rows whose minutes are not strictly positive
# are assigned 0 rather than the inf/NaN the division would produce.
players["assists_per_90"] = np.where(
    players["total_minutes_played"].gt(0),
    players["total_assists"].div(players["total_minutes_played"].div(90)),
    0,
)

## Kartenquote: Karten pro Spiel

In [5]:
# Yellow plus red cards per game. Rows with no recorded games
# (total_games not > 0) are assigned 0.
players["cards_per_game"] = np.where(
    players["total_games"].gt(0),
    players["total_yellow_cards"]
    .add(players["total_red_cards"])
    .div(players["total_games"]),
    0,
)

## Transferintensität: Transfers pro Lebensjahr

In [6]:
# Number of transfers divided by the player's age. Rows whose age is not
# strictly positive are assigned 0.
players["transfer_intensity"] = np.where(
    players["age"].gt(0),
    players["number_of_transfers"].div(players["age"]),
    0,
)

## Karriereintensität: Transfers pro 10.000 Einsatzminuten

In [7]:
# Transfers per 10,000 minutes played. Only rows with at least 1000 minutes
# get a ratio; everything below that threshold is assigned 0.
# NOTE(review): the 1000-minute cut-off presumably guards against inflated
# ratios from very small denominators — confirm the intended value.
players["transfer_intensity_per_10k_min"] = np.where(
    players["total_minutes_played"].ge(1000),
    players["number_of_transfers"].div(
        players["total_minutes_played"].div(10000.0)
    ),
    0,
)

In [8]:
# Preview: the ten rows with the highest transfers-per-10k-minutes value,
# showing the raw inputs next to the engineered ratio columns.
(
    players[
        [
            "name",
            "position",
            "total_goals",
            "total_assists",
            "total_minutes_played",
            "total_games",
            "number_of_transfers",
            "age",
            "goals_per_90",
            "assists_per_90",
            "cards_per_game",
            "transfer_intensity",
            "transfer_intensity_per_10k_min",
        ]
    ]
    .sort_values(by="transfer_intensity_per_10k_min", ascending=False)
    .head(10)
)

Unnamed: 0,name,position,total_goals,total_assists,total_minutes_played,total_games,number_of_transfers,age,goals_per_90,assists_per_90,cards_per_game,transfer_intensity,transfer_intensity_per_10k_min
12577,Jordy Hiwula,Attack,1.0,0.0,1149.0,46,22,31,0.078329,0.0,0.065217,0.709677,191.470844
27309,Ari Moura,Attack,4.0,1.0,1112.0,4,21,29,0.323741,0.080935,0.25,0.724138,188.848921
15747,Andrija Majdevac,Attack,4.0,1.0,1036.0,9,19,28,0.34749,0.086873,0.222222,0.678571,183.397683
12909,Christian Walton,Goalkeeper,0.0,0.0,1170.0,30,21,29,0.0,0.0,0.033333,0.724138,179.487179
29468,Tiago Santana,Defender,0.0,1.0,1162.0,3,20,27,0.0,0.077453,2.666667,0.740741,172.11704
14531,Alex Palmer,Goalkeeper,0.0,0.0,1020.0,20,17,29,0.0,0.0,0.05,0.586207,166.666667
17189,Paulo Azzi,Defender,1.0,0.0,1099.0,19,18,31,0.081893,0.0,0.105263,0.580645,163.785259
21532,Josua Mej√≠as,Defender,0.0,1.0,1166.0,4,19,27,0.0,0.077187,1.0,0.703704,162.950257
12178,Dario Canadjija,Midfield,0.0,1.0,1118.0,15,18,31,0.0,0.080501,0.266667,0.580645,161.001789
22602,Daniel Penha,Midfield,4.0,2.0,1274.0,0,20,26,0.282575,0.141287,0.0,0.769231,156.985871


# 2️⃣ Feature-Auswahl

In [9]:
# Display all column names currently in the frame (original columns plus the
# ratio features added above) as the basis for the selection step below.
players.columns

Index(['player_id', 'first_name', 'last_name', 'name', 'last_season',
       'current_club_id', 'player_code', 'city_of_birth',
       'country_of_citizenship', 'date_of_birth', 'sub_position', 'position',
       'foot', 'height_in_cm', 'contract_expiration_date',
       'current_club_domestic_competition_id', 'current_club_name',
       'market_value_in_eur', 'highest_market_value_in_eur',
       'number_of_transfers', 'total_transfer_fee', 'total_starting_lineups',
       'total_substitute_appearances', 'total_captain_appearances',
       'total_games', 'total_yellow_cards', 'total_red_cards', 'total_goals',
       'total_assists', 'total_minutes_played', 'age', 'contract_years_left',
       'log_market_value', 'goals_per_90', 'assists_per_90', 'cards_per_game',
       'transfer_intensity', 'transfer_intensity_per_10k_min'],
      dtype='object')

## Entfernen von redundanten/spärlichen Spalten

**🧩 Zielvariable (Target):**
- 🎯 market_value_in_eur oder log_market_value
- → das ist dein Ziel (y), wird also nicht als Feature genutzt.

**🧱 Nicht für Regression geeignete Spalten**
- 1️⃣ Identifikations- & Textspalten
- 2️⃣ Datumsspalten (ohne Feature Engineering)
- 3️⃣ Target Leakage

**🧠 Features, die behalten werden**
- 🧍‍♂️ Demografisch & Vertraglich
- ⚽ Leistungsbezogen
- 🔢 Verhältniskennzahlen (Feature Engineering)
- 💰 Karrierebezogen
- 🧩 Kategorische

In [10]:
# Columns removed before modelling, listed one per line.
# NOTE: this cell reassigns `players`, so running it a second time on the same
# kernel raises KeyError because the columns are already gone.
drop_cols = [
    # identifier and free-text columns
    "player_id",
    "first_name",
    "last_name",
    "name",
    "player_code",
    "city_of_birth",
    "current_club_name",
    "current_club_id",
    # raw date columns
    "date_of_birth",
    "contract_expiration_date",
    # presumably target leakage (derived from market value) — TODO confirm
    "highest_market_value_in_eur",
    # raw market value; log_market_value is not dropped and stays in the frame
    "market_value_in_eur",
]
players = players.drop(drop_cols, axis="columns")

In [11]:
# Print dtypes and non-null counts of the remaining columns to verify the
# result of the column drop above.
players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31046 entries, 0 to 31045
Data columns (total 26 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   last_season                           31046 non-null  int64  
 1   country_of_citizenship                31046 non-null  object 
 2   sub_position                          31046 non-null  object 
 3   position                              31046 non-null  object 
 4   foot                                  31046 non-null  object 
 5   height_in_cm                          31046 non-null  float64
 6   current_club_domestic_competition_id  31046 non-null  object 
 7   number_of_transfers                   31046 non-null  int64  
 8   total_transfer_fee                    31046 non-null  int64  
 9   total_starting_lineups                31046 non-null  int64  
 10  total_substitute_appearances          31046 non-null  int64  
 11  total_captain_a

# 3Ô∏è‚É£ Output

In [12]:
# Write the feature-engineered frame to CSV; index=False leaves the row index
# out of the file. The path is relative to the notebook's working directory.
players.to_csv("../data/players_fe.csv", index=False)