# Predicting Player Market Value with Linear Regression

## 1. Load the Dataset

In [2]:
import pandas as pd

# Read the encoded player table from a CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='player_data_encoded.csv')

# Preview the first five rows as the cell output.
df.head(n=5)

Unnamed: 0,player_id,market_value_in_eur,position,foot,height_in_cm,age,total_goals,total_assists,total_minutes_played
0,10,1000000.0,4,0,184.0,47.0,48.0,25.0,8808.0
1,26,750000.0,1,1,190.0,45.0,0.0,0.0,13508.0
2,65,1000000.0,4,3,183.0,44.0,38.0,13.0,8788.0
3,77,200000.0,2,3,183.0,47.0,0.0,0.0,307.0
4,80,100000.0,1,0,194.0,44.0,0.0,0.0,1080.0


## 2. Data Preprocessing

In [3]:
# One-hot encode `position` and `foot`, which appear as integer codes in the
# preview above. `drop_first=True` omits the first level of each column so
# the resulting indicator columns are not perfectly collinear.
df_processed = pd.get_dummies(
    data=df,
    columns=['position', 'foot'],
    drop_first=True,
)

# Preview the encoded frame as the cell output.
df_processed.head()

Unnamed: 0,player_id,market_value_in_eur,height_in_cm,age,total_goals,total_assists,total_minutes_played,position_1,position_2,position_3,position_4,foot_1,foot_2,foot_3
0,10,1000000.0,184.0,47.0,48.0,25.0,8808.0,False,False,False,True,False,False,False
1,26,750000.0,190.0,45.0,0.0,0.0,13508.0,True,False,False,False,True,False,False
2,65,1000000.0,183.0,44.0,38.0,13.0,8788.0,False,False,False,True,False,False,True
3,77,200000.0,183.0,47.0,0.0,0.0,307.0,False,True,False,False,False,False,True
4,80,100000.0,194.0,44.0,0.0,0.0,1080.0,True,False,False,False,False,False,False


## 3. Define Features (X) and Target (y)

In [4]:
# Target: the market value column that the model will predict.
y = df_processed['market_value_in_eur']

# Features: every remaining column except the player identifier and the target.
X = df_processed.drop(columns=['player_id', 'market_value_in_eur'])

## 4. Split Data into Training and Testing Sets

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

ModuleNotFoundError: No module named 'sklearn'

## 5. Train the Linear Regression Model

In [None]:
from sklearn.linear_model import LinearRegression

# Create an ordinary least squares regressor with default settings.
model = LinearRegression()

# Fit on the training split; the fitted estimator is shown as the cell output.
model.fit(X=X_train, y=y_train)

## 6. Evaluate the Model

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict market values for the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions against the true values, in the order MAE, MSE, R².
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)

# Report each metric on its own line.
print(f'Mean Absolute Error (MAE): {mae}')
print(f'Mean Squared Error (MSE): {mse}')
print(f'R-squared (R²): {r2}')