In [34]:
import pandas as pd
import zipfile
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error

In [36]:
# Path to the athletes CSV, resolved relative to the notebook's working directory.
csv_file_path = 'Athletes.csv'
# Load 'Zip' as text rather than letting pandas infer an integer column:
# ZIP codes are identifiers, and integer parsing drops leading zeros
# (4-digit values such as 2136 appear when the column is read as a number).
# All other columns keep their inferred dtypes.
df = pd.read_csv(csv_file_path, dtype={'Zip': str})

In [38]:
# Peek at the top of the table (head() defaults to five rows) to check
# the column names and the kind of values each one holds.
preview = df.head()
print(preview)

   Bib    Zip  Age Age Group Gender  First Half  Second Half  Finish  \
0   25  93730   30  Under 35      M        3832         3961    7793   
1    9  97124   32  Under 35      M        3845         4059    7904   
2   20  80922   40     40-44      M        3795         4157    7952   
3   46   2136   38     35-39      M        3997         4065    8062   
4   51   6119   26  Under 35      M        3979         4198    8177   

   Positive Split  Percent Change  
0             129        0.033664  
1             214        0.055657  
2             362        0.095389  
3              68        0.017013  
4             219        0.055039  


In [40]:
# Model inputs are the two half-split columns; the value to predict is 'Age'.
feature_columns = ['First Half', 'Second Half']
target_column = 'Age'

X = df[feature_columns]
y = df[target_column]

In [42]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable from run to run.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [44]:
# Candidate regressors keyed by display name. Entries are added in the order
# they will later be trained and reported; the tree-based models get a fixed
# random_state so their fits are repeatable.
models = {}
models['Linear Regression'] = LinearRegression()
models['Decision Tree Regressor'] = DecisionTreeRegressor(random_state=42)
models['Random Forest Regressor'] = RandomForestRegressor(random_state=42)
models['Support Vector Regressor'] = SVR()

In [46]:
def _fit_and_score(estimator):
    """Fit `estimator` on the training split and score it on the test split.

    Returns a dict holding the fitted estimator ('Model'), its predictions for
    X_test ('Predicted'), and the mean absolute error of those predictions
    against y_test ('MAE').
    """
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)
    return {
        'Model': estimator,
        'Predicted': predictions,
        'MAE': mean_absolute_error(y_test, predictions),
    }


# One entry per candidate model, in the same order as `models`.
results = {label: _fit_and_score(estimator) for label, estimator in models.items()}

In [47]:
# Report each model's mean absolute error on the held-out split,
# one line per model, in the order the models were evaluated.
for name in results:
    result = results[name]
    print(f"{name} MAE: {result['MAE']}")

Linear Regression MAE: 10.275231219868001
Decision Tree Regressor MAE: 12.829123112659698
Random Forest Regressor MAE: 10.111315254963774
Support Vector Regressor MAE: 9.665743902857761


In [48]:
# Put the true ages next to the Random Forest predictions and show the first
# few rows. The frame keeps y_test's index, so row labels are the original
# positions of the held-out samples.
rf_predictions = results['Random Forest Regressor']['Predicted']
random_forest_results = pd.DataFrame(
    {
        'Actual Age': y_test,
        'Predicted Age': rf_predictions,
    }
)
print("\nRandom Forest Regressor Predictions:")
print(random_forest_results.head())


Random Forest Regressor Predictions:
       Actual Age  Predicted Age
16722          39        41.8300
15088          28        32.3000
9922           54        52.7300
980            43        42.7100
8253           45        45.4305
