In [1]:
import os
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# ML libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR

In [2]:
# Load the Performance dataset.
#
# The folder containing Performance.csv can be overridden by setting the
# PERFORMANCE_DATA_DIR environment variable, so the notebook can be re-run on
# machines other than the one it was authored on. When the variable is unset,
# the original location is used, which keeps existing behaviour unchanged.
DATA_DIR = Path(os.environ.get(
    'PERFORMANCE_DATA_DIR',
    '/Users/diogomonteiro/Documents/Education/CCT/Summer BootCamp/Diploma in Predictive Analytics/Week 2/datasets',
))

dataset = pd.read_csv(DATA_DIR / 'Performance.csv')

# Preview the first rows as the cell's rich output.
dataset.head()

Unnamed: 0,Running,Swimming,Cycling,Diving,Performance
0,14.96,41.76,1024.07,73.17,463.26
1,25.18,62.96,1020.04,57.1,444.37
2,5.11,39.4,1012.16,92.14,488.56
3,20.86,57.32,1010.24,76.64,446.48
4,10.82,37.5,1009.23,57.1,473.9


In [3]:
# Separate predictors from the target: the last column of `dataset` is the
# value to predict, and every column before it is used as an input feature.
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features. The scaler is fitted on the training rows only and
# the same fitted transform is then applied to both splits, so the held-out
# rows do not influence the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# Candidate regressors, keyed by the label used in the printed report.
# The tree-based estimators are seeded so repeated runs give the same scores.
models = dict([
    ('Linear Regression', LinearRegression()),
    ('Random Forest', RandomForestRegressor(n_estimators=10, random_state=42)),
    ('Decision Tree', DecisionTreeRegressor(random_state=42)),
    ('K-Nearest Neighbors', KNeighborsRegressor()),
    ('Support Vector Regressor', SVR()),
])

# Fit every candidate on the training split, then score its predictions on the
# held-out split with mean squared error and R^2.
for name in models:
    model = models[name]
    y_pred = model.fit(X_train, y_train).predict(X_test)
    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
    print(f"{name} - MSE: {mse:.2f}, R2: {r2:.2f}")

Linear Regression - MSE: 20.31, R2: 0.93
Random Forest - MSE: 10.52, R2: 0.96
Decision Tree - MSE: 19.45, R2: 0.93
K-Nearest Neighbors - MSE: 13.82, R2: 0.95
Support Vector Regressor - MSE: 16.80, R2: 0.94
