<a href="https://colab.research.google.com/github/jmvazqueznicolas/AI_and_DS_Tec2023/blob/main/Metrics_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regression Metrics

## Mean Squared Error (MSE)


In [None]:
# Exercise cell: fit several regressors on the same train/test split and
# compare them by Mean Squared Error on the held-out test set.
# Import necessary libraries
# TODO: fill in. The code below uses train_test_split, LinearRegression, Ridge,
# RandomForestRegressor and mean_squared_error (presumably all from
# scikit-learn -- confirm), so they must be imported here.


# Load the diabetes dataset
# TODO: fill in. The split below expects a feature matrix `X` and a target `y`.


# Split the data into training and testing sets
# test_size=0.2 holds out 20% of the samples; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# Create models
# Keyed by display name so the loop below can label each printed score.
models = {
    "Linear Regression": LinearRegression(),
    "Ridge": Ridge(),
    "Random Forest": RandomForestRegressor(random_state=42)
}

# Train the models and compute MSE
for name, model in models.items():
    model.fit(X_train, y_train)
    # Score on the test split only, never on the data the model was fitted on.
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f"{name} - Mean Squared Error: {mse:.2f}")

## Mean Absolute Error (MAE)

In [None]:
# Exercise cell: fit three linear models on a single feature, draw their
# predictions over the held-out points, and report Mean Absolute Error.
# Import necessary libraries
# TODO: fill in. This cell uses np, plt, train_test_split and
# mean_absolute_error.


# Importing various regression models
# TODO: fill in. LinearRegression, Ridge and Lasso are instantiated below.


# Load the diabetes dataset
# TODO: `data` is not defined in this cell; create it here before it is indexed.
# Column 2 is selected, and np.newaxis keeps X as a column (one feature per
# row), assuming data.data is 2-D.
X = data.data[:, np.newaxis, 2]  # Using only the 'bmi' feature
y = data.target

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a dictionary of different regression models
models = {
    "Linear Regression": LinearRegression(),
    "Ridge": Ridge(),
    "Lasso": Lasso(),
}

# Plot the true data points (using scatter function)
# TODO: replace ____ with the scatter-plot function named in the hint above.
plt.____(X_test, y_test, color='black', s=20, marker='o', label='true values')

# Plot model predictions
for name, model in models.items():
    model.fit(X_train, y_train)  # Train model
    predictions = model.predict(X_test)  # Make predictions

    # TODO: replace ____ with the line-plot function; label=name feeds the legend.
    plt.____(X_test, predictions, label=name)  # Using plot function

    # Print MAE for each model
    mae = mean_absolute_error(y_test, predictions)
    print(f"{name}: MAE = {mae:.2f}")

# Axis labels, title and legend are added once, after every model is drawn.
plt.xlabel('BMI')
plt.ylabel('Progression')
plt.title('Regression Model Fits')
plt.legend()
plt.show()

## Root Mean Squared Error (RMSE)


In [None]:
# Exercise cell: compare models by Root Mean Squared Error and show the
# scores as a bar chart.
# NOTE: this cell has no import step of its own; np, plt, train_test_split,
# DecisionTreeRegressor and mean_squared_error must already be in scope.
# Load the dataset
# TODO: fill in. The split below expects a feature matrix `X` and a target `y`.


# Split the data into training and test sets
# test_size=0.3 holds out 30% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create a list of regression models
# TODO: replace each _____ with a model instance matching its label.
models = [
    ('Linear Regression', _____),
    ('Decision Tree', DecisionTreeRegressor(random_state=42)),
    ('Random Forest', _____)
]

# Collected as (name, rmse) pairs so they can be unzipped for plotting.
rmse_scores = []

# Train each model and calculate RMSE
for name, model in models:
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    # RMSE is the square root of MSE, so it is in the same units as y.
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    rmse_scores.append((name, rmse))
    print(f'{name} RMSE: {rmse:.2f}')

# Plotting
# zip(*pairs) transposes the list of pairs into a names tuple and a scores tuple.
names, scores = zip(*rmse_scores)
plt.bar(names, scores)
plt.ylabel('RMSE')
plt.title('Model RMSE Comparisons')
plt.show()


## Coefficient of Determination R²


In [None]:
# Exercise cell: compare models by their R^2 score on the test set and plot
# the scores against a reference line at zero.
# NOTE: this cell has no import step of its own; plt and whatever names replace
# the placeholders must already be in scope.
# Load the dataset
# TODO: fill in. The split below expects a feature matrix `X` and a target `y`.


# Split the data into training and test sets
# TODO: replace ______ with the splitting function (the other cells call
# train_test_split with this same argument pattern).
X_train, X_test, y_train, y_test = ______(X, y, test_size=0.3, random_state=42)

# Create a list of regression models
# TODO: replace each ______ with a model instance matching its label.
models = [
    ('Linear Regression', ______),
    ('Decision Tree', ______),
    ('Random Forest', ______)
]

# Collected as (name, r2) pairs so they can be unzipped for plotting.
r2_scores = []

# Train each model and calculate R^2 score
for name, model in models:
    model.fit(X_train, y_train)
    # Presumably score() returns R^2 for these regressors (scikit-learn
    # convention) -- confirm once the placeholders are filled in.
    r2 = model.score(X_test, y_test)
    r2_scores.append((name, r2))
    print(f'{name} R^2 Score: {r2:.2f}')

# Plotting
names, scores = zip(*r2_scores)
plt.scatter(names, scores, color='blue', s=100)  # Scatter plot
plt.axhline(0, color='red', linestyle='--')  # Draw a horizontal line at R^2 = 0
plt.ylabel('R^2 Score')
plt.title('Model R^2 Score Comparisons')
# Lower limit sits 0.1 below the worst score; upper limit is fixed at 1.
plt.ylim(min(scores) - 0.1, 1)  # Setting the y-axis limits to be slightly below the minimum R^2 score and up to 1
plt.show()


## Mean Absolute Percentage Error (MAPE)


In [None]:
# Exercise cell setup for the MAPE comparison.
# Load the diabetes dataset
# TODO: fill in. The split below expects a feature matrix `X` and a target `y`.

# Split the dataset
# TODO: replace _____ with the splitting function (the other cells call
# train_test_split with this same argument pattern).
X_train, X_test, y_train, y_test = _____(X, y, test_size=0.2, random_state=42)

# Define MAPE function
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Both inputs are converted to NumPy arrays. Samples whose true value is
    exactly zero are dropped before averaging, because their relative error
    would require dividing by zero.

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values, aligned with ``y_true``.

    Returns:
        The mean of ``|y_true - y_pred| / |y_true|`` over the non-zero
        targets, multiplied by 100.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    # Keep only the positions where the relative error is well defined.
    keep = actual != 0
    relative_errors = (actual[keep] - predicted[keep]) / actual[keep]

    return np.mean(np.abs(relative_errors)) * 100

# List of models to evaluate
# TODO: replace each _____ with a model instance matching its label.
models = [
    ('Linear Regression', _____),
    ('Ridge', Ridge()),
    ('Lasso', Lasso()),
    ('Decision Tree', _____),
    ('Random Forest', RandomForestRegressor(random_state=42)),
    ('SVR', SVR())
]

# Collected as (name, mape) pairs so they can be unzipped for plotting.
mape_scores = []

# Train and evaluate each model
# Scores are only collected here, not printed; the bar chart is the output.
for name, model in models:
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    # Uses the MAPE helper defined earlier in this cell (result is in percent).
    mape = mean_absolute_percentage_error(y_test, predictions)
    mape_scores.append((name, mape))

# Plotting
# zip(*pairs) transposes the list of pairs into a names tuple and a scores tuple.
names, scores = zip(*mape_scores)
plt.bar(names, scores, color='skyblue')
plt.ylabel('MAPE (%)')
plt.title('Model MAPE Comparisons')
# Rotate the tick labels so the six model names do not overlap.
plt.xticks(rotation=45)
plt.show()


## Root Mean Squared Logarithmic Error (RMSLE)


In [None]:
# Exercise cell setup for the RMSLE comparison.
# Import necessary libraries
# TODO: fill in. This cell uses np, plt, train_test_split,
# mean_squared_log_error and the model classes instantiated further down.


# Load the diabetes dataset
# TODO: fill in. The split below expects a feature matrix `X` and a target `y`.


# test_size=0.2 holds out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define RMSLE function
def root_mean_squared_log_error(y_true, y_pred):
    """Return the square root of the mean squared logarithmic error.

    Predictions below ``1e-10`` are raised to ``1e-10`` before scoring, so
    zero or negative predictions are silently replaced rather than passed on
    to ``mean_squared_log_error`` (presumably because that function rejects
    negative inputs -- confirm against its documentation).

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values, aligned with ``y_true``.

    Returns:
        ``sqrt(mean_squared_log_error(y_true, clipped_predictions))``.
    """
    lower_bound = 1e-10
    # Only a floor is applied; np.inf leaves the upper end unbounded.
    positive_predictions = np.clip(y_pred, lower_bound, np.inf)
    msle = mean_squared_log_error(y_true, positive_predictions)
    return np.sqrt(msle)

# List of models to evaluate
# TODO: replace ____ with a model instance matching the 'SVR' label.
models = [
    ('Linear Regression', LinearRegression()),
    ('Ridge', Ridge()),
    ('Lasso', Lasso()),
    ('Decision Tree', DecisionTreeRegressor(random_state=42)),
    ('Random Forest', RandomForestRegressor(random_state=42)),
    ('SVR', ____)
]

# Collected as (name, rmsle) pairs so they can be unzipped for plotting.
rmsle_scores = []

# Train and evaluate each model
# Scores are only collected here, not printed; the bar chart is the output.
for name, model in models:
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    # Uses the RMSLE helper defined earlier in this cell, which clips the
    # predictions to a small positive floor before scoring.
    rmsle = root_mean_squared_log_error(y_test, predictions)
    rmsle_scores.append((name, rmsle))

# Plotting
# zip(*pairs) transposes the list of pairs into a names tuple and a scores tuple.
names, scores = zip(*rmsle_scores)
plt.bar(names, scores, color='salmon')
plt.ylabel('RMSLE')
plt.title('Model RMSLE Comparisons')
# Rotate the tick labels so the six model names do not overlap.
plt.xticks(rotation=45)
plt.show()

