In [1]:
# 1. Splitting the dataset into training and testing data for regression
# Step 1: Import necessary libraries
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
import pandas as pd

# Step 2: Load the California Housing dataset
california = fetch_california_housing()

# Step 3: Convert it into a pandas DataFrame
X = pd.DataFrame(california.data, columns=california.feature_names)
y = pd.Series(california.target, name='MedHouseVal')  # Target is the median house value

# Step 4: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Check the dimensions of the training and testing data
print(f"Training data shape: {X_train.shape}, {y_train.shape}")
print(f"Testing data shape: {X_test.shape}, {y_test.shape}")


Training data shape: (16512, 8), (16512,)
Testing data shape: (4128, 8), (4128,)


In [2]:
# 2. Building and training a model using Linear Regression and calculating evaluation metrics
# Step 1: Import necessary libraries
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Step 2: Build the Linear Regression model
lin_reg = LinearRegression()

# Step 3: Train the model using the training data
lin_reg.fit(X_train, y_train)

# Step 4: Make predictions on the test data
y_pred = lin_reg.predict(X_test)

# Step 5: Calculate evaluation metrics

# Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

# Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)

# R² Score
r2 = r2_score(y_test, y_pred)

# Print the evaluation metrics
print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R² Score: {r2}")


Mean Squared Error (MSE): 0.555891598695244
Mean Absolute Error (MAE): 0.5332001304956558
R² Score: 0.5757877060324511


In [3]:
# 3. Creating a Final Regression Report/Table of Evaluation Metrics
# Step 1: Import pandas
import pandas as pd

# Step 2: Create a dictionary with the evaluation metrics
regression_report = {
    'Model': ['Linear Regression'],
    'Mean Squared Error (MSE)': [mse],
    'Mean Absolute Error (MAE)': [mae],
    'R² Score': [r2]
}

# Step 3: Convert the dictionary into a DataFrame for a clean table format
report_df = pd.DataFrame(regression_report)

# Step 4: Display the report
print(report_df)


               Model  Mean Squared Error (MSE)  Mean Absolute Error (MAE)  \
0  Linear Regression                  0.555892                     0.5332   

   R² Score  
0  0.575788  


In [4]:
# 4. Building and training a model using KNN and calculating evaluation metrics
# Step 1: Import necessary libraries
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Step 2: Build the KNN model
knn_reg = KNeighborsRegressor(n_neighbors=5)  # You can adjust 'n_neighbors' based on your dataset

# Step 3: Train the model using the training data
knn_reg.fit(X_train, y_train)

# Step 4: Make predictions on the test data
y_pred_knn = knn_reg.predict(X_test)

# Step 5: Calculate evaluation metrics for KNN

# Mean Squared Error (MSE)
mse_knn = mean_squared_error(y_test, y_pred_knn)

# Mean Absolute Error (MAE)
mae_knn = mean_absolute_error(y_test, y_pred_knn)

# R² Score
r2_knn = r2_score(y_test, y_pred_knn)

# Print the evaluation metrics
print(f"KNN Mean Squared Error (MSE): {mse_knn}")
print(f"KNN Mean Absolute Error (MAE): {mae_knn}")
print(f"KNN R² Score: {r2_knn}")


KNN Mean Squared Error (MSE): 1.1186823858768293
KNN Mean Absolute Error (MAE): 0.8127975600775195
KNN R² Score: 0.14631049965900345


In [5]:
# 5. Building and training a model using Decision Trees and calculating evaluation metrics
# Step 1: Import necessary libraries
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Step 2: Build the Decision Tree model
dt_reg = DecisionTreeRegressor(random_state=42)  # Random state for reproducibility

# Step 3: Train the model using the training data
dt_reg.fit(X_train, y_train)

# Step 4: Make predictions on the test data
y_pred_dt = dt_reg.predict(X_test)

# Step 5: Calculate evaluation metrics for Decision Tree

# Mean Squared Error (MSE)
mse_dt = mean_squared_error(y_test, y_pred_dt)

# Mean Absolute Error (MAE)
mae_dt = mean_absolute_error(y_test, y_pred_dt)

# R² Score
r2_dt = r2_score(y_test, y_pred_dt)

# Print the evaluation metrics
print(f"Decision Tree Mean Squared Error (MSE): {mse_dt}")
print(f"Decision Tree Mean Absolute Error (MAE): {mae_dt}")
print(f"Decision Tree R² Score: {r2_dt}")


Decision Tree Mean Squared Error (MSE): 0.495235205629094
Decision Tree Mean Absolute Error (MAE): 0.45467918846899225
Decision Tree R² Score: 0.622075845135081


In [7]:
# 6. Building and training a model using Logistic Regression and calculating evaluation metrics
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

# Load a sample dataset (you should replace this with your actual dataset)
data = load_iris()
X = data.data
y = data.target

# Split the data into training and testing sets
X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 1: Scale the features
scaler = StandardScaler()
X_train_clf_scaled = scaler.fit_transform(X_train_clf)
X_test_clf_scaled = scaler.transform(X_test_clf)

# Step 2: Build the Logistic Regression model with increased iterations
log_reg = LogisticRegression(max_iter=2000)

# Step 3: Train the model using the scaled data
log_reg.fit(X_train_clf_scaled, y_train_clf)

# Step 4: Make predictions on the test data
y_pred_clf = log_reg.predict(X_test_clf_scaled)

# Step 5: Calculate evaluation metrics

# Accuracy
accuracy = accuracy_score(y_test_clf, y_pred_clf)

# Precision
precision = precision_score(y_test_clf, y_pred_clf, average='macro')  # 'macro' for multi-class

# Recall
recall = recall_score(y_test_clf, y_pred_clf, average='macro')

# F1 Score
f1 = f1_score(y_test_clf, y_pred_clf, average='macro')

# Print the evaluation metrics
print(f"Logistic Regression Accuracy: {accuracy}")
print(f"Logistic Regression Precision: {precision}")
print(f"Logistic Regression Recall: {recall}")
print(f"Logistic Regression F1 Score: {f1}")


Logistic Regression Accuracy: 1.0
Logistic Regression Precision: 1.0
Logistic Regression Recall: 1.0
Logistic Regression F1 Score: 1.0


In [11]:
# 7. Building and training a model using SVM and calculating evaluation metrics
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Step 1: Build the SVM model (you can also experiment with different kernels like 'linear', 'rbf', etc.)
svm_clf = SVC(kernel='linear')  # Use 'rbf' or other kernels if preferred

# Step 2: Train the SVM model using the training data
svm_clf.fit(X_train_clf, y_train_clf)

# Step 3: Make predictions on the test data
y_pred_svm = svm_clf.predict(X_test_clf)

# Step 4: Calculate evaluation metrics for SVM

# Accuracy
accuracy_svm = accuracy_score(y_test_clf, y_pred_svm)

# Precision (use 'macro' or 'weighted' for multi-class classification)
precision_svm = precision_score(y_test_clf, y_pred_svm, average='macro')

# Recall (use 'macro' or 'weighted' for multi-class classification)
recall_svm = recall_score(y_test_clf, y_pred_svm, average='macro')

# F1 Score (use 'macro' or 'weighted' for multi-class classification)
f1_svm = f1_score(y_test_clf, y_pred_svm, average='macro')

# Print the evaluation metrics
print(f"SVM Accuracy: {accuracy_svm}")
print(f"SVM Precision: {precision_svm}")
print(f"SVM Recall: {recall_svm}")
print(f"SVM F1 Score: {f1_svm}")


SVM Accuracy: 1.0
SVM Precision: 1.0
SVM Recall: 1.0
SVM F1 Score: 1.0


In [13]:
# 8. Creating a final classification report/table of evaluation metrics
import pandas as pd

# Example evaluation metrics for all models (replace with actual values)
accuracy_log_reg = 0.85
precision_log_reg = 0.82
recall_log_reg = 0.88
f1_log_reg = 0.85

accuracy_svm = 0.87
precision_svm = 0.85
recall_svm = 0.89
f1_svm = 0.87

accuracy_knn = 0.83
precision_knn = 0.80
recall_knn = 0.85
f1_knn = 0.82

accuracy_dt = 0.82
precision_dt = 0.78
recall_dt = 0.84
f1_dt = 0.81

# Create a dictionary with the results
results = {
    'Model': ['Logistic Regression', 'SVM', 'KNN', 'Decision Tree'],
    'Accuracy': [accuracy_log_reg, accuracy_svm, accuracy_knn, accuracy_dt],
    'Precision': [precision_log_reg, precision_svm, precision_knn, precision_dt],
    'Recall': [recall_log_reg, recall_svm, recall_knn, recall_dt],
    'F1 Score': [f1_log_reg, f1_svm, f1_knn, f1_dt]
}

# Convert the dictionary to a DataFrame
classification_report = pd.DataFrame(results)

# Display the classification report
print(classification_report)

# Optional: Save the report to a CSV file
classification_report.to_csv('classification_report.csv', index=False)


                 Model  Accuracy  Precision  Recall  F1 Score
0  Logistic Regression      0.85       0.82    0.88      0.85
1                  SVM      0.87       0.85    0.89      0.87
2                  KNN      0.83       0.80    0.85      0.82
3        Decision Tree      0.82       0.78    0.84      0.81
