# Machine Learning Lab Evaluation

This lab includes two tasks: regression and classification. Each task compares three models: two traditional machine learning models and one neural network, with evaluations based on three metrics.

In [105]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from tensorflow.keras.utils import to_categorical
import pandas as pd
import numpy as np

## Task 1: Regression

### Step 1: Load Dataset
We use the Boston Housing dataset for the regression task.

Use Numpy library to import dataset.
<br>`numpy.load(FILE_PATH, allow_pickle=True).item()`
<br>The dataset structure is dictionary-format.

In [106]:
# Load the data

# -------------------------------
# Your code here
df = np.load("boston-dataset.npy", allow_pickle=True).item()
# -------------------------------

In [107]:
# Show 3 samples of dataset

# -------------------------------
# Your code here
pd.DataFrame(df['data'], columns=df['feature_names']).head(3)
# -------------------------------

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03


### Step 2: Split and Scale Data
We split the dataset into training and testing sets, then standardize the features for better performance.

In [108]:
# Split the data using train_test_split function
# 80% of the data will be used for training and 20% for testing
# Random state is set to 42

# -------------------------------
# Your code here
X = df['data']
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
# -------------------------------

# Standardize the data
# Use StandardScaler to standardize the data
# Use fit_transform method on the training data and transform method on the testing data

# -------------------------------
# Your code here
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# -------------------------------

In [109]:
# Show variable shape

# -------------------------------
# Your code here
print("Variable shape")
print(f"X_train >> {X_train.shape}")
print(f"X_test >> {X_test.shape}")
print(f"y_train >> {y_train.shape}")
print(f"y_test >> {y_test.shape}")
# -------------------------------

Variable shape
X_train >> (404, 13)
X_test >> (102, 13)
y_train >> (404,)
y_test >> (102,)


### Step 3: Train Models
We train three models: Linear Regression, Random Forest Regressor, and a Neural Network.
<br>Steps: 
1) Import/Build the model, 
2) Fit the model, 
3) Predict the target values.

In [110]:
# Linear Regression
# Create a Linear Regression model
# -------------------------------
# Your code here
lr_model =LinearRegression()
# -------------------------------


# Fit the model
# -------------------------------
# Your code here
lr_model.fit(X_train, y_train)
# -------------------------------


# Predict the target values
# -------------------------------
# Your code here
lr_model.predict(X_test)
# -------------------------------

array([28.99672362, 36.02556534, 14.81694405, 25.03197915, 18.76987992,
       23.25442929, 17.66253818, 14.34119   , 23.01320703, 20.63245597,
       24.90850512, 18.63883645, -6.08842184, 21.75834668, 19.23922576,
       26.19319733, 20.64773313,  5.79472718, 40.50033966, 17.61289074,
       27.24909479, 30.06625441, 11.34179277, 24.16077616, 17.86058499,
       15.83609765, 22.78148106, 14.57704449, 22.43626052, 19.19631835,
       22.43383455, 25.21979081, 25.93909562, 17.70162434, 16.76911711,
       16.95125411, 31.23340153, 20.13246729, 23.76579011, 24.6322925 ,
       13.94204955, 32.25576301, 42.67251161, 17.32745046, 27.27618614,
       16.99310991, 14.07009109, 25.90341861, 20.29485982, 29.95339638,
       21.28860173, 34.34451856, 16.04739105, 26.22562412, 39.53939798,
       22.57950697, 18.84531367, 32.72531661, 25.0673037 , 12.88628956,
       22.68221908, 30.48287757, 31.52626806, 15.90148607, 20.22094826,
       16.71089812, 20.52384893, 25.96356264, 30.61607978, 11.59

In [111]:
# Random Forest Regressor
# Use RandomForestRegressor to train the model with random_state=42
# -------------------------------
# Your code here
RandomForestRegressor(random_state=42)
# -------------------------------

# Fit the model
# -------------------------------
# Your code here
rf_pred = RandomForestRegressor(random_state=42).fit(X_train, y_train)
# -------------------------------

# Predict the target values
# -------------------------------
# Your code here
rf_pred_y_pred = rf_pred.predict(X_test)
# -------------------------------

In [112]:
# Neural Network
# Create a neural network model with 2 hidden layers with 64 units and ReLU activation function

nn_model_reg = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1)
])

# Compile the model with adam optimizer and mean squared error loss function
# -------------------------------
# Your code here
nn_model_reg.compile(optimizer='adam', loss='mean_squared_error')
# -------------------------------

# Fit the model with 50 epochs and batch size of 16
# -------------------------------
# Your code here
nn_model_reg.fit(X_train, y_train, epochs=50, batch_size=16)
# -------------------------------

# Predict the target values using the model
# -------------------------------
# Your code here
y_pred_nurual = nn_model_reg.predict(X_test)
# -------------------------------

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 530.9550   
Epoch 2/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 377.0482 
Epoch 3/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 184.9250 
Epoch 4/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 77.8853  
Epoch 5/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 46.1462 
Epoch 6/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 30.9502 
Epoch 7/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 25.3900 
Epoch 8/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 22.8636 
Epoch 9/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 21.0395 
Epoch 10/50
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 19.

### Step 4: Evaluate Models
We evaluate the models using Mean Squared Error (MSE), Mean Absolute Error (MAE), and R² Score.

In [113]:
# Evaluate the models
# from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

mae = mean_absolute_error(y_test, y_pred)
mse = (mean_squared_error(y_test, y_pred))
rmse = (np.sqrt(mean_squared_error(y_test, y_pred)))
r2 = r2_score(y_test, y_pred)

rf_pred_mae = mean_absolute_error(y_test, rf_pred_y_pred)
rf_pred_mse = (mean_squared_error(y_test, rf_pred_y_pred))
rf_pred_rmse = (np.sqrt(mean_squared_error(y_test, rf_pred_y_pred)))
rf_pred_r2 = r2_score(y_test, y_pred)

nurual_mae = mean_absolute_error(y_test, y_pred_nurual)
nurual_mse = (mean_squared_error(y_test, y_pred_nurual))
nurual_rmse = (np.sqrt(mean_squared_error(y_test, y_pred_nurual)))
nurual_r2 = r2_score(y_test, y_pred)

# -------------------------------
# Your code here
print("Regression Task")
print("Linear Regression")
print("MSE:", mse)
print("RMSE:", rmse)
print("MAE:", mae)
print("R2 Score:", r2)
# -------------------------------
print("Random Forest Regressor:")
print("MSE:", rf_pred_mse)
print("RMSE:", rf_pred_rmse)
print("MAE:", rf_pred_mae)
print("R2 Score:", rf_pred_r2)
# -------------------------------
print("Neural Network:")
print("Linear Regression")
print("MSE:", nurual_mse)
print("RMSE:", nurual_rmse)
print("MAE:", nurual_mae)
print("R2 Score:", nurual_r2)

Regression Task
Linear Regression
MSE: 11.738288514233469
RMSE: 3.4261185785424106
MAE: 2.243083157258875
R2 Score: 0.8399334112005223
Random Forest Regressor:
MSE: 7.84278832352941
RMSE: 2.80049787065254
MAE: 2.0359117647058826
R2 Score: 0.8399334112005223
Neural Network:
Linear Regression
MSE: 12.584291858589895
RMSE: 3.5474345460614063
MAE: 2.390969768224978
R2 Score: 0.8399334112005223


---
---
---

## Task 2: Classification

### Step 1: Load Dataset
We use the Iris dataset for the classification task.

In [114]:
# Load the data
# -------------------------------
# Your code here
CSV_PATH = "iris_dataset.csv"
df = pd.read_csv(CSV_PATH)
# -------------------------------

# Show the first 5 rows of the dataset
# -------------------------------
# Your code here
df.head()
# -------------------------------

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [115]:
# Split the data into features and target
# -------------------------------
# Your code here
X = df.drop(['target'],axis=1)
y = df['target']

# -------------------------------

In [116]:
# Show 3 samples of features
# -------------------------------
# Your code here
print("Features")
print(X[:3].values)
print()
print("Target")
print(y[:3].values)

# -------------------------------

Features
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]]

Target
[0 0 0]


### Step 2: Split and Scale Data
We split the dataset into training and testing sets, then standardize the features for better performance.

In [117]:
# Split the data using train_test_split function
# 80% of the data will be used for training and 20% for testing
# Random state is set to 42
# -------------------------------
# Your code here
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
# -------------------------------

# Standardize the data
# Use StandardScaler to standardize the data
# Use fit_transform method on the training data and transform method on the testing data
# -------------------------------
# Your code here
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# -------------------------------

### Step 2.5: Encode Target Variable
We encode the target variable using One-Hot Encoding.

In [118]:
# One-hot encode for neural network
# -------------------------------
# Your code here
# Your code here
y_train_onehot = to_categorical(y_train)
y_test_onehot = to_categorical(y_test)
# -------------------------------

# -------------------------------

In [119]:
# Show variable shape
# -------------------------------
# Your code here
print("Variable shape")
print(f"X_train >> {X_train.shape}")
print(f"X_test >> {X_test.shape}")
print(f"y_train >> {y_train.shape}")
print(f"y_test >> {y_test.shape}")
# -------------------------------

Variable shape
X_train >> (120, 4)
X_test >> (30, 4)
y_train >> (120,)
y_test >> (30,)


### Step 3: Train Models
We train three models: Logistic Regression, Random Forest Classifier, and a Neural Network.
<br>Steps: 
1) Import/Build the model, 
2) Fit the model, 
3) Predict the target values.

In [120]:
# Logistic Regression
# Create a Logistic Regression model
# -------------------------------
# Your code here
lr_model = LogisticRegression()
# -------------------------------

# Fit the model
# -------------------------------
# Your code here
lr_model.fit(X_train, y_train)
# -------------------------------

# Predict the target values
# -------------------------------
# Your code here
lr_pred = lr_model.predict(X_test)
# -------------------------------



In [125]:
# Random Forest Classifier
# Use RandomForestClassifier to train the model with random_state=42
# -------------------------------
# Your code here
rf_model = RandomForestClassifier(random_state=42)
# -------------------------------

# Fit the model
# -------------------------------
# Your code here
rf_model.fit(X_train, y_train)
# -------------------------------

# Predict the target values
# -------------------------------
# Your code here
rf_pred = rf_model.predict(X_test)
# -------------------------------

In [133]:
# Neural Network
# Create a neural network model with 2 hidden layers with 64 units and ReLU activation function
nn_model_cls = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(y_train_onehot.shape[1], activation='softmax')
])

# Compile the model with adam optimizer and categorical crossentropy loss function
# Use accuracy as the metric
# -------------------------------
# Your code here
nn_model_cls.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# -------------------------------

# Fit the model with 50 epochs and batch size of 16
# -------------------------------
# Your code here
nn_model_cls.fit(X_train, y_train_onehot, epochs=50, batch_size=16, verbose=0)
# -------------------------------

# Predict the target values using the model
# Use argmax to get the predicted class
# -------------------------------
# Your code here
nn_pred = np.argmax(nn_model_cls.predict(X_test), axis=1)
# -------------------------------

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step


### Step 4: Evaluate Models
We evaluate the models using Accuracy, Precision, and Recall.

In [None]:
# Evaluate the models
# -------------------------------
# Your code here
print("Classification Task:")
# Logistic Regression
print("Logistic Regression:")
print(f"  Accuracy: {accuracy_score(y_test, lr_pred):.4f}")
print(f"  Precision: {precision_score(y_test, lr_pred, average='weighted'):.4f}")
print(f"  Recall: {recall_score( y_test, lr_pred, average='weighted'):.4f}")
print(f"  F1 Score: {f1_score(y_test, lr_pred, average='weighted'):.4f}")
print(f"\n Confusion Matrix:\n{confusion_matrix(y_test, lr_pred)}\n")
# Random Forest
print("Random Forest Classifier:")
print(f"  Accuracy: {accuracy_score(y_test, rf_pred):.4f}")
print(f"  Precision: {precision_score(y_test, rf_pred, average='weighted'):.4f}")
print(f"  Recall: {recall_score(y_test, rf_pred, average='weighted'):.4f}")
print(f"  F1 Score: {f1_score(y_test, rf_pred, average='weighted'):.4f}")
print(f"\n Confusion Matrix:\n{confusion_matrix(y_test, rf_pred)}\n")
# Neural Network
print("Neural Network:")
print(f"  Accuracy: {accuracy_score(y_test, nn_pred):.4f}")
print(f"  Precision: {precision_score(y_test, nn_pred, average='weighted'):.4f}")
print(f"  Recall: {recall_score(y_test, nn_pred, average='weighted'):.4f}")
print(f"  F1 Score: {f1_score(y_test, nn_pred, average='weighted'):.4f}")
print(f"\n Confusion Matrix:\n{confusion_matrix(y_test, nn_pred)}\n")
# -------------------------------

Classification Task:
Logistic Regression:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

 Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

Random Forest Classifier:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

 Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

Neural Network:
  Accuracy: 1.0000
  Precision: 1.0000
  Recall: 1.0000
  F1 Score: 1.0000

 Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

