In [159]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import OneHotEncoder

In [160]:
from ucimlrepo import fetch_ucirepo 

  
# fetch dataset 
adult = fetch_ucirepo(id=2) 
  
# data (as pandas dataframes) 
adult_X = adult.data.features 
adult_y = adult.data.targets 
  
# variable information 
adult_y
# Combine features and target into one DataFrame
adult_df = pd.concat([adult_X, adult_y], axis=1)

# Drop rows with missing values
adult_df.dropna(inplace=True)
adult_df.reset_index(drop=True, inplace=True)
adult_df

# Identify categorical columns (replace 'categorical_column1', 'categorical_column2', etc. with your actual column names)
categorical_columns = ['workclass', 'education','marital-status','occupation','relationship','race','sex','native-country']

# Create a DataFrame with the original categorical columns
categorical_df = adult_df[categorical_columns]

# Perform one-hot encoding
encoded_categorical_df = pd.get_dummies(categorical_df, drop_first=True)

# Replace the original categorical columns with the encoded ones
adult_df = pd.concat([adult_df.drop(categorical_columns, axis=1), encoded_categorical_df], axis=1)


# Convert the income to binary results
income_mapping = {'<=50K': 0, '>50K': 1,'<=50K.': 0,  '>50K.': 1}
adult_df['income'] = adult_df['income'].map(income_mapping)

adult_df

Unnamed: 0,age,fnlwgt,education-num,capital-gain,capital-loss,hours-per-week,income,workclass_Federal-gov,workclass_Local-gov,workclass_Never-worked,...,native-country_Portugal,native-country_Puerto-Rico,native-country_Scotland,native-country_South,native-country_Taiwan,native-country_Thailand,native-country_Trinadad&Tobago,native-country_United-States,native-country_Vietnam,native-country_Yugoslavia
0,39,77516,13,2174,0,40,0,False,False,False,...,False,False,False,False,False,False,False,True,False,False
1,50,83311,13,0,0,13,0,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2,38,215646,9,0,0,40,0,False,False,False,...,False,False,False,False,False,False,False,True,False,False
3,53,234721,7,0,0,40,0,False,False,False,...,False,False,False,False,False,False,False,True,False,False
4,28,338409,13,0,0,40,0,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47616,33,245211,13,0,0,40,0,False,False,False,...,False,False,False,False,False,False,False,True,False,False
47617,39,215419,13,0,0,36,0,False,False,False,...,False,False,False,False,False,False,False,True,False,False
47618,38,374983,13,0,0,50,0,False,False,False,...,False,False,False,False,False,False,False,True,False,False
47619,44,83891,13,5455,0,40,0,False,False,False,...,False,False,False,False,False,False,False,True,False,False


In [161]:
from sklearn.model_selection import train_test_split

# Separate features (X) and target variable (y)
X = adult_df.drop('income', axis=1)  
y = adult_df['income']

# Split the data into training and testing sets. However, we also need to consider that using first 80% for training,
# rest 20% for testing is different from first 20% for training, rest for testing. So we would 3 times each for 
# every partition and compute average scores to remove potentials of having accidental results. We would set shuffle=true,
# and use different random_state each time.

# 80% training, 20% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)


X_train shape: (38096, 100) (38096, 100) (38096, 100)
X_test shape: (9525, 100) (9525, 100) (9525, 100)
y_train shape: (38096,) (38096,) (38096,)
y_test shape: (9525,) (9525,) (9525,)


In [162]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.11798860267273278
Root Mean Squared Error: 0.34349469089453594
R-squared: 0.3660427695984775
Mean Squared Error2: 0.11694971718553746
Root Mean Squared Error: 0.34197911805479797
R-squared: 0.36946172366283914
Mean Squared Error3: 0.11517773359958065
Root Mean Squared Error: 0.3393784518786964
R-squared: 0.3612221984621845
Average Mean Squared Error of 3 trials: 0.11670535115261697


In [163]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy: 0.7921259842519685
Confusion Matrix:
 [[6718  452]
 [1528  827]]
Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.94      0.87      7170
           1       0.65      0.35      0.46      2355

    accuracy                           0.79      9525
   macro avg       0.73      0.64      0.66      9525
weighted avg       0.77      0.79      0.77      9525

Accuracy: 0.796010498687664
Confusion Matrix:
 [[6948  234]
 [1709  634]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.97      0.88      7182
           1       0.73      0.27      0.39      2343

    accuracy                           0.80      9525
   macro avg       0.77      0.62      0.64      9525
weighted avg       0.78      0.80      0.76      9525

Accuracy: 0.8006299212598426
Confusion Matrix:
 [[7029  248]
 [1651  597]]
Classification Report:
               precision    recall  f1-score   supp

In [164]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix1 = confusion_matrix(y_test1, y_pred)
classification_rep1 = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix1)
print("Classification Report:\n", classification_rep1)


# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix2 = confusion_matrix(y_test2, y_pred)
classification_rep2 = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix2)
print("Classification Report:\n", classification_rep2)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix3 = confusion_matrix(y_test3, y_pred)
classification_rep3 = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix3)
print("Classification Report:\n", classification_rep3)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.7911811023622047
Confusion Matrix:
 [[7151   19]
 [1970  385]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      1.00      0.88      7170
           1       0.95      0.16      0.28      2355

    accuracy                           0.79      9525
   macro avg       0.87      0.58      0.58      9525
weighted avg       0.83      0.79      0.73      9525

Accuracy: 0.7952755905511811
Confusion Matrix:
 [[7168   14]
 [1936  407]]
Classification Report:
               precision    recall  f1-score   support

           0       0.79      1.00      0.88      7182
           1       0.97      0.17      0.29      2343

    accuracy                           0.80      9525
   macro avg       0.88      0.59      0.59      9525
weighted avg       0.83      0.80      0.74      9525

Accuracy: 0.8028346456692913
Confusion Matrix:
 [[7265   12]
 [1866  382]]
Classification Report:
               precision    recall  f1-score   sup

In [165]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.8489238845144357
Confusion Matrix:
 [[6678  492]
 [ 947 1408]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.93      0.90      7170
           1       0.74      0.60      0.66      2355

    accuracy                           0.85      9525
   macro avg       0.81      0.76      0.78      9525
weighted avg       0.84      0.85      0.84      9525

Accuracy: 0.8490288713910761
Confusion Matrix:
 [[6615  567]
 [ 871 1472]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.92      0.90      7182
           1       0.72      0.63      0.67      2343

    accuracy                           0.85      9525
   macro avg       0.80      0.77      0.79      9525
weighted avg       0.84      0.85      0.85      9525

Accuracy: 0.8028346456692913
Confusion Matrix:
 [[6708  569]
 [ 845 1403]]
Classification Report:
               precision    recall  f1-score   sup

In [166]:
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import GridSearchCV

# Assuming 'X' and 'y' are your feature and target variables
param_grid = {'n_neighbors': [1, 3, 5, 7, 9]}  # Adjust the range as needed

knn_model = KNeighborsClassifier()
grid_search = GridSearchCV(knn_model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)
best_neighbor = grid_search.best_params_['n_neighbors']

# Print the results of the grid search
print("Best n_neighbors:", best_neighbor)


Best n_neighbors: 9


In [167]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.7857217847769029
Confusion Matrix:
 [[6835  335]
 [1706  649]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.95      0.87      7170
           1       0.66      0.28      0.39      2355

    accuracy                           0.79      9525
   macro avg       0.73      0.61      0.63      9525
weighted avg       0.77      0.79      0.75      9525

Accuracy: 0.7903412073490813
Confusion Matrix:
 [[6817  365]
 [1632  711]]
Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.95      0.87      7182
           1       0.66      0.30      0.42      2343

    accuracy                           0.79      9525
   macro avg       0.73      0.63      0.64      9525
weighted avg       0.77      0.79      0.76      9525

Accuracy: 0.7935958005249344
Confusion Matrix:
 [[6922  355]
 [1611  637]]
Classification Report:
               precision    recall  f1-score   sup

In [168]:
# 50% training, 50% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)

X_train shape: (23810, 100) (23810, 100) (23810, 100)
X_test shape: (23811, 100) (23811, 100) (23811, 100)
y_train shape: (23810,) (23810,) (23810,)
y_test shape: (23811,) (23811,) (23811,)


In [169]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.11682593910853742
Root Mean Squared Error: 0.34179809699373315
R-squared: 0.36756582793672365
Mean Squared Error2: 0.11665032670634408
Root Mean Squared Error: 0.34154110544170824
R-squared: 0.36270464405226
Mean Squared Error3: 0.11655367147558676
Root Mean Squared Error: 0.3413995774390864
R-squared: 0.36533807077801916
Average Mean Squared Error of 3 trials: 0.11667664576348942


In [170]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy1:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy2:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy3:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy1: 0.7942547562051153
Confusion Matrix:
 [[17399   590]
 [ 4309  1513]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.97      0.88     17989
           1       0.72      0.26      0.38      5822

    accuracy                           0.79     23811
   macro avg       0.76      0.61      0.63     23811
weighted avg       0.78      0.79      0.76     23811

Accuracy2: 0.7966486077863173
Confusion Matrix:
 [[17421   646]
 [ 4196  1548]]
Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.96      0.88     18067
           1       0.71      0.27      0.39      5744

    accuracy                           0.80     23811
   macro avg       0.76      0.62      0.63     23811
weighted avg       0.78      0.80      0.76     23811

Accuracy3: 0.7978245348788375
Confusion Matrix:
 [[17488   551]
 [ 4263  1509]]
Classification Report:
               precision    recall 

In [171]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 0.78971903741968
Confusion Matrix:
 [[17980     9]
 [ 4998   824]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      1.00      0.88     17989
           1       0.99      0.14      0.25      5822

    accuracy                           0.79     23811
   macro avg       0.89      0.57      0.56     23811
weighted avg       0.83      0.79      0.72     23811

Accuracy: 0.7942547562051153
Confusion Matrix:
 [[18052    15]
 [ 4884   860]]
Classification Report:
               precision    recall  f1-score   support

           0       0.79      1.00      0.88     18067
           1       0.98      0.15      0.26      5744

    accuracy                           0.79     23811
   macro avg       0.88      0.57      0.57     23811
weighted avg       0.83      0.79      0.73     23811

Accuracy: 0.7978245348788375
Confusion Matrix:
 [[18031     8]
 [ 4922   850]]
Classification Report:
               precision    recall  f1-s

In [172]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.851287220192348
Confusion Matrix:
 [[16730  1259]
 [ 2282  3540]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.93      0.90     17989
           1       0.74      0.61      0.67      5822

    accuracy                           0.85     23811
   macro avg       0.81      0.77      0.79     23811
weighted avg       0.85      0.85      0.85     23811

Accuracy: 0.8528831212464827
Confusion Matrix:
 [[16743  1324]
 [ 2179  3565]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.93      0.91     18067
           1       0.73      0.62      0.67      5744

    accuracy                           0.85     23811
   macro avg       0.81      0.77      0.79     23811
weighted avg       0.85      0.85      0.85     23811

Accuracy: 0.7978245348788375
Confusion Matrix:
 [[16673  1366]
 [ 2143  3629]]
Classification Report:
               precision    recall  f1-

In [173]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.7877031624039309
Confusion Matrix:
 [[17300   689]
 [ 4366  1456]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.96      0.87     17989
           1       0.68      0.25      0.37      5822

    accuracy                           0.79     23811
   macro avg       0.74      0.61      0.62     23811
weighted avg       0.77      0.79      0.75     23811

Accuracy: 0.7904749905505859
Confusion Matrix:
 [[17309   758]
 [ 4231  1513]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.96      0.87     18067
           1       0.67      0.26      0.38      5744

    accuracy                           0.79     23811
   macro avg       0.73      0.61      0.63     23811
weighted avg       0.77      0.79      0.75     23811

Accuracy: 0.7909369619083617
Confusion Matrix:
 [[17325   714]
 [ 4264  1508]]
Classification Report:
               precision    recall  f1

In [174]:
# 30% training, 70% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)

X_train shape: (14286, 100) (14286, 100) (14286, 100)
X_test shape: (33335, 100) (33335, 100) (33335, 100)
y_train shape: (14286,) (14286,) (14286,)
y_test shape: (33335,) (33335,) (33335,)


In [175]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.11736153766801749
Root Mean Squared Error: 0.3425807024162591
R-squared: 0.36229463054578104
Mean Squared Error2: 0.11726632989097546
Root Mean Squared Error: 0.3424417175096741
R-squared: 0.3603957368634936
Mean Squared Error3: 0.11689162514299879
Root Mean Squared Error: 0.3418941724320536
R-squared: 0.36254718561458965
Average Mean Squared Error of 3 trials: 0.11717316423399725


In [176]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.7967901604919754
Confusion Matrix:
 [[24410   819]
 [ 5955  2151]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.97      0.88     25229
           1       0.72      0.27      0.39      8106

    accuracy                           0.80     33335
   macro avg       0.76      0.62      0.63     33335
weighted avg       0.78      0.80      0.76     33335

Accuracy: 0.7952002399880006
Confusion Matrix:
 [[24351   923]
 [ 5904  2157]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.96      0.88     25274
           1       0.70      0.27      0.39      8061

    accuracy                           0.80     33335
   macro avg       0.75      0.62      0.63     33335
weighted avg       0.78      0.80      0.76     33335

Accuracy: 0.7971201439928004
Confusion Matrix:
 [[24439   833]
 [ 5930  2133]]
Classification Report:
               precision    recall  f1

In [177]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 0.783920803959802
Confusion Matrix:
 [[25218    11]
 [ 7192   914]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      1.00      0.88     25229
           1       0.99      0.11      0.20      8106

    accuracy                           0.78     33335
   macro avg       0.88      0.56      0.54     33335
weighted avg       0.83      0.78      0.71     33335

Accuracy: 0.7863506824658767
Confusion Matrix:
 [[25261    13]
 [ 7109   952]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      1.00      0.88     25274
           1       0.99      0.12      0.21      8061

    accuracy                           0.79     33335
   macro avg       0.88      0.56      0.54     33335
weighted avg       0.83      0.79      0.72     33335

Accuracy: 0.7971201439928004
Confusion Matrix:
 [[25261    11]
 [ 7118   945]]
Classification Report:
               precision    recall  f1-

In [178]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.8507874606269686
Confusion Matrix:
 [[23363  1866]
 [ 3108  4998]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.93      0.90     25229
           1       0.73      0.62      0.67      8106

    accuracy                           0.85     33335
   macro avg       0.81      0.77      0.79     33335
weighted avg       0.85      0.85      0.85     33335

Accuracy: 0.8510874456277187
Confusion Matrix:
 [[23439  1835]
 [ 3129  4932]]
Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.93      0.90     25274
           1       0.73      0.61      0.67      8061

    accuracy                           0.85     33335
   macro avg       0.81      0.77      0.78     33335
weighted avg       0.85      0.85      0.85     33335

Accuracy: 0.7971201439928004
Confusion Matrix:
 [[23410  1862]
 [ 3048  5015]]
Classification Report:
               precision    recall  f1

In [179]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.7884805759712015
Confusion Matrix:
 [[24437   792]
 [ 6259  1847]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.97      0.87     25229
           1       0.70      0.23      0.34      8106

    accuracy                           0.79     33335
   macro avg       0.75      0.60      0.61     33335
weighted avg       0.77      0.79      0.75     33335

Accuracy: 0.7870706464676767
Confusion Matrix:
 [[24275   999]
 [ 6099  1962]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.96      0.87     25274
           1       0.66      0.24      0.36      8061

    accuracy                           0.79     33335
   macro avg       0.73      0.60      0.61     33335
weighted avg       0.77      0.79      0.75     33335

Accuracy: 0.7895905204739763
Confusion Matrix:
 [[24381   891]
 [ 6123  1940]]
Classification Report:
               precision    recall  f1

In [180]:
# fetch dataset 
heart_disease = fetch_ucirepo(id=45) 
  
# data (as pandas dataframes) 
heart_X = heart_disease.data.features 
heart_y = heart_disease.data.targets 
  
  
# variable information 
#print(heart_disease.variables) 
heart_y
# Combine features and target into one DataFrame
heart_df = pd.concat([heart_X, heart_y], axis=1)

# Drop rows with missing values
heart_df.dropna(inplace=True)
heart_df.reset_index(drop=True, inplace=True)
heart_df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0,2
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,57,0,4,140,241,0,0,123,1,0.2,2,0.0,7.0,1
293,45,1,1,110,264,0,0,132,0,1.2,2,0.0,7.0,1
294,68,1,4,144,193,1,0,141,0,3.4,2,2.0,7.0,2
295,57,1,4,130,131,0,0,115,1,1.2,2,1.0,7.0,3


In [181]:
# Separate features (X) and target variable (y)
X = heart_df.drop('num', axis=1)  
y = heart_df['num']

# Split the data into training and testing sets. However, we also need to consider that using first 80% for training,
# rest 20% for testing is different from first 20% for training, rest for testing. So we would 3 times each for 
# every partition and compute average scores to remove potentials of having accidental results. We would set shuffle=true,
# and use different random_state each time.

# 80% training, 20% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)


X_train shape: (237, 13) (237, 13) (237, 13)
X_test shape: (60, 13) (60, 13) (60, 13)
y_train shape: (237,) (237,) (237,)
y_test shape: (60,) (60,) (60,)


In [182]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.8203721866929784
Root Mean Squared Error: 0.9057439962224306
R-squared: 0.4815063426799997
Mean Squared Error2: 0.6966698453679048
Root Mean Squared Error: 0.8346675058775829
R-squared: 0.5672111400648046
Mean Squared Error3: 0.4557153968984596
Root Mean Squared Error: 0.675066957344573
R-squared: 0.6314141925781949
Average Mean Squared Error of 3 trials: 0.6575858096531143


In [183]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.6666666666666666
Confusion Matrix:
 [[35  0  0  1  0]
 [ 4  3  1  1  0]
 [ 0  2  1  2  0]
 [ 3  2  1  1  0]
 [ 2  0  1  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.97      0.88        36
           1       0.43      0.33      0.38         9
           2       0.25      0.20      0.22         5
           3       0.20      0.14      0.17         7
           4       0.00      0.00      0.00         3

    accuracy                           0.67        60
   macro avg       0.33      0.33      0.33        60
weighted avg       0.59      0.67      0.62        60

Accuracy: 0.6333333333333333
Confusion Matrix:
 [[34  1  0  0  0]
 [ 3  3  0  2  0]
 [ 1  3  0  3  0]
 [ 1  3  2  1  0]
 [ 0  2  0  1  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.97      0.92        35
           1       0.25      0.38      0.30         8
           2       0.0

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier,

In [184]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 0.6
Confusion Matrix:
 [[36  0  0  0  0]
 [ 9  0  0  0  0]
 [ 5  0  0  0  0]
 [ 7  0  0  0  0]
 [ 3  0  0  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.60      1.00      0.75        36
           1       0.00      0.00      0.00         9
           2       0.00      0.00      0.00         5
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         3

    accuracy                           0.60        60
   macro avg       0.12      0.20      0.15        60
weighted avg       0.36      0.60      0.45        60

Accuracy: 0.5833333333333334
Confusion Matrix:
 [[35  0  0  0  0]
 [ 8  0  0  0  0]
 [ 7  0  0  0  0]
 [ 7  0  0  0  0]
 [ 3  0  0  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.58      1.00      0.74        35
           1       0.00      0.00      0.00         8
           2       0.00      0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [185]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.6333333333333333
Confusion Matrix:
 [[35  1  0  0  0]
 [ 6  1  2  0  0]
 [ 1  1  2  1  0]
 [ 2  4  0  0  1]
 [ 1  2  0  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.97      0.86        36
           1       0.11      0.11      0.11         9
           2       0.50      0.40      0.44         5
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         3

    accuracy                           0.63        60
   macro avg       0.28      0.30      0.28        60
weighted avg       0.53      0.63      0.57        60

Accuracy: 0.65
Confusion Matrix:
 [[33  1  1  0  0]
 [ 4  1  1  2  0]
 [ 2  0  2  3  0]
 [ 1  2  1  3  0]
 [ 0  1  1  1  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.94      0.88        35
           1       0.20      0.12      0.15         8
           2       0.33      0.29   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [186]:
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import GridSearchCV

# Assuming 'X' and 'y' are your feature and target variables
param_grid = {'n_neighbors': [1, 3, 5, 7, 9]}  # Adjust the range as needed

knn_model = KNeighborsClassifier()
grid_search = GridSearchCV(knn_model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)
best_neighbor = grid_search.best_params_['n_neighbors']

# Print the results of the grid search
print("Best n_neighbors:", best_neighbor)


Best n_neighbors: 9


In [187]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.5666666666666667
Confusion Matrix:
 [[33  1  0  2  0]
 [ 6  1  1  1  0]
 [ 3  2  0  0  0]
 [ 7  0  0  0  0]
 [ 3  0  0  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.63      0.92      0.75        36
           1       0.25      0.11      0.15         9
           2       0.00      0.00      0.00         5
           3       0.00      0.00      0.00         7
           4       0.00      0.00      0.00         3

    accuracy                           0.57        60
   macro avg       0.18      0.21      0.18        60
weighted avg       0.42      0.57      0.47        60

Accuracy: 0.48333333333333334
Confusion Matrix:
 [[28  6  1  0  0]
 [ 5  1  1  1  0]
 [ 5  2  0  0  0]
 [ 6  0  1  0  0]
 [ 3  0  0  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.80      0.68        35
           1       0.11      0.12      0.12         8
           2       0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [188]:
# 50% training, 50% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)

X_train shape: (148, 13) (148, 13) (148, 13)
X_test shape: (149, 13) (149, 13) (149, 13)
y_train shape: (148,) (148,) (148,)
y_test shape: (149,) (149,) (149,)


In [189]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.7690552519852273
Root Mean Squared Error: 0.876957953373608
R-squared: 0.5046479154774275
Mean Squared Error2: 0.8752143579212527
Root Mean Squared Error: 0.9355289188054278
R-squared: 0.41268788658536637
Mean Squared Error3: 0.7457771841259108
Root Mean Squared Error: 0.8635839184039446
R-squared: 0.526482890099544
Average Mean Squared Error of 3 trials: 0.7966822646774636


In [190]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.5369127516778524
Confusion Matrix:
 [[66  1  1  2  0]
 [19  4  3  7  0]
 [ 2  5  5  7  0]
 [ 5  6  4  5  0]
 [ 2  2  2  1  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.94      0.80        70
           1       0.22      0.12      0.16        33
           2       0.33      0.26      0.29        19
           3       0.23      0.25      0.24        20
           4       0.00      0.00      0.00         7

    accuracy                           0.54       149
   macro avg       0.30      0.32      0.30       149
weighted avg       0.45      0.54      0.48       149

Accuracy: 0.5906040268456376
Confusion Matrix:
 [[70  6  3  3  6]
 [10  5  4  3  1]
 [ 1  4  6  3  1]
 [ 2  3  4  7  1]
 [ 1  0  1  4  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.80      0.81        88
           1       0.28      0.22      0.24        23
           2       0.3

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_

In [191]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 0.4697986577181208
Confusion Matrix:
 [[70  0  0  0  0]
 [33  0  0  0  0]
 [19  0  0  0  0]
 [20  0  0  0  0]
 [ 7  0  0  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.47      1.00      0.64        70
           1       0.00      0.00      0.00        33
           2       0.00      0.00      0.00        19
           3       0.00      0.00      0.00        20
           4       0.00      0.00      0.00         7

    accuracy                           0.47       149
   macro avg       0.09      0.20      0.13       149
weighted avg       0.22      0.47      0.30       149

Accuracy: 0.5906040268456376
Confusion Matrix:
 [[88  0  0  0  0]
 [23  0  0  0  0]
 [15  0  0  0  0]
 [17  0  0  0  0]
 [ 6  0  0  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.59      1.00      0.74        88
           1       0.00      0.00      0.00        23
           2       0.0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [192]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.5100671140939598
Confusion Matrix:
 [[68  1  0  1  0]
 [21  0  5  7  0]
 [ 9  0  7  2  1]
 [ 9  2  7  1  1]
 [ 3  1  3  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.62      0.97      0.76        70
           1       0.00      0.00      0.00        33
           2       0.32      0.37      0.34        19
           3       0.09      0.05      0.06        20
           4       0.00      0.00      0.00         7

    accuracy                           0.51       149
   macro avg       0.21      0.28      0.23       149
weighted avg       0.34      0.51      0.41       149

Accuracy: 0.5838926174496645
Confusion Matrix:
 [[73 12  2  1  0]
 [ 9  6  3  5  0]
 [ 2  6  4  3  0]
 [ 2  7  3  4  1]
 [ 1  3  0  2  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.84      0.83      0.83        88
           1       0.18      0.26      0.21        23
           2       0.3

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [193]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.44966442953020136
Confusion Matrix:
 [[67  2  0  1  0]
 [29  0  1  3  0]
 [14  1  0  4  0]
 [17  2  1  0  0]
 [ 7  0  0  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.96      0.66        70
           1       0.00      0.00      0.00        33
           2       0.00      0.00      0.00        19
           3       0.00      0.00      0.00        20
           4       0.00      0.00      0.00         7

    accuracy                           0.45       149
   macro avg       0.10      0.19      0.13       149
weighted avg       0.23      0.45      0.31       149

Accuracy: 0.5167785234899329
Confusion Matrix:
 [[73 11  3  1  0]
 [13  3  3  4  0]
 [11  1  0  3  0]
 [11  4  0  1  1]
 [ 2  2  2  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.83      0.74        88
           1       0.14      0.13      0.14        23
           2       0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [194]:
# 30% training, 70% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)

X_train shape: (89, 13) (89, 13) (89, 13)
X_test shape: (208, 13) (208, 13) (208, 13)
y_train shape: (89,) (89,) (89,)
y_test shape: (208,) (208,) (208,)


In [195]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.7961364821251968
Root Mean Squared Error: 0.8922648049347216
R-squared: 0.4586992588215909
Mean Squared Error2: 0.8803855149184874
Root Mean Squared Error: 0.9382886096071333
R-squared: 0.3791625414837013
Mean Squared Error3: 0.7947988736957152
Root Mean Squared Error: 0.8915149318411415
R-squared: 0.496917697304042
Average Mean Squared Error of 3 trials: 0.8237736235797998


In [196]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Accuracy: 0.5769230769230769
Confusion Matrix:
 [[102   0   3   0   0]
 [ 28   3   8   5   0]
 [ 10   3   6   6   0]
 [  9   3   5   9   0]
 [  2   1   3   2   0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.68      0.97      0.80       105
           1       0.30      0.07      0.11        44
           2       0.24      0.24      0.24        25
           3       0.41      0.35      0.38        26
           4       0.00      0.00      0.00         8

    accuracy                           0.58       208
   macro avg       0.32      0.33      0.30       208
weighted avg       0.48      0.58      0.50       208

Accuracy: 0.5817307692307693
Confusion Matrix:
 [[94 10  5  3  7]
 [15 10  8  3  1]
 [ 1  4 10  5  1]
 [ 2  6  7  7  2]
 [ 2  1  3  1  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.79      0.81       119
           1       0.32      0.27      0.29        

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [197]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 0.5048076923076923
Confusion Matrix:
 [[105   0   0   0   0]
 [ 44   0   0   0   0]
 [ 25   0   0   0   0]
 [ 26   0   0   0   0]
 [  8   0   0   0   0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67       105
           1       0.00      0.00      0.00        44
           2       0.00      0.00      0.00        25
           3       0.00      0.00      0.00        26
           4       0.00      0.00      0.00         8

    accuracy                           0.50       208
   macro avg       0.10      0.20      0.13       208
weighted avg       0.25      0.50      0.34       208

Accuracy: 0.5721153846153846
Confusion Matrix:
 [[119   0   0   0   0]
 [ 37   0   0   0   0]
 [ 21   0   0   0   0]
 [ 24   0   0   0   0]
 [  7   0   0   0   0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.57      1.00      0.73       119
           1       0.00   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [198]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.5576923076923077
Confusion Matrix:
 [[102   1   2   0   0]
 [ 31   0   9   4   0]
 [ 11   2   9   3   0]
 [ 10   2   8   4   2]
 [  2   1   3   1   1]]
Classification Report:
               precision    recall  f1-score   support

           0       0.65      0.97      0.78       105
           1       0.00      0.00      0.00        44
           2       0.29      0.36      0.32        25
           3       0.33      0.15      0.21        26
           4       0.33      0.12      0.18         8

    accuracy                           0.56       208
   macro avg       0.32      0.32      0.30       208
weighted avg       0.42      0.56      0.47       208

Accuracy: 0.6153846153846154
Confusion Matrix:
 [[108   7   2   2   0]
 [ 22   3   4   7   1]
 [  2   4  10   4   1]
 [  3   6   7   7   1]
 [  1   1   2   3   0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.79      0.91      0.85       119
           1       0.14   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [199]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.4567307692307692
Confusion Matrix:
 [[92 10  3  0  0]
 [40  0  2  2  0]
 [21  0  3  1  0]
 [25  0  1  0  0]
 [ 7  1  0  0  0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.50      0.88      0.63       105
           1       0.00      0.00      0.00        44
           2       0.33      0.12      0.18        25
           3       0.00      0.00      0.00        26
           4       0.00      0.00      0.00         8

    accuracy                           0.46       208
   macro avg       0.17      0.20      0.16       208
weighted avg       0.29      0.46      0.34       208

Accuracy: 0.5192307692307693
Confusion Matrix:
 [[102   5  10   2   0]
 [ 29   2   0   5   1]
 [ 17   0   1   3   0]
 [ 18   1   0   3   2]
 [  4   1   2   0   0]]
Classification Report:
               precision    recall  f1-score   support

           0       0.60      0.86      0.71       119
           1       0.22      0.05      0.09        

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [200]:

  
# fetch dataset 
wine = fetch_ucirepo(id=109) 
  
# data (as pandas dataframes) 
wine_X = wine.data.features 
wine_y = wine.data.targets 
  
# Combine features and target into one DataFrame
wine_df = pd.concat([wine_X, wine_y], axis=1)

# Drop rows with missing values
wine_df.dropna(inplace=True)
wine_df.reset_index(drop=True, inplace=True)
wine_df

Unnamed: 0,Alcohol,Malicacid,Ash,Alcalinity_of_ash,Magnesium,Total_phenols,Flavanoids,Nonflavanoid_phenols,Proanthocyanins,Color_intensity,Hue,0D280_0D315_of_diluted_wines,Proline,class
0,14.23,1.71,2.43,15.6,127,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065,1
1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050,1
2,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185,1
3,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480,1
4,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740,3
174,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750,3
175,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835,3
176,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840,3


In [201]:
# Separate features (X) and target variable (y)
X = wine_df.drop('class', axis=1)  
y = wine_df['class']

# Split the data into training and testing sets. However, we also need to consider that using first 80% for training,
# rest 20% for testing is different from first 20% for training, rest for testing. So we would 3 times each for 
# every partition and compute average scores to remove potentials of having accidental results. We would set shuffle=true,
# and use different random_state each time.

# 80% training, 20% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)


X_train shape: (142, 13) (142, 13) (142, 13)
X_test shape: (36, 13) (36, 13) (36, 13)
y_train shape: (142,) (142,) (142,)
y_test shape: (36,) (36,) (36,)


In [202]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.06853348464256041
Root Mean Squared Error: 0.2617890078719128
R-squared: 0.8825140263270392
Mean Squared Error2: 0.06999745491730155
Root Mean Squared Error: 0.2645703213085352
R-squared: 0.8591355565639397
Mean Squared Error3: 0.097378733133621
Root Mean Squared Error: 0.3120556571088257
R-squared: 0.8398441140340447
Average Mean Squared Error of 3 trials: 0.07863655756449432


In [203]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.9722222222222222
Confusion Matrix:
 [[13  1  0]
 [ 0 14  0]
 [ 0  0  8]]
Classification Report:
               precision    recall  f1-score   support

           1       1.00      0.93      0.96        14
           2       0.93      1.00      0.97        14
           3       1.00      1.00      1.00         8

    accuracy                           0.97        36
   macro avg       0.98      0.98      0.98        36
weighted avg       0.97      0.97      0.97        36

Accuracy: 0.9166666666666666
Confusion Matrix:
 [[10  0  0]
 [ 1 15  2]
 [ 0  0  8]]
Classification Report:
               precision    recall  f1-score   support

           1       0.91      1.00      0.95        10
           2       1.00      0.83      0.91        18
           3       0.80      1.00      0.89         8

    accuracy                           0.92        36
   macro avg       0.90      0.94      0.92        36
weighted avg       0.93      0.92      0.92        36

Accuracy: 0.91666666

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [204]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 0.8055555555555556
Confusion Matrix:
 [[14  0  0]
 [ 0 11  3]
 [ 0  4  4]]
Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00        14
           2       0.73      0.79      0.76        14
           3       0.57      0.50      0.53         8

    accuracy                           0.81        36
   macro avg       0.77      0.76      0.76        36
weighted avg       0.80      0.81      0.80        36

Accuracy: 0.6944444444444444
Confusion Matrix:
 [[ 7  0  3]
 [ 1 13  4]
 [ 0  3  5]]
Classification Report:
               precision    recall  f1-score   support

           1       0.88      0.70      0.78        10
           2       0.81      0.72      0.76        18
           3       0.42      0.62      0.50         8

    accuracy                           0.69        36
   macro avg       0.70      0.68      0.68        36
weighted avg       0.74      0.69      0.71        36

Accuracy: 0.91666666

In [205]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 1.0
Confusion Matrix:
 [[14  0  0]
 [ 0 14  0]
 [ 0  0  8]]
Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00        14
           3       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36

Accuracy: 0.9166666666666666
Confusion Matrix:
 [[10  0  0]
 [ 0 15  3]
 [ 0  0  8]]
Classification Report:
               precision    recall  f1-score   support

           1       1.00      1.00      1.00        10
           2       1.00      0.83      0.91        18
           3       0.73      1.00      0.84         8

    accuracy                           0.92        36
   macro avg       0.91      0.94      0.92        36
weighted avg       0.94      0.92      0.92        36

Accuracy: 0.9166666666666666
Confus

In [206]:
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import GridSearchCV

# Assuming 'X' and 'y' are your feature and target variables
param_grid = {'n_neighbors': [1, 3, 5, 7, 9]}  # Adjust the range as needed

knn_model = KNeighborsClassifier()
grid_search = GridSearchCV(knn_model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)
best_neighbor = grid_search.best_params_['n_neighbors']

# Print the results of the grid search
print("Best n_neighbors:", best_neighbor)


Best n_neighbors: 1


In [207]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.7777777777777778
Confusion Matrix:
 [[12  0  2]
 [ 3 11  0]
 [ 1  2  5]]
Classification Report:
               precision    recall  f1-score   support

           1       0.75      0.86      0.80        14
           2       0.85      0.79      0.81        14
           3       0.71      0.62      0.67         8

    accuracy                           0.78        36
   macro avg       0.77      0.76      0.76        36
weighted avg       0.78      0.78      0.78        36

Accuracy: 0.6944444444444444
Confusion Matrix:
 [[ 9  0  1]
 [ 4 11  3]
 [ 0  3  5]]
Classification Report:
               precision    recall  f1-score   support

           1       0.69      0.90      0.78        10
           2       0.79      0.61      0.69        18
           3       0.56      0.62      0.59         8

    accuracy                           0.69        36
   macro avg       0.68      0.71      0.69        36
weighted avg       0.71      0.69      0.69        36

Accuracy: 0.69444444

In [208]:
# 50% training, 50% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)

X_train shape: (89, 13) (89, 13) (89, 13)
X_test shape: (89, 13) (89, 13) (89, 13)
y_train shape: (89,) (89,) (89,)
y_test shape: (89,) (89,) (89,)


In [209]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.08290159844081665
Root Mean Squared Error: 0.28792637677159183
R-squared: 0.8624500290637392
Mean Squared Error2: 0.09824282950970675
Root Mean Squared Error: 0.3134371220990053
R-squared: 0.8375404065665162
Mean Squared Error3: 0.0905871982559387
Root Mean Squared Error: 0.3009770726416527
R-squared: 0.8314047938474411
Average Mean Squared Error of 3 trials: 0.09057720873548736


In [210]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.9325842696629213
Confusion Matrix:
 [[30  3  0]
 [ 2 32  0]
 [ 0  1 21]]
Classification Report:
               precision    recall  f1-score   support

           1       0.94      0.91      0.92        33
           2       0.89      0.94      0.91        34
           3       1.00      0.95      0.98        22

    accuracy                           0.93        89
   macro avg       0.94      0.93      0.94        89
weighted avg       0.93      0.93      0.93        89

Accuracy: 0.8876404494382022
Confusion Matrix:
 [[27  2  0]
 [ 6 27  2]
 [ 0  0 25]]
Classification Report:
               precision    recall  f1-score   support

           1       0.82      0.93      0.87        29
           2       0.93      0.77      0.84        35
           3       0.93      1.00      0.96        25

    accuracy                           0.89        89
   macro avg       0.89      0.90      0.89        89
weighted avg       0.89      0.89      0.89        89

Accuracy: 0.92134831

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [211]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 0.651685393258427
Confusion Matrix:
 [[28  0  5]
 [ 1 23 10]
 [ 0 15  7]]
Classification Report:
               precision    recall  f1-score   support

           1       0.97      0.85      0.90        33
           2       0.61      0.68      0.64        34
           3       0.32      0.32      0.32        22

    accuracy                           0.65        89
   macro avg       0.63      0.61      0.62        89
weighted avg       0.67      0.65      0.66        89

Accuracy: 0.6292134831460674
Confusion Matrix:
 [[22  1  6]
 [ 2 29  4]
 [ 1 19  5]]
Classification Report:
               precision    recall  f1-score   support

           1       0.88      0.76      0.81        29
           2       0.59      0.83      0.69        35
           3       0.33      0.20      0.25        25

    accuracy                           0.63        89
   macro avg       0.60      0.60      0.59        89
weighted avg       0.61      0.63      0.61        89

Accuracy: 0.921348314

In [212]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.9775280898876404
Confusion Matrix:
 [[31  2  0]
 [ 0 34  0]
 [ 0  0 22]]
Classification Report:
               precision    recall  f1-score   support

           1       1.00      0.94      0.97        33
           2       0.94      1.00      0.97        34
           3       1.00      1.00      1.00        22

    accuracy                           0.98        89
   macro avg       0.98      0.98      0.98        89
weighted avg       0.98      0.98      0.98        89

Accuracy: 0.9325842696629213
Confusion Matrix:
 [[27  2  0]
 [ 1 31  3]
 [ 0  0 25]]
Classification Report:
               precision    recall  f1-score   support

           1       0.96      0.93      0.95        29
           2       0.94      0.89      0.91        35
           3       0.89      1.00      0.94        25

    accuracy                           0.93        89
   macro avg       0.93      0.94      0.93        89
weighted avg       0.93      0.93      0.93        89

Accuracy: 0.92134831

In [213]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.6853932584269663
Confusion Matrix:
 [[27  2  4]
 [ 4 24  6]
 [ 1 11 10]]
Classification Report:
               precision    recall  f1-score   support

           1       0.84      0.82      0.83        33
           2       0.65      0.71      0.68        34
           3       0.50      0.45      0.48        22

    accuracy                           0.69        89
   macro avg       0.66      0.66      0.66        89
weighted avg       0.68      0.69      0.68        89

Accuracy: 0.7303370786516854
Confusion Matrix:
 [[21  3  5]
 [ 4 28  3]
 [ 0  9 16]]
Classification Report:
               precision    recall  f1-score   support

           1       0.84      0.72      0.78        29
           2       0.70      0.80      0.75        35
           3       0.67      0.64      0.65        25

    accuracy                           0.73        89
   macro avg       0.74      0.72      0.73        89
weighted avg       0.74      0.73      0.73        89

Accuracy: 0.70786516

In [214]:
# 30% training, 70% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)

X_train shape: (53, 13) (53, 13) (53, 13)
X_test shape: (125, 13) (125, 13) (125, 13)
y_train shape: (53,) (53,) (53,)
y_test shape: (125,) (125,) (125,)


In [215]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.10259876164714439
Root Mean Squared Error: 0.3203104145155827
R-squared: 0.825862953428565
Mean Squared Error2: 0.08592441979926915
Root Mean Squared Error: 0.2931286744746565
R-squared: 0.8581393639725718
Mean Squared Error3: 0.09787171461038278
Root Mean Squared Error: 0.3128445534293074
R-squared: 0.8169883268564826
Average Mean Squared Error of 3 trials: 0.09546496535226545


In [216]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.936
Confusion Matrix:
 [[41  3  0]
 [ 2 47  1]
 [ 0  2 29]]
Classification Report:
               precision    recall  f1-score   support

           1       0.95      0.93      0.94        44
           2       0.90      0.94      0.92        50
           3       0.97      0.94      0.95        31

    accuracy                           0.94       125
   macro avg       0.94      0.94      0.94       125
weighted avg       0.94      0.94      0.94       125

Accuracy: 0.896
Confusion Matrix:
 [[34  7  0]
 [ 3 44  2]
 [ 0  1 34]]
Classification Report:
               precision    recall  f1-score   support

           1       0.92      0.83      0.87        41
           2       0.85      0.90      0.87        49
           3       0.94      0.97      0.96        35

    accuracy                           0.90       125
   macro avg       0.90      0.90      0.90       125
weighted avg       0.90      0.90      0.90       125

Accuracy: 0.896
Confusion Matrix:
 [[35  4  1]

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [217]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 0.688
Confusion Matrix:
 [[37  0  7]
 [ 2 38 10]
 [ 0 20 11]]
Classification Report:
               precision    recall  f1-score   support

           1       0.95      0.84      0.89        44
           2       0.66      0.76      0.70        50
           3       0.39      0.35      0.37        31

    accuracy                           0.69       125
   macro avg       0.67      0.65      0.66       125
weighted avg       0.69      0.69      0.69       125

Accuracy: 0.64
Confusion Matrix:
 [[33  8  0]
 [ 2 47  0]
 [ 1 34  0]]
Classification Report:
               precision    recall  f1-score   support

           1       0.92      0.80      0.86        41
           2       0.53      0.96      0.68        49
           3       0.00      0.00      0.00        35

    accuracy                           0.64       125
   macro avg       0.48      0.59      0.51       125
weighted avg       0.51      0.64      0.55       125

Accuracy: 0.896
Confusion Matrix:
 [[34  0  6]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [218]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.944
Confusion Matrix:
 [[42  2  0]
 [ 2 47  1]
 [ 0  2 29]]
Classification Report:
               precision    recall  f1-score   support

           1       0.95      0.95      0.95        44
           2       0.92      0.94      0.93        50
           3       0.97      0.94      0.95        31

    accuracy                           0.94       125
   macro avg       0.95      0.94      0.95       125
weighted avg       0.94      0.94      0.94       125

Accuracy: 0.936
Confusion Matrix:
 [[36  5  0]
 [ 1 46  2]
 [ 0  0 35]]
Classification Report:
               precision    recall  f1-score   support

           1       0.97      0.88      0.92        41
           2       0.90      0.94      0.92        49
           3       0.95      1.00      0.97        35

    accuracy                           0.94       125
   macro avg       0.94      0.94      0.94       125
weighted avg       0.94      0.94      0.94       125

Accuracy: 0.896
Confusion Matrix:
 [[38  2  0]

In [219]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.672
Confusion Matrix:
 [[36  5  3]
 [ 5 35 10]
 [ 2 16 13]]
Classification Report:
               precision    recall  f1-score   support

           1       0.84      0.82      0.83        44
           2       0.62      0.70      0.66        50
           3       0.50      0.42      0.46        31

    accuracy                           0.67       125
   macro avg       0.65      0.65      0.65       125
weighted avg       0.67      0.67      0.67       125

Accuracy: 0.696
Confusion Matrix:
 [[30  5  6]
 [ 1 32 16]
 [ 0 10 25]]
Classification Report:
               precision    recall  f1-score   support

           1       0.97      0.73      0.83        41
           2       0.68      0.65      0.67        49
           3       0.53      0.71      0.61        35

    accuracy                           0.70       125
   macro avg       0.73      0.70      0.70       125
weighted avg       0.73      0.70      0.71       125

Accuracy: 0.704
Confusion Matrix:
 [[35  0  5]

In [220]:
# Iris dataset.
iris = datasets.load_iris()     # Load Iris dataset.

# Create DataFrame from Iris dataset
iris_data = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_target = pd.DataFrame(data=iris.target, columns=['target'])
iris_df = pd.concat([iris_data, iris_target], axis=1)

iris_df.dropna(inplace=True)
iris_df.reset_index(drop=True, inplace=True)
iris_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2


In [221]:
# Separate features (X) and target variable (y)
X = iris_df.drop('target', axis=1)  
y = iris_df['target']

# Split the data into training and testing sets. However, we also need to consider that using first 80% for training,
# rest 20% for testing is different from first 20% for training, rest for testing. So we would 3 times each for 
# every partition and compute average scores to remove potentials of having accidental results. We would set shuffle=true,
# and use different random_state each time.

# 80% training, 20% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.2, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)


X_train shape: (120, 4) (120, 4) (120, 4)
X_test shape: (30, 4) (30, 4) (30, 4)
y_train shape: (120,) (120,) (120,)
y_test shape: (30,) (30,) (30,)


In [222]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.03711379440797689
Root Mean Squared Error: 0.1926494080135646
R-squared: 0.9468960016420045
Mean Squared Error2: 0.04921147699310945
Root Mean Squared Error: 0.22183659976006992
R-squared: 0.9115961491141746
Mean Squared Error3: 0.04371010442232157
Root Mean Squared Error: 0.20906961621029865
R-squared: 0.9339948087582393
Average Mean Squared Error of 3 trials: 0.0433451252744693


In [223]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0 13  0]
 [ 0  0  7]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00         7

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy: 1.0
Confusion Matrix:
 [[11  0  0]
 [ 0 

In [224]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy: 0.9666666666666667
Confusion Matrix:
 [[10  0  0]
 [ 0 12  1]
 [ 0  0  7]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.92      0.96        13
           2       0.88      1.00      0.93         7

    accuracy                           0.97        30
   macro avg       0.96      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30

Accuracy: 1.0
Confusion Matrix:
 [[

In [225]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Accuracy: 0.9666666666666667
Confusion Matrix:
 [[10  0  0]
 [ 0 12  1]
 [ 0  0  7]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.92      0.96        13
           2       0.88      1.00      0.93         7

    accuracy                           0.97        30
   macro avg       0.96      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30

Accuracy: 1.0
Confusion Matrix:
 [[

In [226]:
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import GridSearchCV

# Assuming 'X' and 'y' are your feature and target variables
param_grid = {'n_neighbors': [1, 3, 5, 7, 9]}  # Adjust the range as needed

knn_model = KNeighborsClassifier()
grid_search = GridSearchCV(knn_model, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X, y)
best_neighbor = grid_search.best_params_['n_neighbors']

# Print the results of the grid search
print("Best n_neighbors:", best_neighbor)


Best n_neighbors: 7


In [227]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.9666666666666667
Confusion Matrix:
 [[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.89      0.94         9
           2       0.92      1.00      0.96        11

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.97        30
weighted avg       0.97      0.97      0.97        30

Accuracy: 0.9666666666666667
Confusion Matrix:
 [[10  0  0]
 [ 0 12  1]
 [ 0  0  7]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.92      0.96        13
           2       0.88      1.00      0.93         7

    accuracy                           0.97        30
   macro avg       0.96      0.97      0.96        30
weighted avg       0.97      0.97      0.97        30

Accuracy: 1.0
Confus

In [228]:
# 50% training, 50% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.5, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)

X_train shape: (75, 4) (75, 4) (75, 4)
X_test shape: (75, 4) (75, 4) (75, 4)
y_train shape: (75,) (75,) (75,)
y_test shape: (75,) (75,) (75,)


In [229]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.04034157363532383
Root Mean Squared Error: 0.20085211882209217
R-squared: 0.9412729421069626
Mean Squared Error2: 0.05910121064094707
Root Mean Squared Error: 0.24310740556582613
R-squared: 0.907551637971266
Mean Squared Error3: 0.05727711046775983
Root Mean Squared Error: 0.23932636809963048
R-squared: 0.9104049648550754
Average Mean Squared Error of 3 trials: 0.05223996491467691


In [230]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 1.0
Confusion Matrix:
 [[29  0  0]
 [ 0 23  0]
 [ 0  0 23]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        29
           1       1.00      1.00      1.00        23
           2       1.00      1.00      1.00        23

    accuracy                           1.00        75
   macro avg       1.00      1.00      1.00        75
weighted avg       1.00      1.00      1.00        75

Accuracy: 0.96
Confusion Matrix:
 [[23  0  0]
 [ 0 24  3]
 [ 0  0 25]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      0.89      0.94        27
           2       0.89      1.00      0.94        25

    accuracy                           0.96        75
   macro avg       0.96      0.96      0.96        75
weighted avg       0.96      0.96      0.96        75

Accuracy: 0.9333333333333333
Confusion Matrix:
 [

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [231]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 1.0
Confusion Matrix:
 [[29  0  0]
 [ 0 23  0]
 [ 0  0 23]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        29
           1       1.00      1.00      1.00        23
           2       1.00      1.00      1.00        23

    accuracy                           1.00        75
   macro avg       1.00      1.00      1.00        75
weighted avg       1.00      1.00      1.00        75

Accuracy: 0.9466666666666667
Confusion Matrix:
 [[23  0  0]
 [ 0 24  3]
 [ 0  1 24]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       0.96      0.89      0.92        27
           2       0.89      0.96      0.92        25

    accuracy                           0.95        75
   macro avg       0.95      0.95      0.95        75
weighted avg       0.95      0.95      0.95        75

Accuracy: 0.9333333333333333
Confus

In [232]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.9733333333333334
Confusion Matrix:
 [[29  0  0]
 [ 0 23  0]
 [ 0  2 21]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        29
           1       0.92      1.00      0.96        23
           2       1.00      0.91      0.95        23

    accuracy                           0.97        75
   macro avg       0.97      0.97      0.97        75
weighted avg       0.98      0.97      0.97        75

Accuracy: 0.9466666666666667
Confusion Matrix:
 [[23  0  0]
 [ 0 24  3]
 [ 0  1 24]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       0.96      0.89      0.92        27
           2       0.89      0.96      0.92        25

    accuracy                           0.95        75
   macro avg       0.95      0.95      0.95        75
weighted avg       0.95      0.95      0.95        75

Accuracy: 0.93333333

In [233]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.9466666666666667
Confusion Matrix:
 [[29  0  0]
 [ 0 23  0]
 [ 0  4 19]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        29
           1       0.85      1.00      0.92        23
           2       1.00      0.83      0.90        23

    accuracy                           0.95        75
   macro avg       0.95      0.94      0.94        75
weighted avg       0.95      0.95      0.95        75

Accuracy: 0.9333333333333333
Confusion Matrix:
 [[23  0  0]
 [ 0 23  4]
 [ 0  1 24]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       0.96      0.85      0.90        27
           2       0.86      0.96      0.91        25

    accuracy                           0.93        75
   macro avg       0.94      0.94      0.94        75
weighted avg       0.94      0.93      0.93        75

Accuracy: 0.96
Confu

In [234]:
# 30% training, 70% testing partition
X_train1, X_test1, y_train1, y_test1 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=10)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X, y, test_size=0.7, shuffle= True, random_state=35)

# Print the shape of the resulting sets
print("X_train shape:", X_train1.shape, X_train2.shape, X_train3.shape)
print("X_test shape:", X_test1.shape, X_test2.shape, X_test3.shape)
print("y_train shape:", y_train1.shape, y_train2.shape, y_train3.shape)
print("y_test shape:", y_test1.shape, y_test2.shape, y_test3.shape)

X_train shape: (45, 4) (45, 4) (45, 4)
X_test shape: (105, 4) (105, 4) (105, 4)
y_train shape: (45,) (45,) (45,)
y_test shape: (105,) (105,) (105,)


In [235]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
model = LinearRegression()


# Fit the model on the training data
model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = model.predict(X_test1)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred)
rmse = np.sqrt(mse1)
r2 = r2_score(y_test1, y_pred)

# Print the evaluation metrics
print("Mean Squared Error1:", mse1)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = model.predict(X_test2)

# Evaluate the model
mse2 = mean_squared_error(y_test2, y_pred)
rmse = np.sqrt(mse2)
r2 = r2_score(y_test2, y_pred)

# Print the evaluation metrics
print("Mean Squared Error2:", mse2)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

# Create a linear regression model
model = LinearRegression()

# Fit the model on the training data
model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = model.predict(X_test3)

# Evaluate the model
mse3 = mean_squared_error(y_test3, y_pred)
rmse = np.sqrt(mse3)
r2 = r2_score(y_test3, y_pred)

# Print the evaluation metrics
print("Mean Squared Error3:", mse3)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)

print("Average Mean Squared Error of 3 trials:", (mse1+mse2+mse3)/3)

Mean Squared Error1: 0.04985988920765808
Root Mean Squared Error: 0.22329328070423007
R-squared: 0.9266668518523973
Mean Squared Error2: 0.051153871247161205
Root Mean Squared Error: 0.22617221590452088
R-squared: 0.9220603329878452
Mean Squared Error3: 0.05294585420762915
Root Mean Squared Error: 0.23009966146787167
R-squared: 0.918061757069187
Average Mean Squared Error of 3 trials: 0.051319871554149486


In [236]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)



# Create a logistic regression model
logistic_model = LogisticRegression()

# Fit the model on the training data
logistic_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = logistic_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep, )


print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.9619047619047619
Confusion Matrix:
 [[40  0  0]
 [ 0 30  3]
 [ 0  1 31]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.97      0.91      0.94        33
           2       0.91      0.97      0.94        32

    accuracy                           0.96       105
   macro avg       0.96      0.96      0.96       105
weighted avg       0.96      0.96      0.96       105

Accuracy: 0.9714285714285714
Confusion Matrix:
 [[33  0  0]
 [ 0 34  2]
 [ 0  1 35]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.97      0.94      0.96        36
           2       0.95      0.97      0.96        36

    accuracy                           0.97       105
   macro avg       0.97      0.97      0.97       105
weighted avg       0.97      0.97      0.97       105

Accuracy: 0.95238095

In [237]:
from sklearn.svm import SVC
# Create an SVM model
svm_model = SVC()

# Fit the model on the training data
svm_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = svm_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = svm_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
svm_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = svm_model.predict(X_test3)

# Evaluate the model
accuracy1 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)


Accuracy: 0.9619047619047619
Confusion Matrix:
 [[40  0  0]
 [ 0 30  3]
 [ 0  1 31]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.97      0.91      0.94        33
           2       0.91      0.97      0.94        32

    accuracy                           0.96       105
   macro avg       0.96      0.96      0.96       105
weighted avg       0.96      0.96      0.96       105

Accuracy: 0.9619047619047619
Confusion Matrix:
 [[33  0  0]
 [ 0 33  3]
 [ 0  1 35]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.97      0.92      0.94        36
           2       0.92      0.97      0.95        36

    accuracy                           0.96       105
   macro avg       0.96      0.96      0.96       105
weighted avg       0.96      0.96      0.96       105

Accuracy: 0.95238095

In [238]:
from sklearn.ensemble import RandomForestClassifier

# Create a Random Forest model
rf_model = RandomForestClassifier(n_estimators=100)

# Fit the model on the training data
rf_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = rf_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
rf_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = rf_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)


# Fit the model on the training data
rf_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = rf_model.predict(X_test3)

# Evaluate the model
accuracy2 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.9428571428571428
Confusion Matrix:
 [[40  0  0]
 [ 0 30  3]
 [ 0  3 29]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.91      0.91      0.91        33
           2       0.91      0.91      0.91        32

    accuracy                           0.94       105
   macro avg       0.94      0.94      0.94       105
weighted avg       0.94      0.94      0.94       105

Accuracy: 0.9523809523809523
Confusion Matrix:
 [[33  0  0]
 [ 0 33  3]
 [ 0  2 34]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.94      0.92      0.93        36
           2       0.92      0.94      0.93        36

    accuracy                           0.95       105
   macro avg       0.95      0.95      0.95       105
weighted avg       0.95      0.95      0.95       105

Accuracy: 0.95238095

In [239]:

# Create a KNN model with best neighbor found
knn_model = KNeighborsClassifier(n_neighbors=best_neighbor)

# Fit the model on the training data
knn_model.fit(X_train1, y_train1)

# Make predictions on the test set
y_pred = knn_model.predict(X_test1)

# Evaluate the model
accuracy1 = accuracy_score(y_test1, y_pred)
conf_matrix = confusion_matrix(y_test1, y_pred)
classification_rep = classification_report(y_test1, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy1)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train2, y_train2)

# Make predictions on the test set
y_pred = knn_model.predict(X_test2)

# Evaluate the model
accuracy2 = accuracy_score(y_test2, y_pred)
conf_matrix = confusion_matrix(y_test2, y_pred)
classification_rep = classification_report(y_test2, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy2)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

# Fit the model on the training data
knn_model.fit(X_train3, y_train3)

# Make predictions on the test set
y_pred = knn_model.predict(X_test3)

# Evaluate the model
accuracy3 = accuracy_score(y_test3, y_pred)
conf_matrix = confusion_matrix(y_test3, y_pred)
classification_rep = classification_report(y_test3, y_pred)

# Print the evaluation metrics
print("Accuracy:", accuracy3)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", classification_rep)

print("average mean of accuracy three trials:", (accuracy1+accuracy2+accuracy3)/3)

Accuracy: 0.9619047619047619
Confusion Matrix:
 [[40  0  0]
 [ 0 31  2]
 [ 0  2 30]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.94      0.94      0.94        33
           2       0.94      0.94      0.94        32

    accuracy                           0.96       105
   macro avg       0.96      0.96      0.96       105
weighted avg       0.96      0.96      0.96       105

Accuracy: 0.9523809523809523
Confusion Matrix:
 [[33  0  0]
 [ 0 33  3]
 [ 0  2 34]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       0.94      0.92      0.93        36
           2       0.92      0.94      0.93        36

    accuracy                           0.95       105
   macro avg       0.95      0.95      0.95       105
weighted avg       0.95      0.95      0.95       105

Accuracy: 0.94285714