<a href="https://colab.research.google.com/github/drstannwoji2019/AITools_LRL_NLP/blob/main/ISEM565SupervisedCodeRes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Read the raw car table from disk
file_path = '/Car.csv'
data = pd.read_csv(file_path)

# The continuous price is the quantity to predict; car_ID and CarName are
# dropped from the feature matrix together with the target column itself
y = data['price']
X = data.drop(columns=['car_ID', 'price', 'CarName'])

# Price-band labels and the edges that delimit them
labels = ['Low', 'Medium', 'High']
bins = [0, 10000, 20000, np.inf]

# Turn the numeric price into a three-level categorical target.
# pd.cut uses right-closed intervals by default, so the bands are
# (0, 10000], (10000, 20000] and (20000, inf].
y_binned = pd.cut(y, bins, labels=labels)

# Identify categorical and numerical columns.
# 'number' selects every numeric dtype (int32, float32, ...), not only
# int64/float64. This matters because ColumnTransformer drops any column that
# is not listed in one of its transformers (default remainder='drop'), so a
# numeric column with an unexpected width would otherwise vanish silently.
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include='number').columns

# Preprocessing for numerical data: fill gaps with the column mean, then
# standardize to zero mean / unit variance
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())])

# Preprocessing for categorical data: replace gaps with the literal category
# 'missing', then one-hot encode. handle_unknown='ignore' makes categories
# first seen at transform time encode as all zeros instead of raising.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

# Combine both branches; each one is applied only to its own column subset
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)])

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified on the price band — consider
# stratify=y_binned if the bands turn out to be imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_binned,
    test_size=0.2,
    random_state=42,
)

# Learn imputation/scaling/encoding parameters from the training rows only,
# then reuse those fitted parameters on the test rows (evaluated left to
# right, so the fit happens before the test transform)
X_train, X_test = (
    preprocessor.fit_transform(X_train),
    preprocessor.transform(X_test),
)

# Neural network: a multi-layer perceptron with one hidden layer of 100 units.
# fit() returns the estimator itself, so construction and training are chained.
ann_model = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=1000,
    random_state=42,
).fit(X_train, y_train)
y_pred_ann = ann_model.predict(X_test)

# Support vector machine with an RBF kernel
svm_model = SVC(kernel='rbf').fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

# Decision tree, seeded so that tie-breaking between splits is repeatable
tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_tree = tree_model.predict(X_test)

# Define a function to evaluate the models
def evaluate_model(y_test, y_pred, average='weighted'):
    """Score a set of predictions against the true labels.

    Args:
        y_test: True class labels.
        y_pred: Predicted class labels, aligned with ``y_test``.
        average: Averaging mode forwarded to the F1, precision and recall
            scorers. Defaults to ``'weighted'``, i.e. per-class scores
            weighted by class support.

    Returns:
        A tuple ``(accuracy, f1, precision, sensitivity)`` where sensitivity
        is the recall score.
    """
    # zero_division=0 yields the same value scikit-learn reports by default
    # when a class is never predicted (or never present), but without
    # emitting an UndefinedMetricWarning for every such class.
    scorer_options = {'average': average, 'zero_division': 0}

    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred, **scorer_options)
    precision = precision_score(y_test, y_pred, **scorer_options)
    sensitivity = recall_score(y_test, y_pred, **scorer_options)
    return accuracy, f1, precision, sensitivity

# Score the three classifiers on the held-out split in ANN, SVM, Decision Tree
# order and unpack each result into its own set of metric variables
(
    (acc_ann, f1_ann, prec_ann, sens_ann),
    (acc_svm, f1_svm, prec_svm, sens_svm),
    (acc_tree, f1_tree, prec_tree, sens_tree),
) = [
    evaluate_model(y_test, predictions)
    for predictions in (y_pred_ann, y_pred_svm, y_pred_tree)
]

# Report one summary line per model
print(f"ANN Model: Accuracy = {acc_ann}, F1 Score = {f1_ann}, Precision = {prec_ann}, Sensitivity = {sens_ann}")
print(f"SVM Model: Accuracy = {acc_svm}, F1 Score = {f1_svm}, Precision = {prec_svm}, Sensitivity = {sens_svm}")
print(f"Decision Tree Model: Accuracy = {acc_tree}, F1 Score = {f1_tree}, Precision = {prec_tree}, Sensitivity = {sens_tree}")


ANN Model: Accuracy = 0.8780487804878049, F1 Score = 0.8795497185741088, Precision = 0.8836772983114447, Sensitivity = 0.8780487804878049
SVM Model: Accuracy = 0.9024390243902439, F1 Score = 0.9044024608403508, Precision = 0.9117305458768873, Sensitivity = 0.9024390243902439
Decision Tree Model: Accuracy = 0.926829268292683, F1 Score = 0.928794782453319, Precision = 0.9414634146341464, Sensitivity = 0.926829268292683
