In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score
import warnings
warnings.filterwarnings('ignore')

In [2]:
class ModelTraining:
    """Common train / predict / evaluate interface for the model wrappers.

    Every hook raises ``NotImplementedError``; a concrete wrapper is expected
    to override all three.
    """

    def train(self, X_train, y_train):
        """Fit the model on the training features and labels."""
        raise NotImplementedError()

    def predict(self, X_test):
        """Return predictions for the given features."""
        raise NotImplementedError()

    def evaluate(self, y_test, y_pred):
        """Score the predictions against the true labels."""
        raise NotImplementedError()




In [3]:
class KNNModel(ModelTraining):
    """k-nearest-neighbours classifier exposed through the ModelTraining hooks."""

    def __init__(self, n_neighbors=5, p=2):
        """Create the wrapped ``KNeighborsClassifier``.

        Args:
            n_neighbors: forwarded unchanged as ``n_neighbors``.
            p: forwarded unchanged as ``p``.
        """
        knn_options = {'n_neighbors': n_neighbors, 'p': p}
        self.classifier = KNeighborsClassifier(**knn_options)

    def train(self, X_train, y_train):
        """Fit the wrapped classifier in place; nothing is returned."""
        estimator = self.classifier
        estimator.fit(X_train, y_train)

    def predict(self, X_test):
        """Return the wrapped classifier's predictions for ``X_test``."""
        predictions = self.classifier.predict(X_test)
        return predictions

    def evaluate(self, y_test, y_pred):
        """Return the pair ``(confusion matrix, accuracy)`` for the predictions."""
        return (
            confusion_matrix(y_test, y_pred),
            accuracy_score(y_test, y_pred),
        )





In [4]:
class SVMModel(ModelTraining):
    """Support-vector classifier exposed through the ModelTraining hooks."""

    def __init__(self, kernel='rbf'):
        """Create the wrapped ``SVC``; ``kernel`` is forwarded unchanged."""
        svc_options = {'kernel': kernel}
        self.classifier = SVC(**svc_options)

    def train(self, X_train, y_train):
        """Fit the wrapped classifier in place; nothing is returned."""
        estimator = self.classifier
        estimator.fit(X_train, y_train)

    def predict(self, X_test):
        """Return the wrapped classifier's predictions for ``X_test``."""
        predictions = self.classifier.predict(X_test)
        return predictions

    def evaluate(self, y_test, y_pred):
        """Return only the accuracy score (no confusion matrix)."""
        return accuracy_score(y_test, y_pred)





In [6]:
class DataPreprocessing:
    """Missing-value imputation and feature scaling for one DataFrame.

    Attributes are documented inline in ``__init__``.
    """

    # Supported imputation strategies, each mapped to the function that
    # computes the fill value for a single column.
    _FILL_VALUES = {
        'mean': lambda series: series.mean(),
        'median': lambda series: series.median(),
    }

    def __init__(self, dataset):
        """Bind the helper to ``dataset`` (kept by reference, not copied)."""
        self.dataset = dataset
        # Scaler fitted by the most recent feature_scaling() call, so the same
        # transform can be re-applied to new data; None until then.
        self.scaler = None

    def handle_missing_data(self, columns, strategy='mean'):
        """Fill missing values in ``columns`` with a per-column statistic.

        The bound DataFrame is modified in place and also returned.

        Args:
            columns: iterable of column names to impute.
            strategy: ``'mean'`` (default) or ``'median'``.

        Returns:
            The bound DataFrame (same object as ``self.dataset``).

        Raises:
            ValueError: if ``strategy`` is not supported. Previously an unknown
                strategy was silently ignored and the data left un-imputed.
        """
        # Validate before touching any column so a bad strategy cannot leave
        # the dataset partially imputed.
        try:
            compute_fill = self._FILL_VALUES[strategy]
        except KeyError:
            supported = ', '.join(sorted(self._FILL_VALUES))
            raise ValueError(
                f"Unsupported strategy {strategy!r}; expected one of: {supported}"
            ) from None

        for column in columns:
            series = self.dataset[column]
            self.dataset[column] = series.fillna(compute_fill(series))
        return self.dataset

    def feature_scaling(self, X_train, X_test):
        """Standardise features with statistics learned from ``X_train`` only.

        The scaler is fitted on the training data and then applied to the test
        data. The fitted scaler is stored on ``self.scaler``.

        Returns:
            Tuple ``(X_train_scaled, X_test_scaled)``.
        """
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        self.scaler = scaler
        return X_train, X_test




In [7]:
class Plotting:
    """Static matplotlib helpers for quick exploratory plots."""

    @staticmethod
    def scatter_plot(X, Y, xlabel, ylabel, title):
        """Scatter the two classes of a binary-labelled, two-column feature set.

        Column 1 of ``X`` is drawn on the x-axis and column 0 on the y-axis.
        Rows with ``Y == 0`` are red ("Not Purchased") and rows with ``Y == 1``
        are green ("Purchased").

        Args:
            X: 2-D array-like with at least two columns.
            Y: array-like of labels, one per row of ``X``.
            xlabel, ylabel, title: text for the axes and the figure title.
        """
        # Coerce so lists, DataFrames and Series work, not just NumPy arrays;
        # ravel() lets a column-vector of labels act as a 1-D row mask.
        X = np.asarray(X)
        Y = np.asarray(Y).ravel()
        not_purchased = Y == 0
        purchased = Y == 1

        plt.figure(figsize=(10, 6))
        plt.scatter(X[not_purchased, 1], X[not_purchased, 0], color='red', label='Not Purchased')
        plt.scatter(X[purchased, 1], X[purchased, 0], color='green', label='Purchased')
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend()
        plt.grid(True)
        plt.show()

    @staticmethod
    def box_plot(df, columns):
        """Draw one box plot per column on a two-column grid of subplots.

        Up to 8 columns the layout is a 4x2 grid on a 24x20 figure. Beyond
        that, rows are added (5 inches of height each) instead of failing on
        an out-of-range subplot index.

        Args:
            df: DataFrame providing ``boxplot(column=...)``.
            columns: iterable of column names to plot.
        """
        columns = list(columns)
        n_cols = 2
        # Ceiling division, with a floor of 4 rows to keep the original layout.
        n_rows = max(4, (len(columns) + n_cols - 1) // n_cols)

        plt.figure(figsize=(24, 5 * n_rows))
        for position, column in enumerate(columns, 1):
            plt.subplot(n_rows, n_cols, position)
            df.boxplot(column=column)
            plt.ylabel(column)
        plt.show()



In [8]:
class MainExecution:
    """Run a split -> scale -> train -> predict -> evaluate pipeline for one model."""

    def __init__(self, model, dataset, features, target, test_size=0.60, random_state=0):
        """Store the pipeline inputs.

        Args:
            model: object providing ``train``, ``predict`` and ``evaluate``.
            dataset: DataFrame holding both the feature and target columns.
            features: list of feature column names.
            target: name of the single target column.
            test_size: forwarded to ``train_test_split``. Defaults to 0.60,
                the value that used to be hard-coded in ``execute``.
            random_state: seed forwarded to ``train_test_split``. Defaults
                to 0, the previously hard-coded value.
        """
        self.model = model
        self.dataset = dataset
        self.features = features
        self.target = target
        self.test_size = test_size
        self.random_state = random_state

    def execute(self):
        """Run the pipeline and return whatever ``model.evaluate`` returns."""
        # Split the dataset into the training set and test set
        X = self.dataset[self.features].values
        Y = self.dataset[self.target].values
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=self.test_size, random_state=self.random_state
        )

        # Feature scaling: fitted on the training split, applied to both splits
        preprocessing = DataPreprocessing(self.dataset)
        X_train, X_test = preprocessing.feature_scaling(X_train, X_test)

        # Train, then predict on the held-out split
        self.model.train(X_train, y_train)
        y_pred = self.model.predict(X_test)

        # The return shape is decided by the model's own evaluate()
        return self.model.evaluate(y_test, y_pred)




In [9]:
# Running the KNN Model
label_encoder = LabelEncoder()
knn_dataset = pd.read_csv('knn_data.csv')

# Replace the 'Gender' column with the integer codes produced by the encoder
gender_codes = label_encoder.fit_transform(knn_dataset['Gender'])
knn_dataset['Gender'] = gender_codes

knn_execution = MainExecution(
    KNNModel(n_neighbors=5, p=2),
    knn_dataset,
    ['Age', 'Gender', 'EstimatedSalary'],
    'Purchased',  # single target column name
)

# KNNModel.evaluate returns the pair (confusion matrix, accuracy)
knn_cm, knn_accuracy = knn_execution.execute()
print("KNN Confusion Matrix:\n", knn_cm)
print("KNN Accuracy:", knn_accuracy)





KNN Confusion Matrix:
 [[137  10]
 [ 22  71]]
KNN Accuracy: 0.8666666666666667


In [10]:
# Running the SVM Model
# The eight feature column names, defined once and reused below.
SVM_FEATURE_COLUMNS = [
    'IP Mean', 'IP Sd', 'IP Kurtosis', 'IP Skewness',
    'DM-SNR Mean', 'DM-SNR Sd', 'DM-SNR Kurtosis', 'DM-SNR Skewness',
]

svm_dataset = pd.read_csv('svm_data.csv')
# Overwrite the CSV's own header with the names above plus the label column
svm_dataset.columns = SVM_FEATURE_COLUMNS + ['target_class']

# Handle missing data (default strategy: fill each column with its mean).
# NOTE(review): the fill values are computed on the whole dataset, before the
# train/test split that happens inside execute() - confirm this is intended.
preprocessing = DataPreprocessing(svm_dataset)
svm_dataset = preprocessing.handle_missing_data(columns=SVM_FEATURE_COLUMNS)

# Define the target column and the frame of feature columns
target = 'target_class'
features = svm_dataset.drop(columns=[target])

svm_execution = MainExecution(
    model=SVMModel(kernel='rbf'),
    dataset=svm_dataset,
    features=list(features.columns),  # plain list of column names
    target=target,  # single target column name
)

# Execute the SVM model and print accuracy (SVMModel.evaluate returns accuracy only)
svm_accuracy = svm_execution.execute()
print("SVM Accuracy:", svm_accuracy)

SVM Accuracy: 0.9768524677397898
