### 6C

In [12]:
import pandas as pd
# Read data from file 'diabetes.csv'
# (in the same directory that your python process is based)
# Control delimiters, rows, column names with read_csv (see later)
data = pd.read_csv(r"C:\Users\khali\Downloads\student_pocketmoney.csv")
# Preview the first 5 lines of the loaded data
print(data.head())

     Name  Age  Pocket Money Dormitory
0   Aizah   18          98.0        No
1    Bela   17          87.0       Yes
2  Cherry   16          66.0        No
3    Dhea   18          50.0        No
4  Ellisa   19           NaN       Yes


In [13]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsClassifier
from scipy.stats import mode

# K Nearest Neighbors Classification
class K_Nearest_Neighbors_Classifier():
    
    def __init__(self, K):
        self.K = K
        
    # Function to store training set
    def fit(self, X_train, Y_train):
        self.X_train = X_train
        self.Y_train = Y_train
        # no_of_training_examples, no_of_features
        self.m, self.n = X_train.shape
    
    # Function for prediction
    def predict(self, X_test):
        self.X_test = X_test
        # no_of_test_examples, no_of_features
        self.m_test, self.n = X_test.shape
        # initialize Y_predict
        Y_predict = np.zeros(self.m_test)
        for i in range(self.m_test):
            x = self.X_test[i]
            # find the K nearest neighbors from current test example
            neighbors = np.zeros(self.K)
            neighbors = self.find_neighbors(x)
            # most frequent class in K neighbors
            Y_predict[i] = mode(neighbors, keepdims=True)[0][0]
        return Y_predict

    # Function to find the K nearest neighbors to current test example
    def find_neighbors(self, x):
        # calculate all the euclidean distances between current
        # test example x and training set X_train
        euclidean_distances = np.zeros(self.m)
        for i in range(self.m):
            d = self.euclidean(x, self.X_train[i])
            euclidean_distances[i] = d
        # sort Y_train according to euclidean_distance_array and
        # store into Y_train_sorted
        inds = euclidean_distances.argsort()
        Y_train_sorted = self.Y_train[inds]
        return Y_train_sorted[:self.K]
    
    # Function to calculate euclidean distance
    def euclidean(self, x, x_train):
        return np.sqrt(np.sum(np.square(x - x_train)))

# Driver code
def main():
    # Importing dataset
    df = pd.read_csv(r"C:\Users\khali\Downloads\student_pocketmoney.csv")
    
    # Handling missing values
    imputer = SimpleImputer(strategy='mean')
    df[df.select_dtypes(include=[np.number]).columns] = imputer.fit_transform(df.select_dtypes(include=[np.number]))

    # Identifying categorical and numeric columns
    categorical_columns = df.select_dtypes(include=['object']).columns
    numeric_columns = df.select_dtypes(exclude=['object']).columns

    # Encoding categorical features
    encoder = OneHotEncoder(sparse_output=False)
    X_categorical = encoder.fit_transform(df[categorical_columns])

    # Scaling numeric features
    scaler = StandardScaler()
    X_numeric = scaler.fit_transform(df[numeric_columns])

    # Combining the encoded categorical features and scaled numeric features
    X = np.concatenate([X_numeric, X_categorical], axis=1)
    
    # Encoding the target variable
    label_encoder = LabelEncoder()
    Y = label_encoder.fit_transform(df.iloc[:, -1].values)  # Assuming the last column is the target

    # Splitting dataset into train and test set
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=1/3, random_state=0)
    
    # Model training
    model = K_Nearest_Neighbors_Classifier(K=3)
    model.fit(X_train, Y_train)
    
    model1 = KNeighborsClassifier(n_neighbors=3)
    model1.fit(X_train, Y_train)
    
    # Prediction on test set
    Y_pred = model.predict(X_test)
    Y_pred1 = model1.predict(X_test)
    
    # Measure performance
    correctly_classified = 0
    correctly_classified1 = 0
    count = 0
    
    for count in range(np.size(Y_pred)):
        if Y_test[count] == Y_pred[count]:
            correctly_classified += 1
        if Y_test[count] == Y_pred1[count]:
            correctly_classified1 += 1
        count += 1
        
    print("Accuracy on test set by our model     : ", (correctly_classified / count) * 100)
    print("Accuracy on test set by sklearn model : ", (correctly_classified1 / count) * 100)
    
if __name__ == "__main__":
    main()


Accuracy on test set by our model     :  75.0
Accuracy on test set by sklearn model :  75.0


In [14]:
#The accuracy achieved by our model and sklearn is equal which indicates the correct implementation of our model.

### 6D

In [17]:
# Importing libraries for regression
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer

# K Nearest Neighbors Regression
class K_Nearest_Neighbors_Regressor:
    def __init__(self, K):
        self.K = K

    # Function to store training set
    def fit(self, X_train, Y_train):
        self.X_train = X_train
        self.Y_train = Y_train
        # no_of_training_examples, no_of_features
        self.m, self.n = X_train.shape

    # Function for prediction
    def predict(self, X_test):
        self.X_test = X_test
        # no_of_test_examples, no_of_features
        self.m_test, self.n = X_test.shape
        # initialize Y_predict
        Y_predict = np.zeros(self.m_test)
        for i in range(self.m_test):
            x = self.X_test[i]
            # find the K nearest neighbors from current test example
            neighbors = self.find_neighbors(x)
            # calculate the mean of K nearest neighbors
            Y_predict[i] = np.mean(neighbors)
        return Y_predict

    # Function to find the K nearest neighbors to current test example
    def find_neighbors(self, x):
        # calculate all the euclidean distances between current test
        # example x and training set X_train
        euclidean_distances = np.zeros(self.m)
        for i in range(self.m):
            d = self.euclidean(x, self.X_train[i])
            euclidean_distances[i] = d
        # sort Y_train according to euclidean_distance_array and
        # store into Y_train_sorted
        inds = euclidean_distances.argsort()
        Y_train_sorted = self.Y_train[inds]
        return Y_train_sorted[:self.K]

    # Function to calculate euclidean distance
    def euclidean(self, x, x_train):
        return np.sqrt(np.sum(np.square(x - x_train)))

# Driver code
def main():
    # Importing dataset
    df = pd.read_csv(r"C:\Users\khali\Downloads\student_pocketmoney.csv")

    # Checking for categorical columns and encoding them
    categorical_columns = df.select_dtypes(include=['object']).columns
    if len(categorical_columns) > 0:
        encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
        X_encoded = encoder.fit_transform(df[categorical_columns])
        df = df.drop(columns=categorical_columns)
        df = pd.concat([df, pd.DataFrame(X_encoded, index=df.index)], axis=1)

    # Splitting dataset into features and target
    X = df.iloc[:, :-1].values
    Y = df.iloc[:, -1].values

    # Handling missing values by imputing them with the mean
    imputer = SimpleImputer(strategy='mean')
    X = imputer.fit_transform(X)

    # Splitting dataset into train and test set
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=1/3, random_state=0)

    # Model training
    model = K_Nearest_Neighbors_Regressor(K=3)
    model.fit(X_train, Y_train)

    model1 = KNeighborsRegressor(n_neighbors=3)
    model1.fit(X_train, Y_train)

    # Prediction on test set
    Y_pred = model.predict(X_test)
    Y_pred1 = model1.predict(X_test)

    print("Predicted values by our model     : ", np.round(Y_pred[:3], 2))
    print("Predicted values by sklearn model : ", np.round(Y_pred1[:3], 2))
    print("Real values                      : ", Y_test[:3])

if __name__ == "__main__":
    main()


Predicted values by our model     :  [0.33 0.33 0.33]
Predicted values by sklearn model :  [0.33 0.33 0.33]
Real values                      :  [0. 0. 1.]
