# Bonus

In [13]:
train_path = "data_train.csv"
validation_path = "data_validation.csv"

import pandas as pd
import numpy as np

df_train = pd.read_csv(train_path)
df_validation = pd.read_csv(validation_path)

nominal_columns = [
    'blue',
    'dual_sim',
    'four_g',
    'three_g',
    'touch_screen',
    'wifi',
    'price_range' # ordinal are included as well, target value
]

ratio_columns = [
    'battery_power',
    'clock_speed',
    'fc',
    'int_memory',
    'm_dep',
    'mobile_wt',
    'n_cores',
    'pc',
    'px_height',
    'px_width',
    'ram',
    'sc_h',
    'sc_w',
    'talk_time'
]

In [14]:
from typing import List

def standardize_data(df: pd.DataFrame, columns: List[str]):
        '''
        Melakukan standarisasi dari data yang diberikan pada kolom yang diberikan dari data tersebut, lalu mengembalikan hasilnya
        
        Parameters:
                df (Dataframe) : Data yang ingin dinormalisasi
                columns (List[str]) : Kolom dari df yang ingin dinormalisasi

        Returns:
                df_standardized (Dataframe): salinan dari df yang telah dinormalisasi pada kolom yang diberikan
        '''
        df_standardized = df.copy()

        for col in columns:
            mean = df[col].mean()
            std = df[col].std()
            df_standardized[col] = (df[col] - mean)/std 

        return df_standardized

In [15]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, CategoricalNB, BernoulliNB

x_train = df_train.drop(['price_range'], axis=1)
y_train = df_train['price_range']

x_train_standardized = standardize_data(x_train, ratio_columns)

x_validation = df_validation.drop(['price_range'], axis=1)
y_validation = df_validation['price_range']

# TEST WITH KNN
model = KNeighborsClassifier(n_neighbors=17, weights='distance')
model.fit(x_train, y_train)
knn_accuracy = accuracy_score(y_validation, model.predict(x_validation))

print(f"KNN Accuracy {knn_accuracy}")

# TEST WITH KNN : Normalized
model = KNeighborsClassifier(n_neighbors=17, weights='distance')
model.fit(x_train_standardized, y_train)
knn_accuracy = accuracy_score(y_validation, model.predict(standardize_data(x_validation, ratio_columns)))

print(f"KNN Normalized Accuracy {knn_accuracy}")

# TEST WITH NAIVE BAYES (Gaussian)
model_gaussian = GaussianNB()
model_gaussian.fit(x_train, y_train)
gaussian_accuracy = accuracy_score(y_validation, model_gaussian.predict(x_validation))
print(f"Gaussian accuracy {gaussian_accuracy}")

# Test with Naive Bayes (Multinomial)
model_multinomial = MultinomialNB()
model_multinomial.fit(x_train, y_train)
multinomial_acc = accuracy_score(y_validation, model_multinomial.predict(x_validation))
print(f"Multinomial accuracy {multinomial_acc}")

# Test with Naive Bayes (Categorical)
model_categorical = CategoricalNB()
model_categorical.fit(x_train, y_train)
categorical_acc = accuracy_score(y_validation, model_categorical.predict(x_validation))
print(f"Categorical accuracy {categorical_acc}")


# Test with Naive Bayes (Multinomial)
model_bernoulli = BernoulliNB()
model_bernoulli.fit(x_train, y_train)
bernoulli_acc = accuracy_score(y_validation, model_bernoulli.predict(x_validation))
print(f"Bernoulli accuracy {bernoulli_acc}")

KNN Accuracy 0.9366666666666666
KNN Normalized Accuracy 0.685
Gaussian accuracy 0.7816666666666666
Multinomial accuracy 0.5316666666666666
Categorical accuracy 0.2966666666666667
Bernoulli accuracy 0.23333333333333334
