## Import Dependencies

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

In [None]:
# Seed NumPy's global random number generator with a fixed value so that
# NumPy-based randomness in this notebook is repeatable between runs.
from numpy.random import seed
seed(42)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
import tensorflow
# Evaluate the Keras version string exposed by the installed TensorFlow
# (as the cell's final expression it is what the notebook would echo).
tensorflow.keras.__version__

## Access and Format Data

In [None]:
# Load the cleaned chromatic dataset from disk and preview its first rows
csv_path = os.path.join("Cleaned_Data", "chromatic.csv")
chrome_df = pd.read_csv(csv_path)
chrome_df.head()

In [None]:
# Count how many distinct Sub_Region values the dataset contains
chrome_df.Sub_Region.nunique()

In [None]:
# Drop the location/identifier columns before the feature matrix is built.
# DataFrame.drop returns a new frame rather than modifying in place, so the
# result must be assigned back; a bare drop() call would only create and
# discard a copy.
chrome_df = chrome_df.drop(columns=['Latitude', 'Longitude', 'Country', 'Region'])

## One-Hot Encoding

In [None]:
# Step 0: Reformat data
# Convert the frame to a plain array, then pick features and target by
# column position: columns 0-114 become X, column 116 becomes y.
# NOTE(review): column index 115 ends up in neither X nor y — confirm that
# skipping it is intentional.
data = chrome_df.values
X, y = data[:, :115], data[:, 116]

In [None]:
from sklearn.preprocessing import LabelEncoder

# Step 1: Label-encode data set
# Learn the set of classes present in y, then map every entry of y to its
# integer class index (fit returns the encoder itself, so the calls chain).
label_encoder = LabelEncoder()
encoded_y = label_encoder.fit(y).transform(y)

In [None]:
from tensorflow.keras.utils import to_categorical

# Step 2: One-hot encoding
# Expand the integer class labels produced by the label encoder into a
# one-hot matrix and echo it for inspection.
# NOTE(review): one_hot_y is not referenced again by the active cells below
# (the KNN model is trained on y directly) — presumably kept for the
# commented-out neural network; confirm.
one_hot_y = to_categorical(encoded_y)
one_hot_y

In [None]:
# Show, row by row, each original class next to its encoded integer label,
# with a dashed separator after every pair
for label, original_class in zip(encoded_y, y):
    print(
        'Original Class: ' + str(original_class),
        'Encoded Label: ' + str(label),
        '-' * 12,
        sep='\n',
    )

## Training and Testing Sets

In [None]:
# Convert the Sub_Region column to a numeric dtype; values that cannot be
# parsed as numbers become NaN (errors="coerce"). Then list the column dtypes.
# NOTE(review): X and y were already extracted from chrome_df.values in the
# "Step 0" cell, so this conversion does not appear to reach the train/test
# split — confirm whether it is still needed.
sub_region_numeric = pd.to_numeric(chrome_df["Sub_Region"], errors="coerce")
chrome_df["Sub_Region"] = sub_region_numeric
chrome_df.dtypes

In [None]:
# Create training and testing data
from sklearn.model_selection import train_test_split

# No test_size is given, so scikit-learn's default proportions apply;
# random_state pins the shuffle so the split is repeatable.
split = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = split

## Shape the Data

In [None]:
# Report the dimensions of every partition produced by the split
for caption, arr in (
    ('X_train Shape:', X_train),
    ('y_train Shape:', y_train),
    ('X_test Shape:', X_test),
    ('y_test Shape:', y_test),
):
    print(caption, arr.shape)

In [None]:
# Create a StandardScaler model and fit it to the training data
# (only the training features are used to fit it, not the test features)
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler()
X_scaler.fit(X_train)

In [None]:
# Apply the already-fitted scaler to both the training and testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# K Nearest Neighbors
# Loop through different k values to see which has the highest accuracy
# Note: We only use odd numbers because we don't want any ties
k_values = range(1, 16, 2)  # single source of truth for the loop and both plots
train_scores = []
test_scores = []
for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    # Mean accuracy on the data the model was fit on vs. the held-out data
    train_score = knn.score(X_train_scaled, y_train)
    test_score = knn.score(X_test_scaled, y_test)
    train_scores.append(train_score)
    test_scores.append(test_score)
    print(f"k: {k}, Train/Test Score: {train_score:.3f}/{test_score:.3f}")

# Both curves share one axes, so label them and draw a legend; otherwise the
# training and testing lines cannot be told apart in the saved figure.
plt.plot(k_values, train_scores, marker='o', label="Training accuracy")
plt.plot(k_values, test_scores, marker="x", label="Testing accuracy")
plt.xlabel("k neighbors")
plt.ylabel("Testing accuracy Score")
plt.title("Testing Accuracy - Chromatic Dataset")
plt.legend()
# savefig does not create missing directories, so make sure the target
# folder exists before writing the image.
os.makedirs("static/images", exist_ok=True)
plt.savefig("static/images/chromatic_knn.png", transparent=True)

In [None]:
# Note that k: 9 provides the best accuracy where the classifier starts to stabilize
# Refit a classifier at that k and report its accuracy on the held-out test set
knn = KNeighborsClassifier(n_neighbors=9).fit(X_train_scaled, y_train)
test_accuracy = knn.score(X_test_scaled, y_test)
print('k=9 Test Acc: %.3f' % test_accuracy)

In [None]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense

In [None]:
# # Create model and add layers
# model = Sequential()
# model.add(Dense(units=100, activation='relu', input_dim=20))
# model.add(Dense(units=100, activation='relu'))
# model.add(Dense(units=2, activation='softmax'))

In [None]:
# # Compile and fit the model
# model.compile(optimizer='adam',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])

In [None]:
# model.summary()

In [None]:
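# NOTE(review): the target passed below is y_test while the features are
# X_train_scaled; the labels should come from the training split (presumably
# one-hot encoded, cf. y_test_categorical in the evaluate cell) — fix before
# re-enabling this cell.
# model.fit(
#     X_train_scaled,
#     y_test,
#     epochs=60,
#     shuffle=True,
#     verbose=2
# )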

In [None]:
# model_loss, model_accuracy = model.evaluate(
#     X_test_scaled, y_test_categorical, verbose=2)
# print(
#     f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# encoded_predictions = model.predict_classes(X_test_scaled[:5])
# prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [None]:
# print(f"Predicted classes: {prediction_labels}")
# print(f"Actual Labels: {list(y_test[:5])}")