## Imports

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from scipy.stats import uniform

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, roc_curve, auc
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.utils.class_weight import compute_class_weight

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping


## Preprocessing


In [None]:
# Load the wine data set and take a first look at it

df = pd.read_csv('../Data/wine_data.csv')

# Preview the first ten rows, then report the frame's (rows, columns)
first_rows = df.head(10)
print(f'{first_rows}\n')
print(f'Shape: {df.shape}')

In [None]:
# Class balance: number of rows per Cultivar value
sns.countplot(data=df, x="Cultivar")

In [None]:
# Display pandas' descriptive statistics for the loaded frame
df.describe()

In [None]:
# Separate the target column (Cultivar) from the remaining feature columns

y = df['Cultivar'].values
X = df.drop(columns=['Cultivar']).values

# Sanity check: both arrays must have the same number of rows
print(X.shape)
print(y.shape)

In [None]:
# Hold out part of the data for testing.
# An integer test_size is an absolute row count (70 rows), not a fraction;
# random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=70,
    random_state=1,
)

print(f'Shape test set: {X_test.shape}')

In [None]:
# Standardise the features; the scaler is fitted on the training split only
# and the same fitted transform is then applied to the test split.
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Logistic Regression

In [None]:
# Training

# random_state makes the solvers that shuffle the data (liblinear/sag/saga) repeatable.
logreg_model = LogisticRegression(class_weight='balanced', random_state=1)

# Search space: every solver, with C drawn uniformly from [0.0001, 20.0001]
# (scipy's uniform takes loc and scale, not lower and upper bounds).
paramaters = {'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
              'C': uniform(0.0001, 20)
             }

nmb_iterations = 50
max_nmb_cross_validation = 10

# Randomized search for the best parameters, repeated for 2 .. max+1 CV folds.
# Previously every pass overwrote `logreg`, so only the last search (largest
# fold count) survived the loop regardless of its score. The search with the
# highest mean cross-validated accuracy is now kept instead.
best_search = None

for nmb_cross_validation in range(max_nmb_cross_validation):

    search = RandomizedSearchCV(estimator = logreg_model,
                                param_distributions = paramaters,
                                n_iter = nmb_iterations,
                                scoring = 'accuracy',
                                cv = nmb_cross_validation + 2,  # cv must be >= 2
                                n_jobs = -1,
                                random_state = 1,  # same candidates on every run
                                verbose = 1)

    search.fit(X_train, y_train)

    print(f'Best estimator: \u001b[36;1m{search.best_estimator_}\u001b[0m')
    print(f'Best accuracy: \u001b[32;1m{search.best_score_}\u001b[0m')

    # Strict '>' keeps the earliest (fewest folds) search on ties.
    if best_search is None or search.best_score_ > best_search.best_score_:
        best_search = search

# Fitted search object used by the evaluation cell (predicts with its refit best estimator).
logreg = best_search
    

In [None]:
# Evaluate the tuned logistic regression on the held-out test set

y_pred = logreg.predict(X_test)

# Compute the three summaries first, then print them in the usual order
report = classification_report(y_test, y_pred)
accuracy_pct = accuracy_score(y_test, y_pred) * 100
matrix = confusion_matrix(y_test, y_pred)

print(f'{report}\n')
print(f'Accuracy: \u001b[32;1m{accuracy_pct}\u001b[0m \n')
print(f'Confusion Matrix:\n{matrix}\n')

## Random Forest

In [None]:
# Forest size and the number of features considered at each split
number_of_trees = 1000
max_number_of_features = 2

# Seeded (same value as the train/test split) so the bootstrap samples and
# feature draws, and therefore the reported test metrics, repeat on every run.
rfc = RandomForestClassifier(n_estimators=number_of_trees,
                             max_features=max_number_of_features,
                             random_state=1)
rfc.fit(X_train, y_train)

In [None]:
# Evaluate the random forest on the held-out test set
y_pred = rfc.predict(X_test)

report = classification_report(y_test, y_pred)
accuracy_pct = accuracy_score(y_test, y_pred) * 100
matrix = confusion_matrix(y_test, y_pred)

print(f'{report}\n')
print(f'Accuracy: \u001b[32;1m{accuracy_pct}\u001b[0m \n')
print(f'Confusion Matrix:\n{matrix}\n')

## Ensemble methods

In [None]:
# Adaboost

# Seeded (same value as the train/test split) so repeated runs build the same
# ensemble and report the same test metrics.
adaboost = AdaBoostClassifier(n_estimators=150, learning_rate=0.9, random_state=1)
adaboost.fit(X_train, y_train)

In [None]:
# Evaluate AdaBoost on the held-out test set
y_pred = adaboost.predict(X_test)

report = classification_report(y_test, y_pred)
accuracy_pct = accuracy_score(y_test, y_pred) * 100
matrix = confusion_matrix(y_test, y_pred)

print(f'{report}\n')
print(f'Accuracy: \u001b[32;1m{accuracy_pct}\u001b[0m \n')
print(f'Confusion Matrix:\n{matrix}\n')

## Neural Network

In [None]:
# Preprocessing

# One-hot encode the training labels for the network's categorical cross-entropy loss.
# NOTE(review): to_categorical sizes the encoding from the largest label value, so this
# presumably expects Cultivar to be coded 0..n_classes-1 — confirm against the CSV.
y_train_hot = to_categorical(y_train)

In [None]:
# Create a neural network

# One output unit per distinct Cultivar value; one input per feature column
unique_classes = len(df.Cultivar.unique())
input_shape = X_train.shape[1]

dropoutrate = 0.5

# Two small hidden layers, each followed by dropout for regularisation
neural_network = Sequential([
    Input(shape=(input_shape,)),
    Dense(20, activation='relu'),
    Dropout(dropoutrate),
    Dense(20, activation='relu'),
    Dropout(dropoutrate),
    # softmax (not sigmoid): the classes are mutually exclusive and
    # categorical_crossentropy expects one probability distribution per sample
    Dense(unique_classes, activation='softmax')
])

# `learning_rate` replaces the deprecated `lr` argument, which newer Keras rejects
neural_network.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Training

# Upper bound only; early stopping normally ends training much sooner
epochs = 1000

# Stop once the validation loss has not improved for 2 consecutive epochs
early_stopping = EarlyStopping(monitor='val_loss', patience=2)

# Balanced class weights. The arguments are passed by keyword because current
# scikit-learn versions reject the positional form with a TypeError.
class_labels = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=y_train)
# Keras expects {class index: weight}; enumerate follows the sorted order of class_labels.
# NOTE(review): this lines up with the one-hot columns only if labels are 0..n-1 — confirm.
class_weights = dict(enumerate(class_weights))

# The last 20% of the training rows are held out as the validation set monitored above
history = neural_network.fit(
    X_train,
    y_train_hot,
    epochs=epochs,
    batch_size=32,
    validation_split=0.2,
    class_weight=class_weights,
    callbacks=[early_stopping],
    verbose=1,
)

In [None]:
# Training vs. validation loss per epoch

for history_key, curve_label in (("loss", "Training Loss"), ("val_loss", "Validation Loss")):
    plt.plot(history.history[history_key], label=curve_label)

plt.legend()

plt.show()

In [None]:
# Testing

# Sequential.predict_classes was removed from tf.keras (TF 2.6); taking the argmax
# over the predicted class probabilities is the documented replacement and yields
# the same class indices.
class_probabilities = neural_network.predict(X_test)
y_pred = np.argmax(class_probabilities, axis=-1)

print(f'{classification_report(y_test, y_pred)}\n')
print(f'Accuracy: \u001b[32;1m{accuracy_score(y_test, y_pred) * 100}\u001b[0m \n')
print(f'Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}\n')