# Import Useful Modules 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import gmtime, strftime
import time
import datetime
from collections import Counter
import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Preprocessing
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Embedder
from gensim.models import FastText

# Classifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.linear_model import LogisticRegression as LR
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.grid_search import GridSearchCV as GS
from sklearn.model_selection import validation_curve, learning_curve
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures as Poly

from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout
from keras.utils import to_categorical

In [None]:
# Select the model version; it is interpolated into the "model/{}/..." paths
# used in this file to load and save model artifacts.

version = "version_2"

# ---------------------------------------------------------------------------------------------------------------

# Import Model

In [None]:
from preprocessing_pipeline import preprocessing

In [None]:
# Restore the pickled word embedder stored for the selected model version.
embedder_path = "model/{}/word_embedder.pickle".format(version)
with open(embedder_path, "rb") as embedder_file:
    word_embedder = pickle.load(embedder_file)

# Load the Keras model saved in the same version directory.
network_path = "model/{}/neural_network.h5".format(version)
neural_network = load_model(network_path)

In [None]:
def predict_product_class(query):
    """Predict the class label for a single query string.

    The query is cleaned with ``preprocessing_pipeline.remove_parentheses``,
    embedded with ``preprocessing_pipeline.vectorize_sentence`` using the
    module-level ``word_embedder``, scored by the module-level
    ``neural_network`` and mapped back to its original label with the
    module-level ``large_label_encoder``.

    Args:
        query: Text to classify; passed unchanged to ``remove_parentheses``.

    Returns:
        The decoded label of the highest-scoring class.

    Note:
        ``large_label_encoder`` is assigned further down in this file, so
        this function must only be called after that assignment has run.
    """
    # The file only does ``from preprocessing_pipeline import preprocessing``,
    # so the module name itself must be imported before it can be referenced.
    # NOTE(review): assumes remove_parentheses / vectorize_sentence are
    # module-level functions, as the original call sites imply -- confirm.
    import preprocessing_pipeline

    # Use the embedder's own dimensionality (as the preprocessor setup does)
    # rather than a hard-coded 100.
    embedding_size = word_embedder.vector_size

    cleaned_query = preprocessing_pipeline.remove_parentheses(query)
    embedded_query = preprocessing_pipeline.vectorize_sentence(
        cleaned_query, embedding_size, word_embedder
    ).reshape(-1, embedding_size)

    # ``robust`` was never defined; the model loaded above is ``neural_network``.
    # Taking the argmax of ``predict`` mirrors how the evaluation code in this
    # file decodes predictions and avoids ``predict_classes``, which recent
    # Keras releases no longer provide.
    scores = neural_network.predict(embedded_query)
    predicted_index = np.argmax(scores, axis=-1)

    # inverse_transform expects a 1-D array, so decode a length-1 slice and
    # unwrap the single label.
    return large_label_encoder.inverse_transform(predicted_index[:1])[0]

## Check

#### Preprocessing

In [None]:
# Build the preprocessing helper from the loaded embedder and its vector size.
preprocessor=preprocessing(word_embedder.vector_size,word_embedder)

#### Word Embedding

In [None]:
# Bare expression: in a notebook cell this echoes the loaded embedder's repr
# as a quick check that it was loaded.
word_embedder

#### Classifier

In [None]:
# Bare expression: in a notebook cell this echoes the loaded model's repr
# as a quick check that it was loaded.
neural_network

# ---------------------------------------------------------------------------------------------------------------

# Read Data To Pandas Dataframe

In [None]:
# Read the header-less CSV and drop every row that contains a missing value.
large_data_for_classification = (
    pd.read_csv("data/big.csv", header=None)
    .dropna(axis=0)
)

In [None]:
# Preview the first rows of the cleaned frame.
large_data_for_classification.head()

### Preprocess Data

In [None]:
# Preprocess product titles into fixed-size vectors (100-dimensional per the
# original note) and category names into integer labels. The call returns the
# embedded data together with the label encoder.
# NOTE(review): which CSV column (1 or 0) holds titles vs. categories is not
# visible here -- confirm against preprocessing.preprocess_data.
large_embedded_data, large_label_encoder = preprocessor.preprocess_data(
    large_data_for_classification[1],
    large_data_for_classification[0]
)

In [None]:
# Preview the first rows of the embedded data.
large_embedded_data.head()

In [None]:
# Show the dimensions of the embedded data.
large_embedded_data.shape

In [None]:
# Bare expression: in a notebook cell this echoes the label encoder's repr.
large_label_encoder

In [None]:
# Persist the label encoder in the versioned model directory so predicted
# class indices can be decoded later.
label_encoder_path = "model/{}/label_encoder.pickle".format(version)
with open(label_encoder_path, "wb") as encoder_file:
    pickle.dump(large_label_encoder, encoder_file)

# ---------------------------------------------------------------------------------------------------------------

### Test

In [None]:
# Work on a copy so the embedded data prepared above stays untouched.
data = large_embedded_data.copy()

# Drawing as many rows as the frame holds yields a shuffled version of it.
row_count = len(data)
sampled_embedded_data = data.sample(n=row_count)

# Alias for the classifier under evaluation.
model = neural_network

In [None]:
# Separate the feature columns from the "Labels" column, one-hot encode the
# labels, then hold out 20% of the rows for evaluation.
feature_frame = sampled_embedded_data.drop("Labels", axis=1)
one_hot_labels = to_categorical(sampled_embedded_data["Labels"])
nn_X_train, nn_X_test, nn_y_train, nn_y_test = train_test_split(
    feature_frame,
    one_hot_labels,
    test_size=0.2,
)

In [None]:
def accuracy(predicted, truth):
    """Return the fraction of positions where ``predicted`` equals ``truth``.

    Args:
        predicted: Sequence (or array) of predicted values.
        truth: Sequence (or array) of reference values, same shape as
            ``predicted``.

    Returns:
        A float in ``[0, 1]``: number of matching positions divided by the
        total number of positions.

    Raises:
        ValueError: If the two inputs do not have the same shape. Without
            this check NumPy may broadcast them (e.g. one prediction against
            many labels) and silently report a misleading score.
        ZeroDivisionError: If the inputs are empty.
    """
    predicted_arr = np.asarray(predicted)
    truth_arr = np.asarray(truth)

    if predicted_arr.shape != truth_arr.shape:
        raise ValueError(
            "predicted and truth must have the same shape, got {} and {}".format(
                predicted_arr.shape, truth_arr.shape
            )
        )

    matches = predicted_arr == truth_arr
    # Plain-int division keeps the return type a Python float and preserves
    # the ZeroDivisionError raised for empty input.
    return int(np.count_nonzero(matches)) / matches.size

In [None]:
# Turn the one-hot rows of the held-out labels into class indices.
nn_y_truth = list(map(np.argmax, nn_y_test))

# Score the held-out features and keep the highest-scoring class per row.
predicted_scores = model.predict(nn_X_test)
nn_y_pred = list(map(np.argmax, predicted_scores))

validation_accuracy = accuracy(nn_y_pred, nn_y_truth)
print("Validation Accuracy : {}".format(validation_accuracy))

# ---------------------------------------------------------------------------------------------------------------