# Import Useful Modules 

In [7]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import gmtime, strftime
import time
import datetime
from collections import Counter
import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Preprocessing
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Embedder
from gensim.models import FastText

# Classifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.linear_model import LogisticRegression as LR
from sklearn.ensemble import RandomForestClassifier as RFC
# sklearn.grid_search was deprecated in scikit-learn 0.18 and removed in 0.20,
# so the bare legacy import crashes this cell on any modern install.
# The legacy location is tried first so that `GS` stays exactly the class this
# notebook was written against on old installs; newer installs fall back to
# the sklearn.model_selection implementation.
try:
    from sklearn.grid_search import GridSearchCV as GS
except ImportError:
    from sklearn.model_selection import GridSearchCV as GS

from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout
from keras.utils import to_categorical

# ---------------------------------------------------------------------------------------------------------------

# Import Model

In [51]:
# Directory holding every serialized artifact of this model version.
# Change it here to load a different version of all five artifacts at once.
MODEL_DIR = "model/version_1"


def _load_pickle(path):
    """Return the object unpickled from the binary file at `path`."""
    # SECURITY: pickle.load can execute arbitrary code while deserializing;
    # only point this at artifacts produced by this project.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# NOTE(review): the pipeline's functions are displayed as `__main__.<name>`, so
# unpickling presumably needs those functions to be defined in this kernel
# first - confirm that a fresh "Restart & Run All" can still load this file.
preprocessing_pipeline = _load_pickle(MODEL_DIR + "/preprocessing_pipeline.pickle")
word_embedder = _load_pickle(MODEL_DIR + "/word_embedder.pickle")
logistic_regression = _load_pickle(MODEL_DIR + "/classifier_lr.pickle")
support_vector_machine = _load_pickle(MODEL_DIR + "/classifier_svm.pickle")

# NOTE(review): "classifer" (sic) is kept verbatim; presumably it matches the
# name of the saved file on disk - verify before correcting the spelling.
neural_network = load_model(MODEL_DIR + "/classifer_nn(keras).h5")

## Check the Loaded Objects

#### Preprocessing

In [52]:
# Inspect the unpickled preprocessing pipeline to confirm it loaded.
preprocessing_pipeline

{'preprocess_data': <function __main__.preprocess_data>,
 'remove_parentheses': <function __main__.remove_parentheses>,
 'vectorize_sentence': <function __main__.vectorize_sentence>,
 'vectorize_word': <function __main__.vectorize_word>}

#### Word Embedding

In [13]:
# Inspect the unpickled word embedder to confirm it loaded.
word_embedder

<gensim.models.fasttext.FastText at 0x284c0c8400>

#### Classifier

In [14]:
# Inspect the unpickled logistic-regression classifier and its hyperparameters.
logistic_regression

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=-1, penalty='l2', random_state=None, solver='newton-cg',
          tol=0.0001, verbose=0, warm_start=True)

In [15]:
# Inspect the unpickled SVM classifier and its hyperparameters.
support_vector_machine

SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.01, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [12]:
# Inspect the Keras model returned by load_model to confirm it loaded.
neural_network

<keras.models.Sequential at 0x284032b748>

# ---------------------------------------------------------------------------------------------------------------

# Read Data Into a Pandas DataFrame

In [17]:
# Data for product-category classification, read from CSV into a DataFrame.
data_for_classification=pd.read_csv("data/product_data.csv")

In [40]:
# Preview the first rows of the product data.
data_for_classification.head()

Unnamed: 0,Product Id,Category Id,Category Name,Product Title
0,114628582,2,Desktop,PC HP Pavillion 251VGA-i5(4460) with LED-20
1,640569762,2,Desktop,Unik SanDisk Flashdisk 64GB Ultra USB 3 3 0 Fl...
2,757005547,2,Desktop,Paket Spek PC Agan Bagas 2
3,758393462,2,Desktop,PAket Spek PC Agan JP Wogo
4,757008997,2,Desktop,PC HP All In One AIO 20 C303D


### Preprocess Data

In [55]:
# Convert each product title into a fixed-length vector (the size passed in is
# word_embedder.vector_size; the original note calls it 100-dimensional) and
# each category name into an integer label.
preprocess_data_fn = preprocessing_pipeline["preprocess_data"]
product_titles = data_for_classification["Product Title"]
category_names = data_for_classification["Category Name"]

embedded_data, label_encoder = preprocess_data_fn(
    product_titles,
    category_names,
    word_embedder.vector_size,
    word_embedder,
)

  del sys.path[0]


In [59]:
# Preview the first rows of the embedded features and their labels.
embedded_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,Labels
0,-1.353583,-3.1719,1.371736,0.123707,-0.62016,1.464381,1.22238,-4.205549,-0.133768,-2.833227,...,3.284703,2.795318,-0.21433,0.750985,-1.134297,-2.441089,-0.397881,-0.623074,-0.85346,60
1,-1.411664,-3.624061,-0.124707,1.157138,-2.769813,0.722179,1.752806,-6.023485,1.032547,-3.752555,...,5.373015,2.577363,1.723452,0.312116,1.192225,-4.973745,3.088582,0.075678,3.797503,60
2,-1.210998,-1.52154,-0.512093,1.162975,-0.206553,0.776194,1.275206,-1.973089,-0.176616,-2.115182,...,1.901303,1.018535,0.845923,0.409504,-0.487097,-1.452161,0.795725,-1.173649,1.033765,60
3,-0.62024,-1.366821,-0.797583,1.449532,-0.178919,0.559059,1.577674,-2.021882,-0.063093,-1.323352,...,1.7254,0.725442,0.841356,0.316991,-0.575546,-1.296193,0.435206,-0.329237,0.069929,60
4,-0.494332,-4.235569,0.269955,-0.220324,0.461205,1.288682,1.095711,-5.439537,1.180161,-2.108301,...,5.4325,1.697883,-0.479877,-0.55601,-1.109091,-0.888318,0.026072,0.021216,0.143521,60


In [60]:
# Inspect the label encoder returned by the preprocessing step.
label_encoder

LabelEncoder()

# ---------------------------------------------------------------------------------------------------------------