# Import Useful Modules 

In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import gmtime, strftime
import time
import datetime
from collections import Counter
import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Preprocessing
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Embedder
from gensim.models import FastText

# Classifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.linear_model import LogisticRegression as LR
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.grid_search import GridSearchCV as GS
from sklearn.model_selection import validation_curve, learning_curve
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures as Poly

from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout
from keras.utils import to_categorical
from keras.optimizers import SGD

import tqdm

In [2]:
# Model version tag; it is substituted into the "model/<version>/..." path
# when the pickled word embedder is loaded.

version = "version_5"

# ---------------------------------------------------------------------------------------------------------------

# Import Model

In [3]:
from preprocessing_pipeline import preprocessing

In [4]:
# Load the pickled word embedder that belongs to the selected model version.
# NOTE: pickle.load can execute arbitrary code -- only load trusted artifacts.
embedder_path = "model/{}/word_embedder_20_new.pickle".format(version)
with open(embedder_path, "rb") as embedder_file:
    word_embedder = pickle.load(embedder_file)

## Check

#### Preprocessing

In [5]:
# Build the preprocessing pipeline from the embedder's vector size and the
# embedder itself; the trailing bare name only displays the object in the
# notebook.
preprocessor=preprocessing(word_embedder.vector_size,word_embedder)
preprocessor

<preprocessing_pipeline.preprocessing at 0x4658c38a58>

#### Word Embedding

In [6]:
word_embedder

<gensim.models.fasttext.FastText at 0x4658c38c50>

# ---------------------------------------------------------------------------------------------------------------

# Read Data To Pandas Dataframe

In [7]:
# Load the header-less labelled CSV and discard every row that contains a
# missing value.
large_data_for_classification = pd.read_csv(
    "data/big.csv",
    header=None,
).dropna(axis=0)

In [8]:
large_data_for_classification.head()

Unnamed: 0,0,1,2
0,hardware,KINGSTON+KVR1333D3N9,1510.0
1,musik,power+amplifier+wisdom+,62.0
2,outwear-motor,jas%20hujan%20anak,391.0
3,celana,Celana+bahan+formal,288.0
4,komputer,Preset+lightroom,1.0


In [13]:
# Mapping table with one row per category; the first CSV column is the index.
raw_category_mapper = pd.read_csv("category_mapping.csv", index_col=0)

# Map every "l2" value to the "l1" value on the same row.  The columns are
# paired positionally, so the result does not depend on the index labels
# (a label-based lookup breaks when the index contains duplicates).  When an
# "l2" value appears more than once, the last row wins.
category_mapper = dict(
    zip(raw_category_mapper["l2"], raw_category_mapper["l1"])
)

In [14]:
# Replace each category in column 0 by its mapped value; a category missing
# from the mapper raises KeyError.
new_category = list(
    map(category_mapper.__getitem__, large_data_for_classification[0])
)
large_data_for_classification[0] = new_category

In [15]:
large_data_for_classification.head()

Unnamed: 0,0,1,2
0,komputer,KINGSTON+KVR1333D3N9,1510.0
1,hobi_dan_koleksi,power+amplifier+wisdom+,62.0
2,motor,jas%20hujan%20anak,391.0
3,fashion_wanita,Celana+bahan+formal,288.0
4,komputer,Preset+lightroom,1.0


In [16]:
# Run the preprocessing pipeline on the labelled data:
#   * column 1 (product title) is turned into a 100-dimensional vector,
#   * column 0 (category name) is turned into an integer label.
# The call returns the embedded data plus the encoder used for the labels.
large_embedded_data, large_label_encoder = preprocessor.preprocess_data(
    large_data_for_classification[1],
    large_data_for_classification[0],
)

REMOVING PUNCTUATIONS


100%|█████████████████████████████████████████████████████████████████████████| 396099/396099 [00:04<00:00, 87591.32it/s]


CONVERTING SENTENCE TO VECTOR


100%|█████████████████████████████████████████████████████████████████████████| 396099/396099 [00:14<00:00, 27776.24it/s]


SAVE VECTOR TO PANDAS DATAFRAME


100%|██████████████████████████████████████████████████████████████████████████████████| 100/100 [00:16<00:00,  6.20it/s]


In [17]:
large_embedded_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,91,92,93,94,95,96,97,98,99,Labels
0,-1.012349,-0.75877,-0.350302,0.166808,-0.75108,-0.460484,-0.09217,-0.56502,-0.285008,-0.081576,...,-0.280283,-0.186133,-0.410584,0.1545,-0.208072,0.005299,1.112637,0.402039,0.414983,10
1,-0.795659,-2.895371,-1.744382,0.395833,-0.306053,0.551503,-0.205609,-1.9929,-0.347923,0.280318,...,-0.375908,0.806441,0.564788,1.876183,1.148381,0.799,0.305677,-0.64342,0.623411,6
2,-0.960375,-0.504543,0.515864,0.896507,0.344629,0.50543,-1.599178,-1.249479,-1.578761,2.026936,...,-0.161255,0.004695,-1.989833,0.951701,0.752408,-1.261439,1.029763,0.189695,0.723305,12
3,-2.369716,-0.911321,-0.494526,-0.126446,-0.021067,0.283687,0.012714,-0.269755,-0.109006,0.688226,...,0.985987,0.006689,-1.561995,0.029477,0.27267,1.021256,1.340632,0.985763,0.906968,3
4,-1.159122,-0.447368,-0.592817,0.131752,-1.350843,0.111322,-0.583496,-1.801566,0.54063,0.376193,...,0.004681,-0.056971,-0.586562,1.2876,0.441914,-0.338393,0.84119,1.215016,1.141698,10


In [18]:
# Keep only rows whose feature columns (everything except "Labels") do not
# sum to exactly zero.
feature_sums = large_embedded_data.drop(["Labels"], axis=1).sum(axis=1)
large_embedded_data = large_embedded_data.loc[feature_sums != 0]

In [19]:
large_embedded_data.shape

(392825, 101)

In [20]:
large_label_encoder

LabelEncoder()

#### Product Title Only

In [231]:
def read_product_title_data():
    """Embed the unlabelled product titles stored in ``data/products2m.txt``.

    The file is read with ``pd.read_fwf`` (no header); only its first column
    is used and missing titles are dropped.  The titles are passed through the
    module-level ``preprocessor`` together with a constant dummy label, because
    ``preprocess_data`` takes a label sequence as its second argument.

    Returns:
        The embedded titles without the ``Labels`` column, restricted to rows
        whose embedding components do not sum to exactly zero.
    """
    raw_titles = pd.read_fwf('data/products2m.txt', header=None)

    # Work on a fresh Series instead of dropping rows in place on a derived
    # frame, which triggers pandas' chained-assignment warning.
    titles = raw_titles[0].dropna().rename("Product Title")

    # One constant placeholder label per title; no per-row loop is needed.
    dummy_category = [0] * len(titles)

    embedded_product_title, _ = preprocessor.preprocess_data(
        titles,
        dummy_category,
    )

    # Compute the row sums on the side so the frame returned by the
    # preprocessor is not mutated with a scratch column.
    component_sums = embedded_product_title.drop(["Labels"], axis=1).sum(axis=1)
    kept_rows = embedded_product_title.loc[component_sums != 0]

    return kept_rows.drop("Labels", axis=1)

In [None]:
product_title=read_product_title_data()

100%|█████████████████████████████████████████████████████████████████████| 1938532/1938532 [00:01<00:00, 1839555.39it/s]


REMOVING PUNCTUATIONS


100%|███████████████████████████████████████████████████████████████████████| 1938532/1938532 [00:41<00:00, 46917.38it/s]


CONVERTING SENTENCE TO VECTOR


 83%|████████████████████████████████████████████████████████████            | 1617825/1938532 [02:48<00:33, 9580.41it/s]

# ---------------------------------------------------------------------------------------------------------------

### Build Neural Network

In [175]:
def accuracy(predicted,truth):
    """Return the fraction of positions where ``predicted`` equals ``truth``.

    Args:
        predicted: Sequence (or array) of predicted labels.
        truth: Sequence (or array) of reference labels, same shape.

    Returns:
        A float in ``[0, 1]``: matching positions divided by total positions.

    Raises:
        ValueError: If the two inputs do not have the same shape.  Without
            this check NumPy would silently broadcast, e.g. a single
            prediction against many labels, and report a meaningless score.
        ZeroDivisionError: If both inputs are empty.
    """
    predicted = np.asarray(predicted)
    truth = np.asarray(truth)
    if predicted.shape != truth.shape:
        raise ValueError(
            "predicted and truth must have the same shape, got {} and {}".format(
                predicted.shape, truth.shape
            )
        )

    matches = predicted == truth
    # Integer count over integer size keeps the exact Python-float result.
    return int(np.count_nonzero(matches)) / matches.size

In [193]:
def evaluate_semi_supervised(dataset, n_predictions=10):
    """Score the unanimous-vote ensemble against the known labels.

    A row is *accepted* when the columns ``"Prediction 1"`` ..
    ``"Prediction <n_predictions>"`` all hold the same value; the accepted
    value is that shared prediction, otherwise ``-1``.

    Side effect: writes the accepted value (or ``-1``) into a ``"Success"``
    column of ``dataset``.

    Args:
        dataset: DataFrame holding the prediction columns and a
            ``"Base Truth"`` column.
        n_predictions: Number of ``"Prediction i"`` columns to compare
            (default 10).

    Returns:
        Tuple ``(count, total, n_rows)``: accepted rows whose prediction
        equals ``"Base Truth"``, accepted rows, and all rows.
    """
    columns = ["Prediction {}".format(i) for i in range(1, n_predictions + 1)]
    votes = dataset[columns].values

    # A row is unanimous when every vote equals the first vote of that row.
    unanimous = (votes == votes[:, :1]).all(axis=1)
    dataset["Success"] = np.where(unanimous, votes[:, 0], -1)

    # Compare row by row on the underlying arrays so the result does not
    # depend on the DataFrame's index labels (looking up "Base Truth" with a
    # positional counter fails or misaligns on a non-default index).
    success = dataset["Success"].values
    accepted = success != -1
    correct = accepted & (success == dataset["Base Truth"].values)

    return int(correct.sum()), int(accepted.sum()), len(dataset)

In [178]:
# Work on a copy so the embedded data itself is left untouched.
data = large_embedded_data.copy()

# Sampling as many rows as the frame holds returns every row in random order.
sampled_embedded_data = data.sample(n=len(data))

# Features are all columns but "Labels"; targets are the one-hot encoded
# labels.  10% of the rows are held out for validation.
nn_X_train, nn_X_test, nn_y_train, nn_y_test = train_test_split(
    sampled_embedded_data.drop("Labels", axis=1),
    to_categorical(sampled_embedded_data["Labels"]),
    test_size=0.1,
)

In [179]:
# Split the 100 feature columns (labelled 0..99) into ten consecutive groups
# of ten columns; sub-model k is trained on group k only.
chunk_columns = [list(range(start, start + 10)) for start in range(0, 100, 10)]

(
    features_1,
    features_2,
    features_3,
    features_4,
    features_5,
    features_6,
    features_7,
    features_8,
    features_9,
    features_10,
) = [nn_X_train[columns] for columns in chunk_columns]

(
    validation_features_1,
    validation_features_2,
    validation_features_3,
    validation_features_4,
    validation_features_5,
    validation_features_6,
    validation_features_7,
    validation_features_8,
    validation_features_9,
    validation_features_10,
) = [nn_X_test[columns] for columns in chunk_columns]

# Every sub-model shares the same one-hot targets.
labels = nn_y_train
validation_labels = nn_y_test

In [180]:
# Sub-model 1: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained on
# features_1 and validated on validation_features_1.
model_1 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_1.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_1.fit(
    features_1,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_1, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 51s - loss: 1.9332 - acc: 0.4221 - val_loss: 1.8544 - val_acc: 0.4447
Epoch 2/5
 - 49s - loss: 1.8097 - acc: 0.4600 - val_loss: 1.7942 - val_acc: 0.4635
Epoch 3/5
 - 50s - loss: 1.7600 - acc: 0.4750 - val_loss: 1.7555 - val_acc: 0.4761
Epoch 4/5
 - 49s - loss: 1.7263 - acc: 0.4856 - val_loss: 1.7306 - val_acc: 0.4850
Epoch 5/5
 - 49s - loss: 1.7006 - acc: 0.4932 - val_loss: 1.7112 - val_acc: 0.4907


In [181]:
# Sub-model 2: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained on
# features_2 and validated on validation_features_2.
model_2 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_2.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_2.fit(
    features_2,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_2, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 50s - loss: 1.9047 - acc: 0.4257 - val_loss: 1.8371 - val_acc: 0.4513
Epoch 2/5
 - 50s - loss: 1.7941 - acc: 0.4649 - val_loss: 1.7817 - val_acc: 0.4706
Epoch 3/5
 - 50s - loss: 1.7450 - acc: 0.4821 - val_loss: 1.7407 - val_acc: 0.4884
Epoch 4/5
 - 50s - loss: 1.7115 - acc: 0.4935 - val_loss: 1.7128 - val_acc: 0.4950
Epoch 5/5
 - 50s - loss: 1.6854 - acc: 0.5018 - val_loss: 1.6913 - val_acc: 0.5018


In [182]:
# Sub-model 3: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained on
# features_3 and validated on validation_features_3.
model_3 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_3.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_3.fit(
    features_3,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_3, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 51s - loss: 1.8915 - acc: 0.4258 - val_loss: 1.8135 - val_acc: 0.4528
Epoch 2/5
 - 50s - loss: 1.7806 - acc: 0.4637 - val_loss: 1.7582 - val_acc: 0.4744
Epoch 3/5
 - 50s - loss: 1.7350 - acc: 0.4792 - val_loss: 1.7306 - val_acc: 0.4840
Epoch 4/5
 - 50s - loss: 1.7037 - acc: 0.4893 - val_loss: 1.7046 - val_acc: 0.4957
Epoch 5/5
 - 50s - loss: 1.6792 - acc: 0.4977 - val_loss: 1.6845 - val_acc: 0.4994


In [183]:
# Sub-model 4: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained on
# features_4 and validated on validation_features_4.
model_4 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_4.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_4.fit(
    features_4,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_4, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 51s - loss: 1.9237 - acc: 0.4266 - val_loss: 1.8495 - val_acc: 0.4541
Epoch 2/5
 - 50s - loss: 1.8103 - acc: 0.4643 - val_loss: 1.7899 - val_acc: 0.4737
Epoch 3/5
 - 50s - loss: 1.7612 - acc: 0.4809 - val_loss: 1.7554 - val_acc: 0.4860
Epoch 4/5
 - 50s - loss: 1.7276 - acc: 0.4917 - val_loss: 1.7270 - val_acc: 0.4939
Epoch 5/5
 - 50s - loss: 1.7017 - acc: 0.4992 - val_loss: 1.7058 - val_acc: 0.4996


In [184]:
# Sub-model 5: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained on
# features_5 and validated on validation_features_5.
model_5 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_5.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_5.fit(
    features_5,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_5, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 51s - loss: 1.8235 - acc: 0.4499 - val_loss: 1.7464 - val_acc: 0.4753
Epoch 2/5
 - 50s - loss: 1.7171 - acc: 0.4854 - val_loss: 1.6918 - val_acc: 0.4933
Epoch 3/5
 - 50s - loss: 1.6724 - acc: 0.4993 - val_loss: 1.6614 - val_acc: 0.5037
Epoch 4/5
 - 50s - loss: 1.6416 - acc: 0.5094 - val_loss: 1.6346 - val_acc: 0.5121
Epoch 5/5
 - 50s - loss: 1.6179 - acc: 0.5176 - val_loss: 1.6179 - val_acc: 0.5192


In [185]:
# Sub-model 6: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained on
# features_6 and validated on validation_features_6.
model_6 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_6.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_6.fit(
    features_6,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_6, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 51s - loss: 1.8481 - acc: 0.4425 - val_loss: 1.7844 - val_acc: 0.4658
Epoch 2/5
 - 50s - loss: 1.7432 - acc: 0.4773 - val_loss: 1.7300 - val_acc: 0.4865
Epoch 3/5
 - 50s - loss: 1.6973 - acc: 0.4920 - val_loss: 1.6994 - val_acc: 0.4972
Epoch 4/5
 - 50s - loss: 1.6655 - acc: 0.5030 - val_loss: 1.6723 - val_acc: 0.5017
Epoch 5/5
 - 50s - loss: 1.6406 - acc: 0.5111 - val_loss: 1.6534 - val_acc: 0.5119


In [186]:
# Sub-model 7: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained on
# features_7 and validated on validation_features_7.
model_7 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_7.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_7.fit(
    features_7,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_7, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 50s - loss: 1.8907 - acc: 0.4294 - val_loss: 1.8207 - val_acc: 0.4538
Epoch 2/5
 - 50s - loss: 1.7809 - acc: 0.4646 - val_loss: 1.7652 - val_acc: 0.4737
Epoch 3/5
 - 50s - loss: 1.7352 - acc: 0.4788 - val_loss: 1.7293 - val_acc: 0.4845
Epoch 4/5
 - 49s - loss: 1.7042 - acc: 0.4893 - val_loss: 1.7055 - val_acc: 0.4948
Epoch 5/5
 - 50s - loss: 1.6798 - acc: 0.4971 - val_loss: 1.6854 - val_acc: 0.4991


In [187]:
# Sub-model 8: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained on
# features_8 and validated on validation_features_8.
model_8 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_8.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_8.fit(
    features_8,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_8, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 51s - loss: 1.9596 - acc: 0.4086 - val_loss: 1.8781 - val_acc: 0.4370
Epoch 2/5
 - 50s - loss: 1.8423 - acc: 0.4474 - val_loss: 1.8173 - val_acc: 0.4590
Epoch 3/5
 - 50s - loss: 1.7932 - acc: 0.4646 - val_loss: 1.7824 - val_acc: 0.4712
Epoch 4/5
 - 50s - loss: 1.7600 - acc: 0.4758 - val_loss: 1.7563 - val_acc: 0.4773
Epoch 5/5
 - 50s - loss: 1.7343 - acc: 0.4844 - val_loss: 1.7356 - val_acc: 0.4849


In [188]:
# Sub-model 9: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained on
# features_9 and validated on validation_features_9.
model_9 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_9.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_9.fit(
    features_9,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_9, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 51s - loss: 1.8910 - acc: 0.4277 - val_loss: 1.8146 - val_acc: 0.4535
Epoch 2/5
 - 50s - loss: 1.7804 - acc: 0.4638 - val_loss: 1.7574 - val_acc: 0.4738
Epoch 3/5
 - 50s - loss: 1.7331 - acc: 0.4792 - val_loss: 1.7247 - val_acc: 0.4831
Epoch 4/5
 - 50s - loss: 1.7004 - acc: 0.4894 - val_loss: 1.6994 - val_acc: 0.4915
Epoch 5/5
 - 50s - loss: 1.6752 - acc: 0.4975 - val_loss: 1.6783 - val_acc: 0.5005


In [189]:
# Sub-model 10: 10 inputs -> 750 ReLU -> 500 ReLU -> 20-way softmax, trained
# on features_10 and validated on validation_features_10.
model_10 = Sequential([
    Dense(750, input_shape=(10,), activation='relu'),
    Dense(500, activation='relu'),
    Dense(20, activation='softmax'),
])

model_10.compile(
    optimizer="Adagrad",
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

history = model_10.fit(
    features_10,
    labels,
    epochs=5,
    batch_size=100,
    validation_data=(validation_features_10, validation_labels),
    shuffle=True,
    verbose=2,
)

Train on 353542 samples, validate on 39283 samples
Epoch 1/5
 - 52s - loss: 1.9392 - acc: 0.4109 - val_loss: 1.8803 - val_acc: 0.4298
Epoch 2/5
 - 51s - loss: 1.8243 - acc: 0.4502 - val_loss: 1.8158 - val_acc: 0.4549
Epoch 3/5
 - 51s - loss: 1.7757 - acc: 0.4666 - val_loss: 1.7785 - val_acc: 0.4685
Epoch 4/5
 - 51s - loss: 1.7428 - acc: 0.4773 - val_loss: 1.7505 - val_acc: 0.4736
Epoch 5/5
 - 51s - loss: 1.7173 - acc: 0.4858 - val_loss: 1.7325 - val_acc: 0.4800


In [194]:
# NOTE(review): `semi_supervised_prediction` is not assigned by any top-level
# statement visible in this file -- confirm which cell builds it.  The callee
# reads its "Prediction 1".."Prediction 10" and "Base Truth" columns.
evaluate_semi_supervised(semi_supervised_prediction)

100%|████████████████████████████████████████████████████████████████████████████| 39283/39283 [00:07<00:00, 5606.54it/s]
39283it [00:00, 306083.11it/s]


(6531, 6856, 39283)

In [211]:
def semi_supervised_classification_report(dataset, n_predictions=10):
    """Print a classification report for the rows the ensemble agrees on.

    A row is *accepted* when the columns ``"Prediction 1"`` ..
    ``"Prediction <n_predictions>"`` all hold the same value.  Only accepted
    rows are passed to ``classification_report``; their ``"Base Truth"``
    values are the reference labels.

    Side effect: writes the accepted value (or ``-1`` for rejected rows) into
    a ``"Success"`` column of ``dataset``.

    Args:
        dataset: DataFrame holding the prediction columns and a
            ``"Base Truth"`` column.
        n_predictions: Number of ``"Prediction i"`` columns to compare
            (default 10).
    """
    columns = ["Prediction {}".format(i) for i in range(1, n_predictions + 1)]
    votes = dataset[columns].values

    # A row is unanimous when every vote equals the first vote of that row.
    unanimous = (votes == votes[:, :1]).all(axis=1)
    dataset["Success"] = np.where(unanimous, votes[:, 0], -1)

    # Select truth and prediction with the same positional mask so they stay
    # aligned whatever the DataFrame's index labels are (a positional counter
    # used as an index label fails or misaligns on a non-default index).
    success = dataset["Success"].values
    accepted = success != -1
    taken_base_truth = dataset["Base Truth"].values[accepted].tolist()
    taken_prediction = success[accepted].tolist()

    print(classification_report(taken_base_truth, taken_prediction))

In [None]:
def semi_supervised_dataset(model, features, chunk_size=10):
    """Pseudo-label ``features`` with the rows all sub-models agree on.

    Sub-model ``k`` (0-based position in ``model``) predicts from the columns
    labelled ``k * chunk_size`` .. ``(k + 1) * chunk_size - 1``; its predicted
    class is the argmax of each output row.  A row is kept only when every
    sub-model predicts the same class.

    Args:
        model: Sequence of fitted sub-models, each exposing ``predict``.
            Every model in the sequence is used.
        features: DataFrame whose columns are labelled with consecutive
            integers starting at 0.  It is not modified.
        chunk_size: Number of feature columns fed to each sub-model
            (default 10).

    Returns:
        A copy of ``features`` restricted to the unanimous rows, with the
        agreed class in an added ``"Labels"`` column.

    Raises:
        ValueError: If ``model`` is empty.
    """
    if len(model) == 0:
        raise ValueError("at least one sub-model is required")

    votes = []
    for position, sub_model in enumerate(model):
        print("PREDICTION {}".format(position + 1))
        first_column = position * chunk_size
        chunk = features[list(range(first_column, first_column + chunk_size))]
        # One argmax over the whole output replaces a per-row Python loop.
        votes.append(np.argmax(sub_model.predict(chunk), axis=1))

    print("ENSEMBLING")
    votes = np.column_stack(votes)

    # A row is unanimous when every vote equals the first sub-model's vote.
    unanimous = (votes == votes[:, :1]).all(axis=1)

    result_dataset = features.copy()
    result_dataset["Labels"] = np.where(unanimous, votes[:, 0], -1)

    return result_dataset.loc[result_dataset["Labels"] != -1]
    

In [None]:
# Order matters: semi_supervised_dataset feeds columns 0-9 to the first entry,
# 10-19 to the second, and so on.
models=[model_1,model_2,model_3,model_4,model_5,model_6,model_7,model_8,model_9,model_10]

In [None]:
# Pseudo-label the unlabelled product titles with the list of sub-models.
# The previous arguments (`product_`, `embedded_product_title`) are not
# defined at module level; `models` and `product_title` are the names the
# notebook actually assigns.
additional_dataset = semi_supervised_dataset(models, product_title)

# ---------------------------------------------------------------------------------------------------------------

In [None]:
# Reduce the one-hot targets and model_1's outputs to class indices, then
# report the share of matching rows on the validation split.
nn_y_truth = list(map(np.argmax, validation_labels))
nn_y_pred = list(map(np.argmax, model_1.predict(validation_features_1)))
validation_accuracy = accuracy(nn_y_pred, nn_y_truth)
print("Validation Accuracy : {}".format(validation_accuracy))

In [None]:
# Persist each trained sub-model to its own HDF5 file in the working directory.
for trained_model, target_path in (
    (model_1, "level1_semsup_1.h5"),
    (model_2, "level1_semsup_2.h5"),
    (model_3, "level1_semsup_3.h5"),
    (model_4, "level1_semsup_4.h5"),
    (model_5, "level1_semsup_5.h5"),
    (model_6, "level1_semsup_6.h5"),
    (model_7, "level1_semsup_7.h5"),
    (model_8, "level1_semsup_8.h5"),
    (model_9, "level1_semsup_9.h5"),
    (model_10, "level1_semsup_10.h5"),
):
    trained_model.save(target_path)