In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from spacy.lang.es import Spanish
from spacy.lang.pt import Portuguese
from spacy.tokenizer import Tokenizer
from sklearn.metrics import balanced_accuracy_score
from multiprocessing import  Pool
import numpy as np
import fasttext
import csv
import unicodedata
from collections import defaultdict
from joblib import Parallel, delayed
from sklearn.utils import resample
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
pd.set_option('display.max_rows', 200)

## Preprocess

In [88]:
# Load Dataset
data_train = pd.read_csv('../data/train.csv')
data_test = pd.read_csv('../data/test.csv')


In [26]:
def normalize_text(text,nlp):
    """Lower-case and tokenise ``text``, returning the kept tokens joined by spaces.

    A token is dropped when it is a stop word, is not alphabetic, is a digit,
    or is a single character. Non-ASCII tokens have their combining marks
    (Unicode category ``Mn``) removed after NFD decomposition.

    Args:
        text: raw title string.
        nlp: object exposing ``tokenizer(text)`` that yields tokens with the
            attributes ``text``, ``is_stop``, ``is_alpha``, ``is_digit`` and
            ``is_ascii``.

    Returns:
        The normalised string, or the placeholder ``"emptystring"`` when no
        token survives the filters.
    """
    kept = []
    for token in nlp.tokenizer(text.lower()):
        if token.is_stop or not token.is_alpha:
            continue
        if token.is_digit or len(token.text) == 1:
            continue
        word = token.text
        if not token.is_ascii:
            # Decompose accented characters and drop the combining marks.
            decomposed = unicodedata.normalize('NFD', word.lower())
            word = ''.join(ch for ch in decomposed if unicodedata.category(ch) != 'Mn')
        kept.append(word)
    return ' '.join(kept) if kept else "emptystring"

In [27]:
nlp_es = Spanish()
nlp_pt = Portuguese()
normalize_text("Disfraz De General Grievous Para Adultos, Talla: Única,", nlp_es)

'disfraz grievous adultos talla unica'

In [None]:
tk = nlp_es.tokenizer('para, 1983, de con 18te ')
for t in tk:
    print("Text:", t.text)
    print("Is Stop: ", t.is_stop)
    print("Is Ascii: ", t.is_ascii)
    print("Is Alpha: ", t.is_alpha)
    print("Is Digit: ", t.is_digit)

In [4]:
def parallelize_dataframe(df, func, n_cores=8):
    """Apply ``func`` to row-wise chunks of ``df`` in worker processes and concatenate.

    Args:
        df: DataFrame to process.
        func: picklable callable taking one DataFrame chunk and returning a
            DataFrame (or Series) that ``pd.concat`` can stack.
        n_cores: number of chunks and of worker processes.

    Returns:
        ``pd.concat`` of the per-chunk results, in chunk order.
    """
    # Compute the chunk boundaries on integer positions and slice with iloc.
    # Handing the DataFrame itself to np.array_split goes through
    # DataFrame.swapaxes, which recent pandas versions deprecate. The chunk
    # sizes are the same as np.array_split(df, n_cores) would produce.
    positions = np.array_split(np.arange(len(df)), n_cores)
    offsets = np.cumsum([0] + [len(chunk) for chunk in positions])
    df_split = [df.iloc[start:stop] for start, stop in zip(offsets[:-1], offsets[1:])]

    pool = Pool(n_cores)
    try:
        mapped = pool.map(func, df_split)
    finally:
        # Always release the worker processes, even when func raises.
        pool.close()
        pool.join()
    return pd.concat(mapped)

In [42]:
def preprocess(df):
    """Tokenise training titles per language and build fastText labels.

    Reads the "language", "title" and "category" columns of ``df``. Adds the
    columns "tokens" and "label" to ``df`` in place.

    Returns:
        ``df[["label","tokens","language"]]``. Rows whose language is neither
        'spanish' nor 'portuguese' keep a missing "tokens" value.
    """
    tokenizers = (("spanish", Spanish()), ("portuguese", Portuguese()))
    for language, nlp in tokenizers:
        mask = df["language"] == language
        if mask.any():
            # Normalise only the rows of this language. Previously every title
            # was run through both tokenizers and half the results discarded.
            df.loc[mask, "tokens"] = df.loc[mask, "title"].apply(normalize_text,args=(nlp,))
    if "tokens" not in df.columns:
        # No row matched either language: still expose the column.
        df["tokens"] = np.nan
    df["label"] = df["category"].apply(lambda x: '__label__'+ x)
    return df[["label","tokens","language"]]

In [75]:
def preprocess_test(df):
    """Tokenise test-set titles per language.

    Reads the "language", "title" and "id" columns of ``df``. Adds the column
    "tokens" to ``df`` in place.

    Returns:
        ``df[["id","tokens","language"]]``. Rows whose language is neither
        'spanish' nor 'portuguese' keep a missing "tokens" value.
    """
    tokenizers = (("spanish", Spanish()), ("portuguese", Portuguese()))
    for language, nlp in tokenizers:
        mask = df["language"] == language
        if mask.any():
            # Normalise only the rows of this language instead of tokenising
            # every title twice and throwing half of the results away.
            df.loc[mask, "tokens"] = df.loc[mask, "title"].apply(normalize_text,args=(nlp,))
    if "tokens" not in df.columns:
        # No row matched either language: still expose the column.
        df["tokens"] = np.nan
    return df[["id","tokens","language"]]

In [44]:
def create_fasttext_split_files(train_df, test_df, outputfiles):
    """Preprocess the data and write fastText train/validation (and optional test) files.

    Args:
        train_df: frame accepted by ``preprocess`` (needs "title", "language"
            and "category").
        test_df: frame accepted by ``preprocess_test``, or ``None`` to skip the
            test-set files.
        outputfiles: sequence of paths ``[train, validation]``, plus a third
            ``test`` path when ``test_df`` is given.

    Side effects:
        Writes the files named in ``outputfiles``, the train/validation
        language mappings under ``../data/`` and, when a test set is given,
        ``test_language_mappping.csv`` in the working directory.
    """
    # train and validation set files
    train = parallelize_dataframe(train_df, preprocess)
    # Stratified 95/5 split with a fixed seed so the split can be reproduced.
    X_train, X_val, y_train, y_val = train_test_split(train[["tokens","language"]], train["label"], test_size=0.05, random_state=42, stratify=train["label"])
    train_fasttext = pd.concat([y_train,X_train["tokens"]], axis=1)
    val_fasttext = pd.concat([y_val,X_val["tokens"]], axis=1)
    # One "<label> <tokens>" line per row, with no quoting.
    train_fasttext.to_csv(outputfiles[0],index=False, sep=' ', header=False, quoting=csv.QUOTE_NONE, quotechar="", escapechar=" ")
    val_fasttext.to_csv(outputfiles[1],index=False, sep=' ', header=False, quoting=csv.QUOTE_NONE, quotechar="", escapechar=" ")

    # NOTE(review): `line_terminator` was renamed `lineterminator` in newer
    # pandas; keep whichever spelling the installed version accepts.
    X_train["language"].to_csv("../data/train_language_mapping.csv",index=False,line_terminator='\n')
    # Write only the language column, like the train mapping above. The
    # original dumped the whole X_val frame (tokens and language).
    X_val["language"].to_csv("../data/val_language_mapping.csv",index=False,line_terminator='\n')

    #test set file
    # `if test_df:` raises ValueError for any DataFrame (ambiguous truth
    # value); only None means "no test set".
    if test_df is not None:
        test = parallelize_dataframe(test_df, preprocess_test)
        test["tokens"].to_csv(outputfiles[2],index=False,header=False,line_terminator='\n')
        # NOTE(review): this filename has three p's and no '../data/' prefix,
        # while other cells read '../data/test_language_mapping.csv'. Confirm
        # the intended target before renaming.
        test[["id","language"]].to_csv('test_language_mappping.csv',index=False,header=False,line_terminator='\n')

In [10]:
%time create_fasttext_split_files(data_train, data_test)

CPU times: user 3min 38s, sys: 21.1 s, total: 3min 59s
Wall time: 26min 7s


In [93]:
X_train, X_val, y_train, y_val = train_test_split(data_train["language"], data_train["category"], test_size=0.05, random_state=42, stratify=data_train["category"])

pandas.core.series.Series

In [None]:
train_language_mapping = X_train
train_language_mapping.to_csv("../data/train_language_mapping.csv",index=False,line_terminator='\n')
val_language_mapping = X_val
val_language_mapping.to_csv("../data/val_language_mapping.csv",index=False,line_terminator='\n')

## Training

In [2]:
%time model = fasttext.train_supervised(input="../data/train_fasttext.csv", epoch=100, lr=0.075, wordNgrams=2, thread=8)

CPU times: user 12h 55min 58s, sys: 22.4 s, total: 12h 56min 20s
Wall time: 1h 37min 34s


In [3]:
%time model.test('../data/val_fasttext.csv')

CPU times: user 4min 26s, sys: 1.38 s, total: 4min 28s
Wall time: 4min 25s


(1000000, 0.881847, 0.881847)

In [None]:
model.predict('bici playera',5)

In [4]:
model.save_model("../models/model2.bin")

In [None]:
## Test

In [None]:
[x[0] for x in predictions[0]]

In [None]:
data_test.columns

In [None]:
%time model2 = fasttext.train_supervised(input="../data/train_fasttext.csv", epoch=10, lr=0.5, wordNgrams=2, loss='hs', thread=8)

In [None]:
def print_results(N, p, r):
    """Print the sample count, then precision@1 and recall@1 with three decimals.

    Each line is a tab-separated "<name>\t<value>" pair.
    """
    score_rows = (("P@{}\t{:.3f}", p), ("R@{}\t{:.3f}", r))
    print("N\t" + str(N))
    for template, score in score_rows:
        print(template.format(1, score))

print_results(*model2.test('../data/test_fasttext.csv'))

In [None]:
%time model2 = fasttext.train_supervised(input="../data/train_fasttext.csv", epoch=5, lr=0.5, wordNgrams=2, loss='hs', thread=8)

In [None]:
def print_results(N, p, r):
    """Report an evaluation triple: sample count ``N``, precision ``p``, recall ``r``.

    Prints three tab-separated lines. Precision and recall are labelled "@1"
    and rounded to three decimals.
    """
    k = 1
    count_line = "N\t" + str(N)
    print(count_line)
    precision_line = "P@{}\t{:.3f}".format(k, p)
    print(precision_line)
    recall_line = "R@{}\t{:.3f}".format(k, r)
    print(recall_line)

print_results(*model2.test('../data/test_fasttext.csv'))

In [None]:
%time model3 = fasttext.train_supervised(input="../data/train_fasttext.csv", epoch=5, lr=0.8, wordNgrams=2, loss='hs', thread=8)

In [None]:
def print_results(N, p, r):
    """Print the sample count ``N``, then precision@1 ``p`` and recall@1 ``r``.

    Each value is printed on its own tab-separated line; the two scores are
    rounded to three decimals.
    """
    print("N\t" + str(N))
    print("P@{}\t{:.3f}".format(1, p))
    print("R@{}\t{:.3f}".format(1, r))

# Evaluate model3, the model trained in the preceding cell. The original line
# re-tested model2, so model3's scores were never reported.
print_results(*model3.test('../data/test_fasttext.csv'))

## Predict

In [18]:
model = fasttext.load_model("../models/model_norm1.bin")




In [14]:
test_data = pd.read_csv('../data/test_fasttext_norm.txt',names=['tokens'])

In [15]:
test_data.replace(np.nan, 'notitle',inplace=True)

In [19]:
%time predictions = model.predict(test_data["tokens"].values.tolist())

CPU times: user 1min 6s, sys: 164 ms, total: 1min 6s
Wall time: 1min 6s


In [20]:
submission = pd.Series([x[0][9:] for x in predictions[0]])

In [24]:
submission.to_csv("./submissions/submission_test.csv",header=["category"],index_label="id")

In [None]:
submission

## Models By Language

In [26]:
data_train.sample()

Unnamed: 0,title,label_quality,language,category
14794562,Vital Cat V43 Overweight X 750 Gr Mascota Food,unreliable,spanish,CATS_AND_DOGS_FOODS


In [4]:
mask_spanish    = data_train["language"] == 'spanish'
mask_portuguese = data_train["language"] == 'portuguese'

In [5]:
spanish_df = data_train[data_train["language"] == 'spanish']
portuguese_df = data_train[data_train["language"] == 'portuguese']

In [6]:
spanish_test = data_test[data_test["language"] == 'spanish']
portuguese_test = data_test[data_test["language"] == 'portuguese']

In [7]:
print(len(portuguese_df),len(spanish_df))

10000000 10000000


In [14]:
%time create_fasttext_split_files(spanish_df, spanish_test, ["../data/train_fasttext_spanish_norm.csv","../data/val_fasttext_spanish_norm.csv","../data/test_fasttext_spanish_norm.csv"])

CPU times: user 1min 35s, sys: 12.1 s, total: 1min 47s
Wall time: 12min 3s


In [15]:
%time create_fasttext_split_files(portuguese_df, portuguese_test, ["../data/train_fasttext_portuguese_norm.csv","../data/val_fasttext_portuguese_norm.csv","../data/test_fasttext_portuguese_norm.csv"])

CPU times: user 1min 46s, sys: 11.8 s, total: 1min 58s
Wall time: 11min 26s


In [23]:
data_test = pd.read_csv('../data/test.csv')

In [28]:
data_test[3:4]["id"].values[0]

3

In [40]:
model_es = fasttext.load_model("../models/model_spanish_norm.bin")
model_pt = fasttext.load_model("../models/model_portuguese_norm.bin")





In [47]:
model_pt.predict("testing")[0][0]

'__label__DIGITAL_PORTABLE_MEDIA_PLAYERS'

In [68]:
nlp_es = Spanish()
nlp_pt = Portuguese()
from collections import defaultdict

data = {"id": [], "category": []}

# One (tokenizer, model) pair per supported language.
language_pipeline = {
    "portuguese": (nlp_pt, model_pt),
    "spanish": (nlp_es, model_es),
}

for index, row in data_test.iterrows():
    language = row["language"]
    if language not in language_pipeline:
        # The original used two independent `if`s, so any other value
        # silently reused the previous row's category (or raised NameError
        # on the first row). Fail loudly instead of writing a wrong label.
        raise ValueError("Row id {} has unsupported language {!r}".format(row["id"], language))
    nlp, language_model = language_pipeline[language]
    title = normalize_text(row["title"],nlp)
    category = language_model.predict(title)[0][0]
    data["id"].append(row["id"])
    # Drop the 9-character '__label__' prefix.
    data["category"].append(category[9:])
        

In [69]:
submission = pd.DataFrame.from_dict(data)

In [None]:
submission

In [71]:
submission.to_csv("./submissions/submission_multiple_lang_1.csv",header=["id","category"],index=False)

## Error Analysis

In [74]:
best_model=fasttext.load_model("../models/model_norm1.bin")




In [105]:
val_data = pd.read_csv("../data/val_fasttext_norm.csv",nrows=200000,header=None,names=["tokens"])

In [106]:
val_data["category"] = val_data["tokens"].apply(lambda x: x.split()[0][9:])
val_data["title"] = val_data["tokens"].apply(lambda x:  ' '.join(x.split()[1:]))

In [114]:
val_data["predicted"] = ''
val_data["score"] = ''

In [136]:
predictions = {"id": [], "tokens": [], "category": [], "predicted": [], "score": []}

for index, row in val_data.iterrows():
    result = best_model.predict(row["tokens"])
    predictions["id"].append(index)
    predictions["tokens"].append(row["tokens"])
    predictions["category"].append(row["category"])
    predictions["predicted"].append(result[0][0][9:])
    predictions["score"].append(result[1][0])

prediction_data = pd.DataFrame.from_dict(predictions)

In [None]:
prediction_data

In [None]:
data_train = pd.read_csv('../data/train.csv')

In [198]:
X_train, X_val, y_train, y_val = train_test_split(data_train[["title","label_quality","language"]], data_train["category"], test_size=0.05, random_state=42, stratify=data_train["category"])

In [200]:
# The error-analysis cells below select "original_title" and "label_quality",
# so all three columns must be attached here (two of them were commented out).
# Values are attached by position.
# NOTE(review): this assumes the first 200000 rows of X_val are in the same
# order as the rows read from the validation file; confirm.
prediction_data["original_title"] = X_val[:200000]["title"].values
prediction_data["label_quality"] = X_val[:200000]["label_quality"].values
prediction_data["language"] = X_val[:200000]["language"].values

In [None]:
prediction_data 

In [189]:
prediction_data.rename(columns={'title':'tokens'}, inplace=True)

In [212]:
errors_data = prediction_data[prediction_data["category"] != prediction_data["predicted"]][["id","original_title","tokens","category","predicted","label_quality","language","score"]].copy()

In [206]:
len(errors_data["category"].unique())

1556

In [205]:
errors_data["language"].value_counts(normalize=True)

spanish       0.507781
portuguese    0.492219
Name: language, dtype: float64

In [None]:
errors_data.head(100)[["original_title","tokens","category","predicted","label_quality"]]

In [207]:
data_train["label_quality"].value_counts(normalize=True)

unreliable    0.940788
reliable      0.059212
Name: label_quality, dtype: float64

In [208]:
errors_data["label_quality"].value_counts(normalize=True)

unreliable    0.960193
reliable      0.039807
Name: label_quality, dtype: float64

In [213]:
errors_data.sample(10)[["original_title","tokens","category","predicted","label_quality","score"]]

Unnamed: 0,original_title,tokens,category,predicted,label_quality,score
120636,Coxim Axial Bucha Pivo Rolamento Da Roda Fies...,coxim axial bucha pivo rolamento da roda fiest...,SUSPENSION_BALL_JOINTS,SHOCK_MOUNT_INSOLATORS,unreliable,0.363446
186122,Correntinha + Pingente Folheado A Ouro 46 Cm,correntinha pingente folheado ouro cm,NECKLACES,CHARMS_AND_MEDALS,unreliable,0.939457
4115,Pés De Mesa / Aparador ( Monte Facil ),pes de mesa aparador monte facil,STOOLS,TV_AND_MONITOR_MOUNTS,unreliable,0.305697
60545,Tubo Do Escapamento Motorm Sailor Yamaha 15hp 2t,tubo do escapamento motorm sailor yamaha,MEMORY_CARDS,MOTORCYCLE_EXHAUSTS,unreliable,0.830055
158944,Lote X 10 Codos Tigre 25 Mm A 45 Grados,lote codos tigre mm grados,CONNECTING_COUPLERS,PIPES_AND_TUBES,unreliable,0.845858
8472,Eixo Bmw 116i - 2014 - Sucata Peças,eixo bmw sucata pecas,AUTOMOTIVE_FRONT_BUMPERS,REAR_WHEEL_HUBS_BEARING_ASSEMBLY,unreliable,0.996451
128025,Fix-30p-d8 Baja Puntuación De La Pantalla De L...,baja puntuacion de la pantalla de la linea de,LCD_DISPLAYS,LAPTOP_LCD_SCREENS,unreliable,0.04392
39842,Niebla Fabricante 12 Led Fogger Niebla Fuente ...,niebla fabricante led fogger niebla fuente agu...,DEHUMIDIFIERS,INSECTICIDES,unreliable,0.069133
44770,Borracha Do Bojo Do Farol Olho De Boi E Parala...,borracha do bojo do farol olho de boi paralama...,AUTOMOTIVE_EMBLEMS,AUTOMOTIVE_WEATHERSTRIPS,reliable,0.596206
35632,Valv Agulha Weber 40,valv agulha weber,TURNTABLE_NEEDLES,CAR_CARBURETORS,unreliable,0.961341


## Reliable Labels Oversampling Model

In [17]:
data_train = pd.read_csv('../data/train.csv')

In [9]:
rlabel = data_train[data_train["label_quality"] == 'reliable'].copy()

In [24]:
# Seed the subsample so the derived training files and models are reproducible
# across kernel restarts (42 is the seed the train/validation splits use).
# NOTE(review): 1184245 presumably equals len(rlabel), giving a 1:1 mix of
# reliable and unreliable rows; confirm.
urlabel = data_train[data_train["label_quality"] == 'unreliable'].sample(1184245, random_state=42).copy()

In [25]:
data = pd.concat([rlabel,urlabel])

In [None]:
data.category.value_counts()

In [38]:
%time create_fasttext_split_files(data, None, ["../data/train_fasttext_reliable_norm.csv","../data/val_fasttext_reliable_norm.csv"])

CPU times: user 22.1 s, sys: 9.14 s, total: 31.2 s
Wall time: 3min


In [None]:
%time model = fasttext.train_supervised(input="../data/train_fasttext_reliable_norm.csv", epoch=5, lr=0.5, wordNgrams=2, thread=8)

In [50]:
del data
del urlabel
del rlabel

## Ensemble Models

In [67]:
voted = {'id':[1,2,3,4], 
         1: ['PANTS','SHOES','POSTER','TOYS'], 
         2: ['SHORTS','CLASSIC SHOES','POSTER','TOYS'], 
         3: ['SHORTS','SHOES','CHILD POSTER','TOYS'],
         4: ['PANTS','CLASSIC SHOES','POSTER','TOYS'], 
         5: ['SHORTS','OLD SHOES','POSTER','SMALL TOYS']}
testing = pd.DataFrame.from_dict(voted)

In [68]:
testing

Unnamed: 0,id,1,2,3,4,5
0,1,PANTS,SHORTS,SHORTS,PANTS,SHORTS
1,2,SHOES,CLASSIC SHOES,SHOES,CLASSIC SHOES,OLD SHOES
2,3,POSTER,POSTER,CHILD POSTER,POSTER,POSTER
3,4,TOYS,TOYS,TOYS,TOYS,SMALL TOYS


In [86]:
for index, row in testing.iloc[:,1:].iterrows():
    print(row.value_counts().index[0])


SHORTS
SHOES
POSTER
TOYS


In [1]:
# Loading models

In [4]:
def predict_test_ensemble(modelfiles, datafile):
    """Predict the test set with several models and majority-vote the result.

    Args:
        modelfiles: iterable whose items are either a model file path, or a
            dict with "spanish" and "portuguese" model paths. For a dict, each
            row is predicted by the model matching its language.
        datafile: path of a one-column file of normalised titles.

    Returns:
        DataFrame with columns "id" (row position in ``datafile``) and
        "category" (the most frequent prediction across the models).

    Raises:
        ValueError: a row's language is neither 'spanish' nor 'portuguese'
            when a per-language model pair is used.
    """
    print("Loading data file...")
    data = pd.read_csv(datafile,names=['title'])
    # NOTE(review): this file is read with its first row as header, while
    # other cells write it with header=False and read it with
    # names=["language"]; confirm which layout the file has.
    language_mapping = pd.read_csv('../data/test_language_mapping.csv')
    results_df = pd.DataFrame(data.index.values, columns = ['id'])
    voted_results = {"id": [], "category": []}

    print("Loading models and predicting Test...")
    for i, file in enumerate(modelfiles):
        print("Loading model file ", file, '...')
        if isinstance(file, dict):
            # Here we predict combining models for each language
            predictions = []
            test_language = pd.concat([data,language_mapping['language']],axis=1)
            language_models = {
                "spanish": fasttext.load_model(file["spanish"]),
                "portuguese": fasttext.load_model(file["portuguese"]),
            }
            print("Running predict on test set...")
            rows = zip(test_language["title"], test_language["language"])
            for position, (title, language) in enumerate(rows):
                if language not in language_models:
                    # The original's two independent `if`s silently reused the
                    # previous row's prediction here (e.g. for a NaN language
                    # caused by a misaligned mapping file).
                    raise ValueError("Row {} has unsupported language {!r}".format(position, language))
                category = language_models[language].predict(title)[0][0]
                # Drop the 9-character '__label__' prefix.
                predictions.append(category[9:])
            results_df[i] = pd.Series(predictions)
        else:
            model = fasttext.load_model(file)
            print("Running predict on test set...")
            predictions = model.predict(data["title"].values.tolist())
            results_df[i] = pd.Series([x[0][9:] for x in predictions[0]])
        print("Predict finished for model ", file)
    print("Finished loading models and making predictions for test set")

    print("Counting votes and defining prediction...")
    # Column 0 is "id"; the remaining columns hold one prediction per model.
    for index, row in results_df.iloc[:,1:].iterrows():
        voted_results["id"].append(index)
        voted_results["category"].append(row.value_counts().index[0])

    voted_results_df = pd.DataFrame.from_dict(voted_results)
    print("Finished")
    return voted_results_df
    

In [22]:
model_files = [{"spanish": "../models/model_spanish_norm.bin","portuguese":"../models/model_portuguese_norm.bin"},
               "../models/model_norm2.bin","../models/model_norm1.bin","../models/model_reliable_norm1.bin", "../models/model_norm3.bin"]

In [None]:
language_mapping

In [23]:
%time results = predict_test_ensemble(model_files,'../data/test_fasttext_norm.txt')

Loading data file...
Loading models and predicting Test...
Loading model file  {'spanish': '../models/model_spanish_norm.bin', 'portuguese': '../models/model_portuguese_norm.bin'} ...






Running predict on test set...
Predict finished for model  {'spanish': '../models/model_spanish_norm.bin', 'portuguese': '../models/model_portuguese_norm.bin'}
Loading model file  ../models/model_norm2.bin ...
Running predict on test set...





Predict finished for model  ../models/model_norm2.bin
Loading model file  ../models/model_norm1.bin ...





Running predict on test set...
Predict finished for model  ../models/model_norm1.bin
Loading model file  ../models/model_reliable_norm1.bin ...





Running predict on test set...
Predict finished for model  ../models/model_reliable_norm1.bin
Loading model file  ../models/model_norm3.bin ...
Running predict on test set...





Predict finished for model  ../models/model_norm3.bin
Finished loading models and making predictions for test set
Counting votes and defining prediction...
Finished
CPU times: user 10min 42s, sys: 18.6 s, total: 11min
Wall time: 10min 55s


In [24]:
results

Unnamed: 0,id,category
0,0,DIAPER_BAGS
1,1,BABY_CHANGING_PADS
2,2,ENGINE_COOLING_FAN_MOTORS
3,3,AUTOMOTIVE_SHOCK_ABSORBER_BUMP_STOPS
4,4,BABY_CAR_SEATS
...,...,...
246950,246950,VEHICLE_BRAKE_DISCS
246951,246951,WALKIE_TALKIES
246952,246952,CALCULATORS
246953,246953,DINING_SETS


In [25]:
results.to_csv("./submissions/submission_ensemble_3.csv",header=["id","category"],index=False)

In [35]:
val_data = pd.read_csv('../data/val_fasttext_norm.csv',header=None,names=['title'])

In [38]:
val_data['title'] = val_data['title'].apply(lambda x: ' '.join(x.split()[1:]))

In [None]:
val_data

In [5]:
def run_model_val(model):
    """Predict the validation titles with one model, or with a per-language pair.

    Args:
        model: a model file path, or a dict with "spanish" and "portuguese"
            model paths. For a dict, each row is predicted by the model
            matching its language.

    Returns:
        Series of predicted categories (without the '__label__' prefix), in
        the row order of the validation file.

    Raises:
        ValueError: a row's language is neither 'spanish' nor 'portuguese'
            when a per-language model pair is used.
    """
    val_data = pd.read_csv('../data/val_fasttext_norm.csv',header=None,names=['title'])
    # Each line starts with the label token; keep only the words after it.
    val_data['title'] = val_data['title'].apply(lambda x: ' '.join(x.split()[1:]))

    print("Loading model file ", model, '...')
    if isinstance(model, dict):
        # Here we predict combining models for each language
        predictions = []
        language_mapping = pd.read_csv('../data/val_language_mapping.csv',names=["language"])
        val_language = pd.concat([val_data,language_mapping['language']],axis=1)
        language_models = {
            "spanish": fasttext.load_model(model["spanish"]),
            "portuguese": fasttext.load_model(model["portuguese"]),
        }
        print("Running predict on val set...")
        rows = zip(val_language["title"], val_language["language"])
        for position, (title, language) in enumerate(rows):
            if language not in language_models:
                # The original's two independent `if`s silently reused the
                # previous row's prediction for any other language value.
                raise ValueError("Row {} has unsupported language {!r}".format(position, language))
            category = language_models[language].predict(title)[0][0]
            # Drop the 9-character '__label__' prefix.
            predictions.append(category[9:])
        print("Predict finished for model ", model)
        return pd.Series(predictions)

    # Keep `model` bound to the path so the log line below names the file
    # rather than printing the loaded model object.
    ft_model = fasttext.load_model(model)
    print("Running predict on val set...")
    predictions = ft_model.predict(val_data["title"].values.tolist())
    print("Predict finished for model ", model)
    return pd.Series([x[0][9:] for x in predictions[0]])

            
def parallel_models_get_val_results(model_files, n_cores=5):
    """Run ``run_model_val`` once per entry of ``model_files`` using joblib.

    Args:
        model_files: iterable of model specifications passed one by one to
            ``run_model_val``.
        n_cores: value forwarded to ``Parallel(n_jobs=...)``.

    Returns:
        Whatever the ``Parallel`` call returns for the per-model tasks.
    """
    worker_pool = Parallel(n_jobs=n_cores)
    val_task = delayed(run_model_val)
    return worker_pool(val_task(spec) for spec in model_files)

In [6]:
model_files = [{"spanish": "../models/model_spanish_norm200.bin","portuguese":"../models/model_portuguese_norm200.bin"},
               "../models/model_norm1.bin","../models/model_norm2.bin","../models/model_norm3.bin","../models/model_reliable_norm1.bin"]

%time results = parallel_models_get_val_results(model_files, n_cores=5)

CPU times: user 826 ms, sys: 327 ms, total: 1.15 s
Wall time: 11min 52s


In [10]:
results_df = pd.concat([x for x in results], axis=1)

In [15]:
voted_results = {"id": [], "category": []}

# results_df is the plain concatenation of the per-model prediction Series, so
# it has no leading "id" column. Vote over every column: the `.iloc[:,1:]`
# copied from predict_test_ensemble dropped the first model's predictions.
for index, row in results_df.iterrows():
    voted_results["id"].append(index)
    voted_results["category"].append(row.value_counts().index[0])

voted_results_df = pd.DataFrame.from_dict(voted_results)
voted_results_df

Unnamed: 0,id,category
0,0,TENNIS_BAGS
1,1,ORTHOTICS
2,2,PUPPETS
3,3,AUDIO_INTERFACES
4,4,EPILATORS
...,...,...
999995,999995,COMPUTER_PROCESSORS
999996,999996,LIP_BALMS
999997,999997,CRIBS
999998,999998,AUTOMOTIVE_SPRING_SUSPENSIONS


In [13]:
val_data = pd.read_csv('../data/val_fasttext_norm.csv',header=None,names=['title'])

In [80]:
val_data['category'] = val_data['title'].apply(lambda x: x.split()[0][9:])

In [82]:
results_df["category"] = val_data['category']

In [102]:
test_data = pd.read_csv('../data/test_fasttext_norm.csv',header=None,names=['title'])
test_data['title'] = test_data['title'].apply(lambda x: ' '.join(x.split()))
language_mapping = pd.read_csv('../data/test_language_mapping.csv',names=["language"])
test_language = pd.concat([test_data,language_mapping['language']],axis=1)
test_language.head(11)

Unnamed: 0,title,language
0,kit maternidade bolsa mala baby bebe vinho menina,portuguese
1,trocador de fraldas fisher price feminino rosa...,portuguese
2,motor ventoinha fiat idea palio,portuguese
3,amortecedor mola batente dir new civic,portuguese
4,cadeirinha de carro bebe princesa princess kgs,portuguese
5,cabo freio mao tras direito vw up cod,portuguese
6,mini pc dell optiplex atom gb ram ssd gb wifi,portuguese
7,kit bi xenon lampada,portuguese
8,protetor pe botinha kickboxing karate taekwond...,portuguese
9,disco rigido externo western digital elements tb,spanish


In [109]:
def run_model_test(model):
    """Predict the test titles with one model, or with a per-language pair.

    Args:
        model: a model file path, or a dict with "spanish" and "portuguese"
            model paths. For a dict, each row is predicted by the model
            matching its language.

    Returns:
        Series of predicted categories (without the '__label__' prefix), in
        the row order of the test file.

    Raises:
        ValueError: a row's language is neither 'spanish' nor 'portuguese'
            when a per-language model pair is used.
    """
    test_data = pd.read_csv('../data/test_fasttext_norm.csv',header=None,names=['title'])
    # Collapse runs of whitespace in each title to single spaces.
    test_data['title'] = test_data['title'].apply(lambda x: ' '.join(x.split()))
    # NOTE(review): looks like a leftover debugging dump of the first 10 rows
    # into the working directory; confirm it can be removed.
    test_data.head(10).to_csv('rv.csv')
    print("Loading model file ", model, '...')
    if isinstance(model, dict):
        # Here we predict combining models for each language
        predictions = []
        language_mapping = pd.read_csv('../data/test_language_mapping.csv',names=["language"])
        test_language = pd.concat([test_data,language_mapping['language']],axis=1)
        language_models = {
            "spanish": fasttext.load_model(model["spanish"]),
            "portuguese": fasttext.load_model(model["portuguese"]),
        }
        print("Running predict on test set...")
        rows = zip(test_language["title"], test_language["language"])
        for position, (title, language) in enumerate(rows):
            if language not in language_models:
                # The original's two independent `if`s silently reused the
                # previous row's prediction for any other language value.
                raise ValueError("Row {} has unsupported language {!r}".format(position, language))
            category = language_models[language].predict(title)[0][0]
            # Drop the 9-character '__label__' prefix.
            predictions.append(category[9:])
        print("Predict finished for model ", model)
        return pd.Series(predictions)

    # Keep `model` bound to the path so the log line below names the file
    # rather than printing the loaded model object.
    ft_model = fasttext.load_model(model)
    print("Running predict on test set...")
    predictions = ft_model.predict(test_data["title"].values.tolist())
    print("Predict finished for model ", model)
    return pd.Series([x[0][9:] for x in predictions[0]])

            
def parallel_models_get_test_results(model_files, n_cores=8):
    """Run ``run_model_test`` once per entry of ``model_files`` using joblib.

    Args:
        model_files: iterable of model specifications passed one by one to
            ``run_model_test``.
        n_cores: value forwarded to ``Parallel(n_jobs=...)``.

    Returns:
        Whatever the ``Parallel`` call returns for the per-model tasks.
    """
    test_task = delayed(run_model_test)
    jobs = (test_task(spec) for spec in model_files)
    runner = Parallel(n_jobs=n_cores)
    return runner(jobs)

In [110]:
model_files = [{"spanish": "../models/model_spanish_norm200.bin","portuguese":"../models/model_portuguese_norm200.bin"},
               "../models/model_norm1.bin","../models/model_norm2.bin","../models/model_norm3.bin","../models/model_reliable_norm1.bin"]

%time results = parallel_models_get_test_results(model_files, n_cores=8)

CPU times: user 338 ms, sys: 884 ms, total: 1.22 s
Wall time: 3min 3s


In [114]:
def calculate_results(results):
    """Majority-vote the per-model predictions row by row.

    Args:
        results: iterable of pandas Series, one per model, each holding the
            predicted category for every row.

    Returns:
        DataFrame with columns "id" (the row index) and "category" (the most
        frequent prediction across all models for that row).
    """
    voted_results = {"id": [], "category": []}
    results_df = pd.concat([x for x in results], axis=1)
    # Every column is a model's prediction (there is no leading id column),
    # so all of them vote. The original `.iloc[:,1:]` discarded the first model.
    for index, row in results_df.iterrows():
        voted_results["id"].append(index)
        voted_results["category"].append(row.value_counts().index[0])

    voted_results_df = pd.DataFrame.from_dict(voted_results)
    # Previously placed after the return statement and therefore never printed.
    print("Finished")
    return voted_results_df

In [115]:
%time voted_results_df = calculate_results(results)

CPU times: user 3min 16s, sys: 962 ms, total: 3min 17s
Wall time: 3min 15s


In [116]:
voted_results_df.to_csv("./submissions/submission_ensemble_5.csv",header=["id","category"],index=False)

## Testing

In [7]:
import projectconfig

In [8]:
projectconfig.model_files

{'model_full_2gram': '../models/model_full_2gram',
 'model_full_3gram': '../models/model_full_3gram',
 'model_reliable_2gram': '../models/model_reliable_2gram',
 'model_reliable_3gram': '../models/model_reliable_3gram',
 'model_spanish_2gram': '../models/model_spanish_2gram',
 'model_spanish_3gram': '../models/model_spanish_3gram',
 'model_portuguese_2gram': '../models/model_portuguese_2gram',
 'model_portuguese_3gram': '../models/model_portuguese_3gram'}

In [None]:
# NOTE(review): only `projectconfig.model_files` is displayed in the cell
# above; confirm that `projectconfig.models` exists.
for key, value in projectconfig.models.items():
    model = fasttext.load_model(value)
    # The original `print("model: " key, )` was a syntax error.
    print("model: " + key)

In [13]:
model = fasttext.load_model('../models/model_full_1gram')




In [16]:
%time model.test('../data/val_full_norm.csv')

CPU times: user 8min 42s, sys: 602 ms, total: 8min 43s
Wall time: 8min 41s


(1000000, 0.825809, 0.825809)

In [18]:
def run_model_test(model):
    """Evaluate a saved model on the validation file matching its name.

    The validation file is chosen by the first of 'reliable', 'spanish' or
    'portuguese' found in ``model``; otherwise the full validation file is
    used.

    Args:
        model: path of the model file to load.

    Returns:
        Dict with "model" (the given path) and "accuracy" (the element at
        index 2 of the tuple returned by the model's ``test`` call).
    """
    ftmodel = fasttext.load_model(model)
    val_file_by_marker = (
        ('reliable', '../data/val_reliable_norm.csv'),
        ('spanish', '../data/val_spanish_norm.csv'),
        ('portuguese', '../data/val_portuguese_norm.csv'),
    )
    file = next(
        (path for marker, path in val_file_by_marker if marker in model),
        '../data/val_full_norm.csv',
    )
    return {"model": model, "accuracy": ftmodel.test(file)[2]}

In [19]:
def parallel_models_validate(model_files, n_cores=8):
    """Run ``run_model_test`` on every model path of ``model_files`` using joblib.

    Args:
        model_files: mapping whose values are passed one by one to
            ``run_model_test`` (the keys are ignored).
        n_cores: value forwarded to ``Parallel(n_jobs=...)``.

    Returns:
        Whatever the ``Parallel`` call returns for the per-model tasks.
    """
    validate = delayed(run_model_test)
    runner = Parallel(n_jobs=n_cores)
    return runner(validate(path) for _name, path in model_files.items())

In [20]:
%time results = parallel_models_validate(projectconfig.model_files)

CPU times: user 106 ms, sys: 367 ms, total: 473 ms
Wall time: 21min


In [21]:
results

[{'model': '../models/model_full_2gram', 'accuracy': 0.8832546666666666},
 {'model': '../models/model_full_3gram', 'accuracy': 0.885148},
 {'model': '../models/model_reliable_2gram', 'accuracy': 0.8747523043204132},
 {'model': '../models/model_reliable_3gram', 'accuracy': 0.8757280751793918},
 {'model': '../models/model_spanish_2gram', 'accuracy': 0.8833813333333334},
 {'model': '../models/model_spanish_3gram', 'accuracy': 0.88548},
 {'model': '../models/model_portuguese_2gram', 'accuracy': 0.88886},
 {'model': '../models/model_portuguese_3gram', 'accuracy': 0.8911706666666667}]

In [47]:
model_files = {"model_reliable":projectconfig.base_model_dir + "model_reliable", 
               "model_spanish":projectconfig.base_model_dir + "model_spanish", 
               "model_portuguese":projectconfig.base_model_dir + "model_portuguese"}

%time results = parallel_models_validate(model_files)

CPU times: user 21.7 ms, sys: 208 µs, total: 21.9 ms
Wall time: 4min 18s


In [48]:
results

[{'model': '../models/model_reliable', 'accuracy': 0.8847709520793752},
 {'model': '../models/model_spanish', 'accuracy': 0.883604},
 {'model': '../models/model_portuguese', 'accuracy': 0.889236}]

In [50]:
test_df = pd.read_csv('../data/test.csv')

In [57]:
test_df["language"].to_csv(projectconfig.normalized_language_files["mapping"]["test"], index=False, header=False)

In [1]:
from utils import voting_ensemble_predict, parallel_test_predict

In [2]:
import projectconfig
projectconfig.models_for_predict

{'model_pretrained': '/backups/models/model_test_pretrained.bin',
 'model_full_100': '/home/franco_camporeale/models/model_full_100.bin',
 'model_reliable_2gram': '/backups/models/model_reliable_2gram',
 'model_reliable_3gram': '/backups/models/model_reliable_3gram',
 'model_bilingual_3gram': {'spanish': '/backups/models/model_spanish_3gram',
  'portuguese': '/backups/models/model_portuguese_3gram'},
 'model_bilingual_2gram_100': {'spanish': '/home/franco_camporeale/models/model_spanish_100.bin',
  'portuguese': '/home/franco_camporeale/models/model_portuguese100.bin'},
 'model_bilingual_2gram_300': {'spanish': '/home/franco_camporeale/models/model_spanish_300.bin',
  'portuguese': '/home/franco_camporeale/models/model_portuguese_300.bin'}}

In [48]:
%time base_classifiers_results, voted_results = voting_ensemble_predict()

Executing parallel  count_votes
CPU times: user 1.28 s, sys: 1.11 s, total: 2.39 s
Wall time: 7min 51s


In [59]:
base_classifiers_results["bilingual"].sample(100)

Unnamed: 0,bilingual,bilingual.1,bilingual.2
214016,NETBOOKS,NETBOOKS,NETBOOKS
210498,CELL_BATTERIES,CELL_BATTERIES,CELL_BATTERIES
169585,SWEATSHIRTS_AND_HOODIES,SWEATSHIRTS_AND_HOODIES,SWEATSHIRTS_AND_HOODIES
219414,STEAM_CLEANERS,STEAM_CLEANERS,STEAM_CLEANERS
237931,SOUVENIRS,CANDLE_HOLDERS,SOUVENIRS
...,...,...,...
20050,AUTOMOTIVE_CLUTCH_MASTER_CYLINDERS,AUTOMOTIVE_CLUTCH_MASTER_CYLINDERS,CLUTCH_SLAVE_CYLINDERS
128935,GAMEPADS_AND_JOYSTICKS,GAMEPADS_AND_JOYSTICKS,GAMEPADS_AND_JOYSTICKS
244753,STRAWS,STRAWS,STRAWS
222212,MOTORCYCLE_FAIRINGS,MOTORCYCLE_FAIRINGS,MOTORCYCLE_FAIRINGS


In [60]:
voted_results

Unnamed: 0,id,category
0,0,DIAPER_BAGS
1,1,BABY_CHANGING_PADS
2,2,ENGINE_COOLING_FAN_MOTORS
3,3,AUTOMOTIVE_SHOCK_ABSORBER_BUMP_STOPS
4,4,BABY_CAR_SEATS
...,...,...
30864,246950,VEHICLE_BRAKE_DISCS
30865,246951,WALKIE_TALKIES
30866,246952,CALCULATORS
30867,246953,DINING_SETS


In [61]:
voted_results[["id","category"]].to_csv("./submissions/submission_ensemble_8.csv",index=False)

In [4]:
voted_results

Unnamed: 0,id,category
0,0,DIAPER_BAGS
1,1,BABY_CHANGING_PADS
2,2,ENGINE_COOLING_FAN_MOTORS
3,3,AUTOMOTIVE_SHOCK_ABSORBER_BUMP_STOPS
4,4,BABY_CAR_SEATS
...,...,...
30864,246950,VEHICLE_BRAKE_DISCS
30865,246951,WALKIE_TALKIES
30866,246952,CALCULATORS
30867,246953,DINING_SETS


In [None]:
import pandas as pd
# NOTE(review): `projectconfig` is not imported in this cell — confirm it is
# imported elsewhere before running on a fresh kernel.
# Normalized test titles: headerless file read into a single 'title' column.
test_data = pd.read_csv(projectconfig.normalized_files["test"],header=None,names=['title'])
# Language of each test row, read into a single 'language' column.
language_mapping = pd.read_csv(projectconfig.normalized_language_files["mapping"]["test"],names=["language"])



In [None]:
# Preview the first 50 titles next to their language (column-wise concat aligns on the row index).
pd.concat([test_data,language_mapping['language']],axis=1).head(50)

In [46]:
# Write the raw test set's language column to the mapping file:
# one value per line, without index or header.
test_df = pd.read_csv("../data/test.csv")
test_df["language"].to_csv(projectconfig.normalized_language_files["mapping"]["test"],index=False,header=False,line_terminator='\n')

In [2]:
# Load a trained fastText model from its binary file.
model = fasttext.load_model("../models/model_full_100.bin")




In [7]:
# Ask the model for its 5 most likely labels for a sample title.
predictions = model.predict("notebook toshiba", k=5)

## Training 7 models

In [14]:
# Normalized training file: headerless, each line read into a single 'title' column.
train_data = pd.read_csv('../data/train_full_norm.csv',header=None,names=['title'])


In [105]:
# Write one resampled training file per seed (seeds 7..14) and keep the frames
# in `files`, in seed order.
# NOTE(review): the original call was train_data.sample(train_data, ...), which
# passes the DataFrame itself as `n` and is rejected by pandas. frac=1.0 (a
# seeded shuffle of the full set) is assumed here; if fixed-size subsets were
# intended, replace it with n=<number of rows>.
files = []
for seed in range(7, 15):
    sampled = train_data.sample(frac=1.0, random_state=seed)
    sampled.to_csv('../data/sets/train_set_' + str(seed) + '.csv',index=False,header=False,line_terminator='\n' )
    files.append(sampled)

4000000

In [25]:
## Predicting with multiple models
# Load ensemble members 0..4 from ../models/ensemble into a list, in index order.
models = [
    fasttext.load_model("../models/ensemble/ensemble1_" + str(idx) + ".bin")
    for idx in range(5)
]








In [30]:
# Top-3 prediction of each of the five loaded models for one sample title.
predictions = [
    models[idx].predict("notebook netbook toshiba", k=3)
    for idx in range(5)
]

In [32]:
# Top-3 labels and their probabilities from the second loaded model.
models[1].predict("notebook netbook toshiba", k=3)

(('__label__NETBOOKS',
  '__label__EXTERNAL_LAPTOP_COOLERS',
  '__label__LAPTOP_CHARGERS'),
 array([9.96358752e-01, 1.37456728e-03, 8.17679393e-04]))

In [71]:
# Request up to 1588 labels per title from the second loaded model.
# NOTE(review): 1588 is presumably the number of categories — confirm.
p = models[1].predict("notebook toshiba", k=1588)
p2 = models[1].predict("notebook hp", k=1588)

In [72]:
# Show the (labels, probabilities) tuple predicted for "notebook toshiba".
p

(('__label__NOTEBOOKS',
  '__label__NETBOOKS',
  '__label__LAPTOP_HOUSINGS',
  '__label__DESKTOP_COMPUTERS',
  '__label__LAPTOP_LCD_SCREENS'),
 array([9.50225174e-01, 3.46942171e-02, 6.60157669e-03, 5.57851186e-03,
        7.98502122e-04]))

In [73]:
# Show the (labels, probabilities) tuple predicted for "notebook hp".
p2

(('__label__NOTEBOOKS',
  '__label__NETBOOKS',
  '__label__DESKTOP_COMPUTERS',
  '__label__LAPTOP_HOUSINGS',
  '__label__LAPTOP_STANDS'),
 array([9.27800536e-01, 6.44499511e-02, 3.68276075e-03, 2.92944722e-03,
        4.22653597e-04]))

In [82]:
# Print every (category, probability) pair of both predictions.
# Slicing [9:] removes the 9-character "__label__" prefix from each label.
res = [p, p2]
redict = defaultdict(float)  # created here but not filled in by this cell
for labels, probabilities in res:
    for label, probability in zip(labels, probabilities):
        print(label[9:], probability)
    

NOTEBOOKS 0.9502251744270325
NETBOOKS 0.03469421714544296
LAPTOP_HOUSINGS 0.006601576693356037
DESKTOP_COMPUTERS 0.005578511860221624
LAPTOP_LCD_SCREENS 0.0007985021220520139
NOTEBOOKS 0.9278005361557007
NETBOOKS 0.06444995105266571
DESKTOP_COMPUTERS 0.0036827607546001673
LAPTOP_HOUSINGS 0.002929447218775749
LAPTOP_STANDS 0.0004226535966154188


In [6]:
import utils
# Name -> fastText binary used by the validation ensemble.
# NOTE(review): only seven distinct files are listed (ensemble1_0 .. ensemble1_6).
# model7..model13 repeat model0..model6 and model14 repeats ensemble1_6 again, so
# every file is loaded and predicted at least twice and ensemble1_6 three times —
# confirm the duplicated votes are intentional.
# NOTE(review): the paths are absolute and tied to one user's home directory.
models_for_predict = {
               "model0":"/home/franco_camporeale/models/ensemble/ensemble1_0.bin",
               "model1":"/home/franco_camporeale/models/ensemble/ensemble1_1.bin",
               "model2":"/home/franco_camporeale/models/ensemble/ensemble1_2.bin",
               "model3":"/home/franco_camporeale/models/ensemble/ensemble1_3.bin",
               "model4":"/home/franco_camporeale/models/ensemble/ensemble1_4.bin",
               "model5":"/home/franco_camporeale/models/ensemble/ensemble1_5.bin",
               "model6":"/home/franco_camporeale/models/ensemble/ensemble1_6.bin",
               "model7":"/home/franco_camporeale/models/ensemble/ensemble1_0.bin",
               "model8":"/home/franco_camporeale/models/ensemble/ensemble1_1.bin",
               "model9":"/home/franco_camporeale/models/ensemble/ensemble1_2.bin",
               "model10":"/home/franco_camporeale/models/ensemble/ensemble1_3.bin",
               "model11":"/home/franco_camporeale/models/ensemble/ensemble1_4.bin",
               "model12":"/home/franco_camporeale/models/ensemble/ensemble1_5.bin",
               "model13":"/home/franco_camporeale/models/ensemble/ensemble1_6.bin",
               "model14":"/home/franco_camporeale/models/ensemble/ensemble1_6.bin"
              }

def run_model_on_val(model_file):
    """Predict one category per validation title with a single fastText model.

    Parameters
    ----------
    model_file : str
        Path of the fastText binary to load.

    Returns
    -------
    pandas.Series
        The first predicted label of every row of ``../data/val_full_norm.csv``
        with its first 9 characters (the ``__label__`` prefix) removed. The
        Series is named ``model_file[10:]``, i.e. the path minus its first ten
        characters.
    """
    classifier = fasttext.load_model(model_file)
    print("Running predict on val set...")

    def drop_first_token(line):
        # The first whitespace-separated token is discarded and the remaining
        # tokens are re-joined with single spaces.
        return ' '.join(line.split()[1:])

    val_frame = pd.read_csv('../data/val_full_norm.csv', header=None, names=['title'])
    titles = val_frame['title'].apply(drop_first_token).values.tolist()
    predicted_labels = classifier.predict(titles)[0]
    print("Predict finished for model ", model_file)

    categories = []
    for row_labels in predicted_labels:
        categories.append(row_labels[0][9:])
    return pd.Series(categories, name=model_file[10:])

def count_votes(results_df):
    """Pick the most frequent value of each row as that row's category.

    Parameters
    ----------
    results_df : pandas.DataFrame
        One row per sample. The first column is skipped; every remaining
        column contributes one vote.
        NOTE(review): skipping the first column presumably assumes it is not a
        prediction (e.g. an id) — verify against the frame the caller passes.

    Returns
    -------
    pandas.DataFrame
        Columns ``id`` (the row's index label) and ``category`` (the value
        with the highest count among the voting columns).
    """
    ballots = results_df.iloc[:, 1:]
    ids = []
    winners = []
    for row_label, row_votes in ballots.iterrows():
        tally = row_votes.value_counts()
        ids.append(row_label)
        winners.append(tally.index[0])
    return pd.DataFrame({"id": ids, "category": winners})

def parallel_val_predict():
    """Run ``run_model_on_val`` for every path in ``models_for_predict``.

    The calls are dispatched through joblib with ``n_jobs=8``.

    Returns
    -------
    list
        One result per dictionary entry, in dictionary order.
    """
    jobs = (
        delayed(run_model_on_val)(model_path)
        for model_path in models_for_predict.values()
    )
    return Parallel(n_jobs=8)(jobs)

def voting_ensemble_val_predict():
    """Predict the validation set with every model and combine the results by vote.

    Returns
    -------
    tuple
        ``(base_classifiers_results, voted)``: the per-model predictions
        concatenated column-wise, and the ``id``/``category`` columns of the
        frame produced by applying ``count_votes`` through
        ``utils.parallelize_dataframe``.
    """
    per_model = parallel_val_predict()
    base_classifiers_results = pd.concat(list(per_model), axis=1)
    voted = utils.parallelize_dataframe(base_classifiers_results, count_votes)
    return base_classifiers_results, voted[["id", "category"]]


In [None]:
# Time the validation run of the voting ensemble and keep both returned frames.
%time base_classifiers_results, voted_results = voting_ensemble_val_predict()

In [None]:
# Reload the validation file (headerless, one line per row in 'title').
val_data = pd.read_csv('../data/val_full_norm.csv',header=None,names=['title'])
# The true label is the first token of each line; [9:] removes the
# 9-character "__label__" prefix.
val_data['label'] = val_data['title'].apply(lambda x: x.split()[0][9:])

In [14]:
# Balanced accuracy of the voted categories (predictions) against the validation labels (truth).
acc_score = balanced_accuracy_score(val_data["label"], voted_results["category"])

In [15]:
# Display the balanced accuracy.
acc_score

0.8645562647388264

In [12]:
# NOTE(review): the saved output of this cell (12) differs from the other
# acc_score display (0.8645...) and its execution count is lower, so it looks
# like a leftover from an earlier run — confirm and re-run or remove.
acc_score

12

In [None]:
# Write the score to result.txt through the shell; IPython expands $acc_score
# from the Python namespace before running the command.
!echo $acc_score > result.txt

In [106]:
# Write one bootstrap sample of the training data per seed (seeds 7..14).
# resample(..., replace=True) draws rows with replacement; with no n_samples
# given, each sample has as many rows as train_data.
# The output paths follow the same '../data/sets/train_set_<seed>.csv' pattern
# on every run, so existing files for these seeds are overwritten.
for seed in range(7, 15):
    bootstrap_df = resample(train_data, replace=True, random_state=seed)
    bootstrap_df.to_csv('../data/sets/train_set_' + str(seed) + '.csv',index=False,header=False,line_terminator='\n' )

0                         CAR_SCANNERS
1                       NOTEBOOK_CASES
2                            KEY_RACKS
3                   AUTOMOTIVE_EMBLEMS
4             WIRELESS_FM_TRANSMITTERS
                      ...             
187495        AUTOMOBILE_FENDER_LINERS
187496    SOUND_CONSOLES_AND_DJ_MIXERS
187497                     TOOTHPASTES
187498                      EYESHADOWS
187499                   PILATES_BALLS
Name: category, Length: 1500000, dtype: object

In [36]:
# Put the true label, the voted category and every base-classifier prediction
# side by side. The two Series are built from .values and carry no name, so
# they end up as columns 0 (true label) and 1 (voted category).
complete_df = pd.concat([pd.Series(val_data["label"].values),pd.Series(voted_results["category"].values),base_classifiers_results], axis=1)
# Keep only the rows where the voted category differs from the true label.
error_df = complete_df[complete_df[0]!= complete_df[1]]

In [39]:
# Number of misclassified rows per value of column 0 (the true label), most frequent first.
error_df[0].value_counts()

SOUVENIRS                              862
BOOKS                                  720
DECORATIVE_BOXES                       672
ACTION_FIGURES                         619
AUDIO_AND_VIDEO_CABLES_AND_ADAPTERS    590
                                      ... 
HOSE_CLAMP_PLIERS                        2
CONSTRUCTION_LIME_BAGS                   2
STADIOMETERS                             2
FORCE_GAUGES                             2
PAINTBALL_SMOKE_GRENADES                 1
Name: 0, Length: 1586, dtype: int64

In [54]:
# Misclassified rows whose true label (column 0) is SOUVENIRS.
error_df[error_df[0]=='SOUVENIRS']

Unnamed: 0,0,1,co_camporeale/models/ensemble/ensemble1_0.bin,co_camporeale/models/ensemble/ensemble1_1.bin,co_camporeale/models/ensemble/ensemble1_2.bin,co_camporeale/models/ensemble/ensemble1_3.bin,co_camporeale/models/ensemble/ensemble1_4.bin,co_camporeale/models/ensemble/ensemble1_5.bin,co_camporeale/models/ensemble/ensemble1_6.bin,co_camporeale/models/ensemble/ensemble1_0.bin.1,co_camporeale/models/ensemble/ensemble1_1.bin.1,co_camporeale/models/ensemble/ensemble1_2.bin.1,co_camporeale/models/ensemble/ensemble1_3.bin.1,co_camporeale/models/ensemble/ensemble1_4.bin.1,co_camporeale/models/ensemble/ensemble1_5.bin.1,co_camporeale/models/ensemble/ensemble1_6.bin.1,co_camporeale/models/ensemble/ensemble1_6.bin.2
621,SOUVENIRS,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES,NECKLACES
5361,SOUVENIRS,TOILETRY_BAGS,SOUVENIRS,PICTURE_FRAMES,TOILETRY_BAGS,SOUVENIRS,TOILETRY_BAGS,HANDICRAFT_BOXES,DOORS,SOUVENIRS,PICTURE_FRAMES,TOILETRY_BAGS,SOUVENIRS,TOILETRY_BAGS,HANDICRAFT_BOXES,DOORS,DOORS
6051,SOUVENIRS,STUFFED_TOYS,STUFFED_TOYS,STUFFED_TOYS,STUFFED_TOYS,STUFFED_TOYS,STUFFED_TOYS,KEYCHAINS,STUFFED_TOYS,STUFFED_TOYS,STUFFED_TOYS,STUFFED_TOYS,STUFFED_TOYS,STUFFED_TOYS,KEYCHAINS,STUFFED_TOYS,STUFFED_TOYS
6508,SOUVENIRS,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES,DECORATIVE_BOXES
7303,SOUVENIRS,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES,JEWELRY_BOXES
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1490224,SOUVENIRS,CANDIES,CANDIES,CANDIES,CANDIES,SOUVENIRS,CANDIES,CANDIES,CANDIES,CANDIES,CANDIES,CANDIES,SOUVENIRS,CANDIES,CANDIES,CANDIES,CANDIES
1497165,SOUVENIRS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS,KEYCHAINS
1497602,SOUVENIRS,HANDICRAFT_BOXES,SOUVENIRS,SOUVENIRS,HANDICRAFT_BOXES,HANDICRAFT_BOXES,HANDICRAFT_BOXES,HANDICRAFT_BOXES,HANDICRAFT_BOXES,SOUVENIRS,SOUVENIRS,HANDICRAFT_BOXES,HANDICRAFT_BOXES,HANDICRAFT_BOXES,HANDICRAFT_BOXES,HANDICRAFT_BOXES,HANDICRAFT_BOXES
1497955,SOUVENIRS,WALL_CLOCKS,WALL_CLOCKS,WALL_CLOCKS,TABLE_CLOCKS,WALL_CLOCKS,WALL_CLOCKS,WALL_CLOCKS,WALL_CLOCKS,WALL_CLOCKS,WALL_CLOCKS,TABLE_CLOCKS,WALL_CLOCKS,WALL_CLOCKS,WALL_CLOCKS,WALL_CLOCKS,WALL_CLOCKS


In [None]:
# Collect the misclassified rows on which more than 10 base classifiers agree.
# Columns 0 and 1 of error_df (true label, voted category) are skipped, so only
# the base-classifier columns are counted.
count = 0
check_results = {"id": [], "category": []}
for index, row in error_df.iloc[:, 2:].iterrows():
    vote_counts = row.value_counts()
    # .iloc[0] reads the highest count by position; plain [0] on this
    # label-indexed Series relies on a deprecated positional fallback.
    votes = vote_counts.iloc[0]
    if votes > 10:
        count += 1
        # NOTE(review): index[1] (the runner-up category) is kept from the
        # original code. It does not exist when every classifier voted the
        # same, which used to raise IndexError; None is recorded in that case.
        # Confirm whether the winning category (index[0]) was intended instead.
        runner_up = vote_counts.index[1] if len(vote_counts) > 1 else None
        check_results["category"].append(runner_up)
        check_results["id"].append(index)
check_results = pd.DataFrame.from_dict(check_results)


In [53]:
# Look up the raw validation line (label token followed by the title) at row position 8543.
val_data.iloc[8543,0]

'__label__BOOKS arroces  thermomix  equipo  susaeta'

In [55]:
# Save all misclassified validation rows (row index included) for offline inspection.
error_df.to_csv("errors_val.csv")

In [108]:
# Convert check_results into a DataFrame.
check_results = pd.DataFrame.from_dict(check_results)

In [109]:
# Save the collected rows (row index included) to CSV.
check_results.to_csv("results_validation_7_classifiers.csv")

In [57]:
# Inspect 50 random misclassified rows; no random_state is given, so a re-run shows a different sample.
error_df.sample(50)

Unnamed: 0,0,1,co_camporeale/models/ensemble/ensemble1_0.bin,co_camporeale/models/ensemble/ensemble1_1.bin,co_camporeale/models/ensemble/ensemble1_2.bin,co_camporeale/models/ensemble/ensemble1_3.bin,co_camporeale/models/ensemble/ensemble1_4.bin,co_camporeale/models/ensemble/ensemble1_5.bin,co_camporeale/models/ensemble/ensemble1_6.bin,co_camporeale/models/ensemble/ensemble1_0.bin.1,co_camporeale/models/ensemble/ensemble1_1.bin.1,co_camporeale/models/ensemble/ensemble1_2.bin.1,co_camporeale/models/ensemble/ensemble1_3.bin.1,co_camporeale/models/ensemble/ensemble1_4.bin.1,co_camporeale/models/ensemble/ensemble1_5.bin.1,co_camporeale/models/ensemble/ensemble1_6.bin.1,co_camporeale/models/ensemble/ensemble1_6.bin.2
321610,NETWORK_SWITCHES,GATE_MOTORS,DOOR_AND_GATE_REMOTE_CONTROLS,CAR_SCANNERS,ELECTRONIC_ENTRANCE_INTERCOMS,TV_ANTENNAS,GRILL_GRATES,HYGROMETERS,GATE_MOTORS,DOOR_AND_GATE_REMOTE_CONTROLS,CAR_SCANNERS,ELECTRONIC_ENTRANCE_INTERCOMS,TV_ANTENNAS,GRILL_GRATES,HYGROMETERS,GATE_MOTORS,GATE_MOTORS
993463,JACKETS_AND_COATS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS,SUITS
590082,FLASHLIGHTS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,MIXERS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,ELECTRONIC_ENTRANCE_INTERCOMS,MIXERS,MIXERS
1020181,SARONGS,HANDBAGS,HANDBAGS,SARONGS,SARONGS,HANDBAGS,HANDBAGS,SARONGS,HANDBAGS,HANDBAGS,SARONGS,SARONGS,HANDBAGS,HANDBAGS,SARONGS,HANDBAGS,HANDBAGS
1295192,XENON_KITS,CAR_LIGHT_BULBS,CAR_LIGHT_BULBS,CAR_LIGHT_BULBS,CAR_LIGHT_BULBS,CAR_LIGHT_BULBS,XENON_KITS,XENON_KITS,CAR_LIGHT_BULBS,CAR_LIGHT_BULBS,CAR_LIGHT_BULBS,CAR_LIGHT_BULBS,CAR_LIGHT_BULBS,XENON_KITS,XENON_KITS,CAR_LIGHT_BULBS,CAR_LIGHT_BULBS
63343,KITCHEN_RANGE_HOODS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS,BRAKE_DRUMS
453021,SCHOOL_AND_OFFICE_PAPERS,SCHOOL_AND_OFFICE_ENVELOPES,SCHOOL_AND_OFFICE_ENVELOPES,SCHOOL_AND_OFFICE_ENVELOPES,SCHOOL_AND_OFFICE_ENVELOPES,CARDS_AND_INVITATIONS,SCHOOL_AND_OFFICE_PAPERS,SCHOOL_AND_OFFICE_PAPERS,SCHOOL_AND_OFFICE_ENVELOPES,SCHOOL_AND_OFFICE_ENVELOPES,SCHOOL_AND_OFFICE_ENVELOPES,SCHOOL_AND_OFFICE_ENVELOPES,CARDS_AND_INVITATIONS,SCHOOL_AND_OFFICE_PAPERS,SCHOOL_AND_OFFICE_PAPERS,SCHOOL_AND_OFFICE_ENVELOPES,SCHOOL_AND_OFFICE_ENVELOPES
1396690,EROTIC_CREAMS,DILDOS,WHISKEYS,DILDOS,DILDOS,DILDOS,DILDOS,EROTIC_MASSAGE_OILS,EROTIC_MASSAGE_OILS,WHISKEYS,DILDOS,DILDOS,DILDOS,DILDOS,EROTIC_MASSAGE_OILS,EROTIC_MASSAGE_OILS,EROTIC_MASSAGE_OILS
306696,CATS_AND_DOGS_FOODS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS,CATS_AND_DOGS_TREATS
145225,COMICS,MANGA,MANGA,MANGA,COMICS,STATUES,MANGA,MANGA,MANGA,MANGA,MANGA,COMICS,STATUES,MANGA,MANGA,MANGA,MANGA
