In [1]:
import pandas as pd
import numpy as np
import pickle
from keras.models import load_model

def save_obj(obj, name):
    """Pickle ``obj`` into the file ``name + '.pkl'``.

    The target file is opened in binary write mode, so an existing file with
    the same name is overwritten. Serialization uses
    ``pickle.HIGHEST_PROTOCOL``. Returns ``None``.
    """
    target_path = name + '.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Read ``name + '.pkl'`` and return the unpickled object.

    The file is opened in binary read mode and closed before returning.
    """
    source_path = name + '.pkl'
    with open(source_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

Using TensorFlow backend.


In [2]:
# Column names are supplied explicitly via `names=` instead of being read
# from the file.
columns = [
    'p_id',
    'brand',
    'mpn',
    'p_full_name',
    'description',
    'brand_category',
    'created_at',
    'updated_at',
    'deleted_at',
    'brand_canonical_url',
    'details',
    'labels',
    'bc_p_id',
]
df = pd.read_csv('full_data.csv', names=columns)

In [3]:
# Keep only the columns used downstream, selected by name rather than by
# position so the selection does not silently shift if the column list of the
# load cell changes. These are the same seven columns the positional slice
# np.r_[0:2, 3:6, 9:11] picked out of the 13 loaded ones.
keep_columns = [
    'p_id',
    'brand',
    'p_full_name',
    'description',
    'brand_category',
    'brand_canonical_url',
    'details',
]

# Chained, non-inplace pipeline: select -> drop exact duplicate rows -> index
# by product id.
# NOTE: this cell rebinds `df` and moves `p_id` into the index, so it cannot
# be re-run on its own; re-run the load cell first.
df = (
    df.loc[:, keep_columns]
      .drop_duplicates()
      .set_index('p_id')
)

## I. Style - Nanchun (Aslan) Shi

In [4]:
# Work on a separate copy so the style_prediction column added later in this
# section lands on df1 and not on the shared `df`.
df1 = df.copy()

### 1.1 Embedding

In [5]:
# Free-text columns that feed the embedding branch.
emb_df = df1[['description', 'details']]

In [6]:
# `Preprocessing` is a project-local module (not visible in this notebook).
# NOTE(review): presumably this turns the text columns into model-ready
# vectors -- confirm against Preprocessing.py.
from Preprocessing import embedding_preprocessing
emb_pre = embedding_preprocessing()

In [7]:
# Wrap the preprocessor output in a DataFrame that reuses the product-id index
# of emb_df.
emb_features = emb_pre.preprocess(emb_df)
emb_vector_df = pd.DataFrame(emb_features, index=emb_df.index)

In [8]:
# Saved Keras model for the embedding branch, loaded from the working directory.
EMB_MODEL_PATH = 'style_embedding_model.h5'
emb_model = load_model(EMB_MODEL_PATH)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [9]:
# Model output for the embedding branch; later blended with the TF-IDF branch.
# NOTE(review): assumed to be one score vector per product, with the same
# class order as the TF-IDF model -- confirm.
emb_pred_vectors = emb_model.predict(emb_vector_df)

### 1.2 TF-IDF

In [10]:
# Short categorical / name-like columns that feed the TF-IDF branch.
tfidf_df = df1[['brand', 'p_full_name', 'brand_category', 'brand_canonical_url']]

In [11]:
# `Preprocessing` is a project-local module (not visible in this notebook).
# NOTE(review): presumably this builds TF-IDF features from the selected
# columns -- confirm against Preprocessing.py.
from Preprocessing import tfidf_preprocessing
tfidf_pre = tfidf_preprocessing()

In [12]:
# Re-attach the product-id index of tfidf_df to the preprocessor output.
tfidf_features = tfidf_pre.preprocess(tfidf_df)
tfidf_vector_df = tfidf_features.set_index(tfidf_df.index)

In [13]:
# Saved Keras model for the TF-IDF branch, loaded from the working directory.
TFIDF_MODEL_PATH = 'style_tfidf_model.h5'
tfidf_model = load_model(TFIDF_MODEL_PATH)

In [14]:
# Model output for the TF-IDF branch; later blended with the embedding branch.
# NOTE(review): assumed to have the same shape and class order as
# emb_pred_vectors, since the two are added element-wise -- confirm.
tfidf_pred_vectors = tfidf_model.predict(tfidf_vector_df)

### 1.3 Prediction

In [15]:
def get_pred_classes(mat, top_k=2):
    """Return the indices of the ``top_k`` highest scores in each row of ``mat``.

    Parameters
    ----------
    mat : iterable of 1-D score vectors
        One score vector per sample (for example a 2-D array of class scores).
    top_k : int, default 2
        Number of best-scoring indices to keep per row. The default matches
        the previous hard-coded behaviour.

    Returns
    -------
    numpy.ndarray
        One row per input row. Indices are in ascending score order, so the
        highest-scoring class is in the LAST column and the runner-up just
        before it.

    Raises
    ------
    ValueError
        If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        # A slice of [-0:] would silently return every index.
        raise ValueError('top_k must be a positive integer')
    # np.argsort sorts ascending, so the best-scoring indices are at the tail.
    return np.array([np.argsort(row)[-top_k:] for row in mat])

# Loaded from style_label_dict_rev.pkl; indexed below with predicted class
# indices to obtain the values stored in `style_prediction`.
# NOTE(review): pickle.load executes arbitrary code -- only load trusted files.
label_dict = load_obj('style_label_dict_rev')

In [16]:
# Weighted blend of the two branches' outputs (weights sum to 1).
EMB_WEIGHT = 0.4
TFIDF_WEIGHT = 0.6
final_vectors = EMB_WEIGHT * emb_pred_vectors + TFIDF_WEIGHT * tfidf_pred_vectors

In [17]:
# Two best class indices per product, taken from the blended scores
# (ascending order: runner-up first, best last).
final_pred_classes = get_pred_classes(final_vectors)

In [26]:
# Translate each pair of class indices into labels via label_dict. The pair
# order from get_pred_classes is kept (runner-up first, best last).
df1['style_prediction'] = [
    [label_dict[pair[0]], label_dict[pair[1]]]
    for pair in final_pred_classes
]

In [27]:
# Spot-check the first three rows, including the new style_prediction column.
df1.head(3)

Unnamed: 0_level_0,brand,p_full_name,description,brand_category,brand_canonical_url,details,style_prediction
p_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01DSE9TC2DQXDG6GWKW9NMJ416,Banana Republic,Ankle-Strap Pump,"A modern pump, in a rounded silhouette with an...",Unknown,https://bananarepublic.gap.com/browse/product....,"A modern pump, in a rounded silhouette with an...","[modern, businesscasual]"
01DSE9SKM19XNA6SJP36JZC065,Banana Republic,Petite Tie-Neck Top,Dress it down with jeans and sneakers or dress...,Unknown,https://bananarepublic.gap.com/browse/product....,Dress it down with jeans and sneakers or dress...,"[businesscasual, classic]"
01DSJX8GD4DSAP76SPR85HRCMN,Loewe,52MM Padded Leather Round Sunglasses,Padded leather covers classic round sunglasses.,JewelryAccessories/SunglassesReaders/RoundOval...,https://www.saksfifthavenue.com/loewe-52mm-pad...,100% UV protection\nCase and cleaning cloth in...,"[casual, classic]"
