In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from gensim.models import Word2Vec

In [2]:
products = pd.read_csv('products.csv')
products.sample(5)

Unnamed: 0,product_id,product_name,aisle_id,department_id
6306,6307,Citrus Ginger Kraut Pickles,110,13
15274,15275,Milk Chocolate Brownies,105,13
23874,23875,Dark Chocolate Truffles,45,19
25066,25067,Lactose Free Cheesy Bliss American Style Singles,21,16
33023,33024,Migrane Acetaminophen Aspirin (NSAID) and Caff...,133,11


In [97]:
model = Word2Vec.load("cbow_w2v_model")

In [98]:
prod_ids = products['product_id'].tolist()
prod_names = products['product_name'].tolist()

In [99]:
prod_dict = {}
embed_mat = []
for i,(prod_id,prod_name) in enumerate(zip(prod_ids,prod_names)):
    try:
        embed_mat.append(model.wv[str(prod_id)])
        prod_dict[i] = {"prod_id":prod_id,"prod_name":prod_name}
    except:
        pass

In [100]:
embed_mat = np.array(embed_mat)
embed_mat.shape

(49677, 100)

In [101]:
for p in range(embed_mat.shape[1]):
    print(str(p))
    top = embed_mat[:,p].argsort()[-5:][::-1]
    for i in top:
        print(prod_dict[i]["prod_name"])

0
24 Hour Allergy Relief Liquid
Organic Milk
Organic Iced Coffee & Milk
Garden Salsa Multigrain Chips
Xtra Protection Extra Long Dailies Pantiliners
1
Frozen Palak Paneer Meal with Naan Bread
Shrimp Scampi
Ten Hills Pale Ale
Deluxe French Dressing
Bissell SteamBoost Steam Mop Starter
2
Bread Crumbs Plain
Evening Primrose Oil 500 Mg
Precision Clean Replacement Electric Toothbrush Head
Organic Bananas Stage 2 Baby Food
Made With Organic Beans & Rice Burrito Non-Dairy
3
Mayo Sriracha
Tart Cherry Melon Kombucha
Extra Hot Peri-Peri Sauce
Jumbo Ice Cream Cup Cones
Pomegranate Blueberry Pistachio Plus Antioxidants Fruit & Nut Bar
4
Wrigley's 5 React2 Unique Mint Sugar-Free Gum
Deil Style Kosher Dill Relish
Scar Treatment
Milk Whole
Sculpin Indian Pale Ale
5
Anytime Favorites Hickory Smoked Boneless Ham Steak
Brooklyn Pizza Dough
Dark Chocolate Chunk Chewy Granola Bars
Picnic Style Pork & Beans
Birthday Cake Ice Cream
6
Organic Superfood Coconut Flour Gluten-Free
Beef & Cheese Burritos
Trainin

KeyError: 36232

## Varimax rotation
https://github.com/rossfadely/consomme/blob/master/consomme/rotate_factor.py

In [95]:
def ortho_rotation(lam, method='varimax',gamma=None,
                   eps=1e-6, itermax=1000):
    """
    Return orthogal rotation matrix
    TODO: - other types beyond 
    """
    if gamma == None:
        if (method == 'varimax'):
            gamma = 1.0
        if (method == 'quartimax'):
            gamma = 0.0

    nrow, ncol = lam.shape
    R = np.eye(ncol)
    var = 0

    for i in range(itermax):
        lam_rot = np.dot(lam, R)
        tmp = np.diag(np.sum(lam_rot ** 2, axis=0)) / nrow * gamma
        u, s, v = np.linalg.svd(np.dot(lam.T, lam_rot ** 3 - np.dot(lam_rot, tmp)))
        R = np.dot(u, v)
        var_new = np.sum(s)
        if var_new < var * (1 + eps):
            print(i)
            break
        var = var_new

    return R

In [102]:
rotation_mat = ortho_rotation(embed_mat,gamma=1)
rotation_mat.shape

147


(100, 100)

In [103]:
rotated_embed = np.dot(embed_mat, rotation_mat)
rotated_embed.shape

(49677, 100)

In [104]:
for p in range(rotated_embed.shape[1]):
    print(str(p))
    top = rotated_embed[:,p].argsort()[-5:][::-1]
    for i in top:
        try:
            print(prod_dict[i]["prod_name"])
        except:
            pass

0
24 Hour Allergy Relief Liquid
Triple Berry Blend
Charcoal Lighter Fluid, Odorless
Crispy Mango Fruit
Apollo Fresh Deodorant Stick
1
24 Hour Volume Conditioner
Diet Coke Caffeine Free Soda
Light Creamy Swiss Flavor Spreadable Cheese Wedges
Original Instant Oatmeal Hot Cereal
Extra Strength Pain Relieving Balm
2
Grilled Chicken Caesar
Chile Picante Chips
Talenti Banana Chocolate Swirl Gelato
Prune Juice with Pulp
Slim Cut Reduced Fat 2% Milk Sharp Cheddar Cheese
3
Organic Gluten Free Rigatoni
Skinny Gut Ultimate Chocolate Shake Single
Original Gluten-Free Pizza Crust
Gluten Free Pizza Shell
Mushroom Lo Mein Noodles
4
Mango Sparkling Water
Light White Grape Juice Drink
Honey Maid Teddy Grahams
Original Dairy Free Butter
Strawberry & Blackcurrant Preserves
5
Birthday Cake Ice Cream
Chicken Enchiladas Poblano
Organic Red Grapefruit
Meritage
Ale, India Pale, Brew Free! Or Die IPA
6
Ginger Soda
Sushi Nori Toasted Sea Vegetable
Itch & Flake Therapy Medicated Anti-Dandruff Shampoo
Classic Dec

Organic Strawberry Smash Standup Lollipops
Original Baked Whole Grain Wheat Crackers
Cranberry Orange Scones
Meatloaf With Home Style Mashed Potatoes & Gravy
Original Pineapple Low Fat Yogurt
75
Original Stain Remover and Color Booster
Butter Hamburger Buns
Black Tea Organic Darjeeling
Pear Juice from Concentrate
Bunny Ears Crisp Wafers in Milk Chocolate
76
Mueslix Cereal
Golden Roasted Turkey Breast Complete Meals
Acai Berry Chia Bars
98% Fat Free Premium Ham With Natural Juices
Premium Dog Food with Lamb and Rice Classic Pate Dog Food
77
The Red ONe: Squished Fruit Smoothies
Organic Gotta Have Vanilla Nonfat Frozen Yogurt
Carafe with Lid
Butterscotch Pudding
Glide Pro-Health Original Floss
78
Kale Blazer
100% Grated Parmesan Cheese
Old Fashioned Hard Sourdough Pretzels
Original Jerky
Broccoli & Apple Stage 2 Baby Food
79
Organic Uncooked Flour Tortillas
Original Wheat Crackers
Frozen Palak Paneer Meal with Naan Bread
Honey Chipotle Salmon
Very Emollient After Sun 85% Aloe Vera Lotion

In [107]:
r=np.random.choice(range(len(prod_dict.keys())))
print(prod_dict[r]['prod_name'])
rotated_embed[r]

Plus Lotion Facial Tissues


array([ 7.67279726e-02,  4.68477095e-01,  5.16196215e-01,  6.60855564e-02,
        2.62978329e-01, -7.67285678e-03,  2.94672538e-01, -1.07841270e-01,
        6.20899380e-02,  3.02534293e-01, -2.98863961e-01, -5.37028542e-01,
        2.40554659e-01,  3.91399685e-01,  1.31876860e-01, -1.27174321e-01,
        3.96630136e-01, -4.86627894e-01,  2.45186979e-01, -5.13192626e-01,
       -3.60690344e-01,  1.30094462e-01, -2.47615883e-01,  6.82731374e-02,
       -5.46554214e-01,  3.70925776e-01,  4.76474333e-02, -4.10905367e-01,
       -3.47336353e-01, -6.11951127e-01,  3.31779065e-01, -1.15779133e-01,
        4.05735963e-02,  5.75469781e-01, -5.88945790e-01,  1.89075566e-01,
       -5.05533401e-01, -3.86033811e-02,  3.19922347e-01, -2.90797278e-01,
       -8.61420551e-03,  5.20162960e-01, -5.94750896e-01, -6.51297209e-01,
       -9.45673776e-02, -9.58497641e-01,  6.94649990e-01,  1.71879937e-01,
        2.25958564e-01,  1.61674487e-01, -3.95390662e-01, -3.61239603e-01,
        2.13313443e-01, -

array([-0.07035232, -0.21837053, -0.34172217, -0.0405828 ,  0.00232881,
       -0.06529592,  0.06059534,  0.02546344, -0.07151947,  0.04803478,
       -0.35397362,  0.12250824,  0.24726617,  0.22340814, -0.01062366,
        0.09970128,  0.35410678,  0.02798459, -0.1841646 , -0.1240561 ,
       -0.02011873, -0.0575058 ,  0.12053267,  0.09192165,  0.00484189,
       -0.19085826, -0.46984493,  0.04102642,  0.14786274,  0.30073782,
       -0.50342711, -0.08282258,  0.07982296, -0.26349217, -0.04401735,
       -0.06092434,  0.57168097, -0.15440025, -0.08347725,  0.17518904,
        0.20909999, -0.17951635, -0.10007868, -0.04726606,  0.0777791 ,
        0.14855377,  0.28551412,  0.16643183,  1.64638635, -0.12072283,
        0.12069569,  0.20186426,  0.16374778,  0.27453834, -1.19681391,
       -0.17193226, -0.13147294, -0.03977116, -0.26634066, -0.11338936,
        0.13718835, -0.0731562 ,  0.35920317, -0.0265957 , -0.13439435,
       -0.69684121,  0.0644937 ,  0.19162678,  0.02184573, -0.16