# Create functions to analyze customers' shopping preferences and recommend targeted products

In [197]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import lifetimes
from typing import Tuple
import sklearn

from datetime import datetime
from dateutil.relativedelta import relativedelta
from sentence_transformers import SentenceTransformer
from sklearn.metrics import DistanceMetric

import os
from google.cloud import bigquery
client = bigquery.Client()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


### Import Data

In [161]:
# Import order data: one row per order item, joined to the buyer's name and
# to the product's name and brand.
# NOTE(review): gcr_project_id is read from the environment but is not used
# anywhere in this cell; the query below hard-codes the project id instead.
gcr_project_id = os.getenv('GCR_CLV_PROJECT_ID')

# The f-prefix is a no-op here: the query text contains no {placeholders}.
# LEFT JOINs keep every order item even when no matching user/product row exists,
# in which case the joined name/brand columns are NULL.
QUERY  = f"""
SELECT
  order_items.user_id,
  users.first_name,
  users.last_name,
  order_items.order_id,
  order_items.product_id,
  products.name as product_name,
  products.brand as product_brand,
  order_items.sale_price,
  order_items.status
FROM `ecommerce-data-project-444616.thelook_ecommerce.order_items` as order_items
LEFT JOIN `ecommerce-data-project-444616.thelook_ecommerce.users` as users
ON order_items.user_id = users.id
LEFT JOIN `ecommerce-data-project-444616.thelook_ecommerce.products` as products
ON order_items.product_id = products.id
ORDER BY order_items.user_id;
"""

# Run the query with the BigQuery client and load the full result into a DataFrame.
df_orders = client.query_and_wait(QUERY).to_dataframe()
df_orders.head()



Unnamed: 0,user_id,first_name,last_name,order_id,product_id,product_name,product_brand,sale_price,status
0,1,Maria,Stevens,1,8053,GREY BLOUSE GAUCHO PALAZZO SET ASYM - FITS - 3...,LOTUSTRADERS,62.990002,Processing
1,2,Brad,Caldwell,2,26233,HUGO BOSS Men's Cotton Boxer Brief Op 3 Pack,HUGO BOSS,35.990002,Cancelled
2,3,Stephanie,Bradshaw,3,2042,ililily Double-layer Cotton Hooded Tailored-fi...,ililily,34.990002,Processing
3,4,Sylvia,Ferguson,4,7730,Diesel Women's Dulhar Blazer,Diesel,348.0,Cancelled
4,5,Emma,Travis,5,7408,Lavender Opaque Skirt Stretch Footless Legging...,Luxury Divas,17.99,Processing


In [162]:
# Import the product catalogue (every column of the products table).
# The f-prefix is a no-op here: the query text contains no {placeholders}.
# Note that this reassigns the QUERY name used by the order-data cell.
QUERY  = f"""
SELECT *
FROM `ecommerce-data-project-444616.thelook_ecommerce.products`;
"""

# Run the query with the BigQuery client and load the full result into a DataFrame.
df_products = client.query_and_wait(QUERY).to_dataframe()
df_products.head()

Unnamed: 0,id,cost,category,name,brand,retail_price,department,sku,distribution_center_id
0,13842,2.51875,Accessories,Low Profile Dyed Cotton Twill Cap - Navy W39S55D,MG,6.25,Women,EBD58B8A3F1D72F4206201DA62FB1204,1
1,13928,2.33835,Accessories,Low Profile Dyed Cotton Twill Cap - Putty W39S55D,MG,5.95,Women,2EAC42424D12436BDD6A5B8A88480CC3,1
2,14115,4.87956,Accessories,Enzyme Regular Solid Army Caps-Black W35S45D,MG,10.99,Women,EE364229B2791D1EF9355708EFF0BA34,1
3,14157,4.64877,Accessories,Enzyme Regular Solid Army Caps-Olive W35S45D (...,MG,10.99,Women,00BD13095D06C20B11A2993CA419D16B,1
4,14273,6.50793,Accessories,Washed Canvas Ivy Cap - Black W11S64C,MG,15.99,Women,F531DC20FDE20B7ADF3A73F52B71D0AF,1


### Create similarity evaluation function

In [200]:
def getQuery(customer_id : int, df_orders : pd.DataFrame):
    """Return the names of the products a customer ordered, excluding cancellations.

    Parameters
    ----------
    customer_id : int
        Value matched against the ``user_id`` column.
    df_orders : pd.DataFrame
        Order-item rows with ``user_id``, ``status`` and ``product_name`` columns.

    Returns
    -------
    list
        ``product_name`` values of the matching rows, in row order. Repeated
        purchases appear repeatedly; the list is empty when nothing matches.
    """
    belongs_to_customer = df_orders['user_id'] == customer_id
    not_cancelled = df_orders['status'] != 'Cancelled'

    return df_orders.loc[belongs_to_customer & not_cancelled, 'product_name'].tolist()

def getEmbeddings(model, df_products):
    """Encode every product name and append the embedding as extra columns.

    Parameters
    ----------
    model
        Object exposing ``encode(list_of_str)`` that returns a 2-D array with
        one row per input string (e.g. a ``SentenceTransformer``).
    df_products : pd.DataFrame
        Product catalogue; must contain a ``name`` column.

    Returns
    -------
    pd.DataFrame
        All original columns of ``df_products`` followed by
        ``product-embedding-0 .. product-embedding-(d-1)``, with the same rows
        and the same index as ``df_products``.
    """
    embedding_arr = model.encode(df_products['name'].to_list())

    embedding_cols = ['product-embedding-'+str(i) for i in range(embedding_arr.shape[1])]

    # Build the embedding frame on the catalogue's own index: concat(axis=1)
    # aligns on index labels, so a default RangeIndex here would misalign rows
    # (and pad with NaN) whenever df_products has been filtered or re-indexed.
    df_embedding = pd.DataFrame(embedding_arr, index=df_products.index, columns=embedding_cols)

    return pd.concat([df_products, df_embedding], axis=1)

def getRecommendationDict(model, queries, df_embedding, metric, top_k : int=5):
    """Find the ``top_k`` nearest catalogue products for each query string.

    Parameters
    ----------
    model
        Object exposing ``encode(str)`` that returns a 1-D embedding vector.
    queries : iterable of str
        Texts to search with (e.g. names of previously purchased products).
    df_embedding : pd.DataFrame
        Catalogue with an ``id`` column plus embedding columns named
        ``product-embedding-<i>`` (the layout produced by ``getEmbeddings``).
    metric
        Object exposing ``pairwise(X, Y)`` that returns a distance matrix
        (e.g. ``sklearn.metrics.DistanceMetric``).
    top_k : int, default 5
        Number of product ids to keep per query.

    Returns
    -------
    dict
        Maps each distinct query to a list of product ids ordered from
        smallest to largest distance.

    Raises
    ------
    ValueError
        If ``df_embedding`` has no ``product-embedding-*`` columns.
    """
    # Pick the embedding columns by name rather than by a fixed positional
    # offset, so the function does not depend on the catalogue having an exact
    # number of metadata columns.
    embedding_cols = [col for col in df_embedding.columns
                      if str(col).startswith('product-embedding-')]
    if not embedding_cols:
        raise ValueError("df_embedding has no 'product-embedding-*' columns")

    # Loop-invariant: build the numeric product matrix once. The ids come from
    # the same frame as the embeddings (not from a notebook global), so ids and
    # distances are guaranteed to refer to the same rows.
    product_matrix = df_embedding[embedding_cols].to_numpy(dtype=float)
    product_ids = df_embedding['id'].to_numpy()

    rec_dict = {}
    # dict.fromkeys de-duplicates while keeping order; repeated queries would
    # otherwise be encoded again only to overwrite the same dictionary key.
    for query in dict.fromkeys(queries):
        query_embedding = np.asarray(model.encode(query), dtype=float).reshape(1,-1)
        query_dist = np.asarray(metric.pairwise(product_matrix, query_embedding)).flatten()
        query_dist_df = pd.DataFrame({'product_id' : product_ids,
                                    'dist' : query_dist})
        # Stable sort keeps catalogue order among equally distant products.
        ranked = query_dist_df.sort_values(by='dist', kind='stable')
        rec_dict[query] = ranked.product_id.head(top_k).tolist()

    return rec_dict

def getProducts(product_id_list : list, df_products : pd.DataFrame):
    """Select the catalogue rows whose ``id`` appears in ``product_id_list``.

    Rows come back in catalogue order (not in the order of the id list), keep
    their original index labels, and ids absent from the catalogue are ignored.
    """
    is_requested = df_products['id'].isin(product_id_list)

    return df_products.loc[is_requested]

In [202]:
def getRecommendedProducts(customer_id : int,
                      model : SentenceTransformer, metric : DistanceMetric,
                      df_orders : pd.DataFrame, df_products : pd.DataFrame,
                      top_k : int=5):
    """Recommend catalogue products similar to what a customer already ordered.

    For every non-cancelled product the customer ordered, the product name is
    embedded with ``model`` and the ``top_k`` catalogue products with the
    smallest ``metric`` distance to it are collected.

    Parameters
    ----------
    customer_id : int
        Customer to look up in ``df_orders.user_id``.
    model : SentenceTransformer
        Text-embedding model used for both catalogue names and queries.
    metric : DistanceMetric
        Distance used to rank catalogue products against each query.
    df_orders : pd.DataFrame
        Order items with ``user_id``, ``status`` and ``product_name`` columns.
    df_products : pd.DataFrame
        Product catalogue with at least ``id`` and ``name`` columns.
    top_k : int, default 5
        Number of recommendations per purchased product.

    Returns
    -------
    pd.DataFrame
        Rows of ``df_products`` (original index labels kept), one group of up
        to ``top_k`` rows per distinct purchased product name. A product may
        appear more than once if it is close to several purchases. When the
        customer has no qualifying purchases, an empty frame with the columns
        of ``df_products`` is returned.

    Notes
    -----
    The whole catalogue is re-embedded on every call, which dominates runtime.
    """

    df_embedding = getEmbeddings(model, df_products)

    queries = getQuery(customer_id, df_orders)

    rec_dict = getRecommendationDict(model, queries, df_embedding, metric, top_k)

    # Collect the per-purchase frames and concatenate once: growing a frame
    # with concat inside a loop copies it on every iteration, and seeding the
    # result with an empty object-dtype frame interferes with dtype inference.
    rec_frames = [getProducts(product_ids, df_products) for product_ids in rec_dict.values()]

    if not rec_frames:
        # No purchases to base recommendations on: empty result, catalogue schema.
        return df_products.head(0).copy()

    return pd.concat(rec_frames, axis=0)

In [203]:
# Load the "all-mpnet-base-v2" sentence-embedding model and a Euclidean distance metric.
model = SentenceTransformer("all-mpnet-base-v2")
metric = DistanceMetric.get_metric('euclidean')

# Example run: recommendations for customer 99998 (default top_k of 5 per purchased product).
getRecommendedProducts(99998, model, metric, df_orders, df_products)

Unnamed: 0,id,cost,category,name,brand,retail_price,department,sku,distribution_center_id
22606,1603,32.248,Fashion Hoodies & Sweatshirts,O'Neill Juniors Rocko Hoodie,O'Neill,69.5,Women,F3173935ED8AC4BF073C1BCD63171F8A,8
22608,1870,20.648,Fashion Hoodies & Sweatshirts,O'Neill Juniors Sunday Morning Hoodie,O'Neill,44.5,Women,D305281FAF947CA7ACADE9AD5C8C818C,8
22612,2096,26.73,Fashion Hoodies & Sweatshirts,O'Neill Juniors Nomad Hoodie,O'Neill,54.0,Women,194CF6C2DE8E00C05FCF16C498ADC7BF,8
22614,2307,21.2,Fashion Hoodies & Sweatshirts,O'Neill Juniors December Hoodie,O'Neill,50.0,Women,6211080FA89981F66B1A0C9D55C61D0F,8
22615,2365,24.7005,Fashion Hoodies & Sweatshirts,O'Neill Juniors Helix Hoodie,O'Neill,49.5,Women,9FB7B048C96D44A0337F049E0A61FF06,8
12324,12783,37.72,Swim,Speedo Women's Off the Grid Endurance+ Flyback...,Speedo,82.0,Women,7854D49BA2F35C970603FBE7B70364F9,4
12341,12862,37.879379,Swim,Speedo Women's Power Sprint Flyback Endurance ...,Speedo,81.989998,Women,E056E52C8DCD019A63E6A3F169892CC9,4
12342,12864,35.998,Swim,Speedo Women's Aqua Sites Endurance+ Flyback P...,Speedo,82.0,Women,240497D1C93F3EA543976E5F331F3F9D,4
12383,13048,18.048,Swim,Speedo Women's Breaststroke 4 Hope Graphic Dai...,Speedo,48.0,Women,2053EA869F5C78D1A98B73AE63133EA1,4
12396,13172,26.102,Swim,Speedo Womens Endurance Plus Lifeguard 2 Piece...,Speedo,62.0,Women,2A3D16448453D694B503AEEBFD710AA7,4
