# Create a function to analyze customers' shopping preferences and recommend targeted products

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import lifetimes
from typing import Tuple
import sklearn

from datetime import datetime
from dateutil.relativedelta import relativedelta
from sentence_transformers import SentenceTransformer
from sklearn.metrics import DistanceMetric

import os
from google.cloud import bigquery
client = bigquery.Client()

  from .autonotebook import tqdm as notebook_tqdm


### Import Data

In [2]:
# Import order data
# NOTE(review): gcr_project_id is read from the environment but is not referenced
# by the query below, which spells out the project in each table path.
gcr_project_id = os.getenv('GCR_CLV_PROJECT_ID')

# One row per order item, LEFT JOINed to the buyer's first/last name and to the
# product's name and brand, ordered by user id.
# NOTE(review): the f-prefix is a no-op here; the string has no placeholders.
QUERY  = f"""
SELECT
  order_items.user_id,
  users.first_name,
  users.last_name,
  order_items.order_id,
  order_items.product_id,
  products.name as product_name,
  products.brand as product_brand,
  order_items.sale_price,
  order_items.status
FROM `ecommerce-data-project-444616.the_look_ecommerce_constant.order_items` as order_items
LEFT JOIN `ecommerce-data-project-444616.the_look_ecommerce_constant.users` as users
ON order_items.user_id = users.id
LEFT JOIN `ecommerce-data-project-444616.the_look_ecommerce_constant.products` as products
ON order_items.product_id = products.id
ORDER BY order_items.user_id;
"""

# Run the query through the BigQuery client and load the result into a DataFrame.
df_orders = client.query_and_wait(QUERY).to_dataframe()
# Preview the first rows as the cell output.
df_orders.head()



Unnamed: 0,user_id,first_name,last_name,order_id,product_id,product_name,product_brand,sale_price,status
0,2,Erica,Wright,1,4640,GUESS Power Skinny Jeans in Resolute,GUESS,89.0,Shipped
1,3,Robert,Moore,3,21748,Allegra K Mens Casual NEW Stylish Drawstring E...,Allegra K,16.379999,Complete
2,3,Robert,Moore,3,29007,Washed Hunting Fishing Outdoor Hat-Camo W11S41D,MG,26.99,Complete
3,3,Robert,Moore,3,17321,STANDARD SUPPLY Slim Fit Mens Zip Hoodie,Standard Supply,29.99,Complete
4,3,Robert,Moore,3,16887,Volcom Men's Bangout Tank Shirt,Volcom,35.0,Complete


In [3]:
# Fetch the whole products table; it is the catalogue that recommendations are
# drawn from. This rebinds QUERY, which previously held the order-items query.
# NOTE(review): the f-prefix is a no-op here; the string has no placeholders.
QUERY  = f"""
SELECT *
FROM `ecommerce-data-project-444616.the_look_ecommerce_constant.products`;
"""

# Run the query through the BigQuery client and load the result into a DataFrame.
df_products = client.query_and_wait(QUERY).to_dataframe()
# Preview the first rows as the cell output.
df_products.head()

Unnamed: 0,id,cost,category,name,brand,retail_price,department,sku,distribution_center_id
0,27569,92.652563,Swim,2XU Men's Swimmers Compression Long Sleeve Top,2XU,150.410004,Men,B23C5765E165D83AA924FA8F13C05F25,1
1,27445,24.719661,Swim,TYR Sport Men's Square Leg Short Swim Suit,TYR,38.990002,Men,2AB7D3B23574C3DEA2BD278AFD0939AB,1
2,27457,15.8976,Swim,TYR Sport Men's Solid Durafast Jammer Swim Suit,TYR,27.6,Men,8F831227B0EB6C6D09A0555531365933,1
3,27466,17.85,Swim,TYR Sport Men's Swim Short/Resistance Short Sw...,TYR,30.0,Men,67317D6DCC4CB778AEB9219565F5456B,1
4,27481,29.408001,Swim,TYR Alliance Team Splice Jammer,TYR,45.950001,Men,213C888198806EF1A0E2BBF2F4855C6C,1


### Create similarity evaluation function

In [200]:
def getQuery(customer_id : int, df_orders : pd.DataFrame):
    """Return the names of the products a customer ordered.

    Rows whose ``status`` equals 'Cancelled' are left out; every other status
    is kept. Names are returned in the row order of ``df_orders`` and may
    contain duplicates.

    Parameters
    ----------
    customer_id : int
        Value matched against the ``user_id`` column.
    df_orders : pd.DataFrame
        Order items with ``user_id``, ``status`` and ``product_name`` columns.

    Returns
    -------
    list
        The ``product_name`` values of the matching rows (empty if none match).
    """
    belongs_to_customer = df_orders['user_id'] == customer_id
    not_cancelled = df_orders['status'] != 'Cancelled'

    return df_orders.loc[belongs_to_customer & not_cancelled, 'product_name'].tolist()

def getEmbeddings(model, df_products):
    """Encode every product name and append the embedding as extra columns.

    Parameters
    ----------
    model
        Object exposing ``encode(list_of_str)`` that returns a 2-D array with
        one row per input string (e.g. a ``SentenceTransformer``).
    df_products : pd.DataFrame
        Product catalogue; must contain a ``name`` column.

    Returns
    -------
    pd.DataFrame
        The columns of ``df_products`` followed by ``product-embedding-0`` ..
        ``product-embedding-(d-1)``, with the same index and row order as
        ``df_products``.
    """
    embedding_arr = model.encode(df_products['name'].to_list())

    embedding_cols = ['product-embedding-'+str(i) for i in range(embedding_arr.shape[1])]

    # Build the embedding frame on df_products' own index. concat(axis=1) aligns
    # rows by index label, so a default RangeIndex here would mis-pair rows (and
    # pad with NaN) whenever df_products was filtered, sorted or re-indexed.
    df_vectors = pd.DataFrame(embedding_arr, index=df_products.index, columns=embedding_cols)

    return pd.concat([df_products, df_vectors], axis=1)

def getRecommendationDict(model, queries, df_embedding, metric, top_k : int=5):
    """Map each query string to the ids of its ``top_k`` nearest products.

    Parameters
    ----------
    model
        Object exposing ``encode(str)`` that returns a 1-D embedding array.
    queries : iterable
        Query strings (product names). Duplicates are encoded only once; the
        result has one key per distinct query, in first-seen order.
    df_embedding : pd.DataFrame
        Catalogue with an ``id`` column plus embedding columns. Columns whose
        name starts with ``product-embedding-`` are used as the vectors; if no
        such column exists, every column from position 9 onwards is used.
    metric
        Object exposing ``pairwise(X, Y)`` that returns a distance matrix
        (e.g. ``sklearn.metrics.DistanceMetric``).
    top_k : int, default 5
        Number of product ids kept per query.

    Returns
    -------
    dict
        ``{query: [product_id, ...]}`` with ids ordered by ascending distance.
    """
    # Pick the embedding columns by name rather than by a fixed position, so the
    # function does not depend on the catalogue having exactly nine columns.
    embedding_cols = [col for col in df_embedding.columns
                      if str(col).startswith('product-embedding-')]
    if embedding_cols:
        product_vectors = df_embedding[embedding_cols].to_numpy(dtype=float)
    else:
        # Positional fallback for frames that do not use the column prefix.
        product_vectors = df_embedding.values[:,9:]

    # Take the ids from the frame that was passed in, not from a notebook
    # global, so the ids always line up row-for-row with the vectors.
    product_ids = df_embedding['id'].to_numpy()

    rec_dict = {}
    # dict.fromkeys drops repeated queries while keeping first-seen order; a
    # repeat would only overwrite its own entry with the same result.
    for query in dict.fromkeys(queries):
        query_embedding = model.encode(query).reshape(1,-1)
        query_dist = metric.pairwise(product_vectors, query_embedding).flatten()
        query_dist_df = pd.DataFrame({'product_id' : product_ids,
                                    'dist' : query_dist})
        rec_dict[query] = query_dist_df.sort_values(by='dist').product_id.head(top_k).tolist()

    return rec_dict

def getProducts(product_id_list : list, df_products : pd.DataFrame):
    """Return the catalogue rows whose ``id`` appears in ``product_id_list``.

    Rows come back in the order (and with the index) they have in
    ``df_products``, not in the order of ``product_id_list``.
    """
    is_requested = df_products['id'].isin(product_id_list)

    return df_products.loc[is_requested]

In [202]:
def getRecommendedProducts(customer_id : int,
                      model : SentenceTransformer, metric : sklearn.metrics,
                      df_orders : pd.DataFrame, df_products : pd.DataFrame,
                      top_k : int=5, df_embedding : pd.DataFrame=None):
    """Recommend catalogue products similar to what a customer already ordered.

    For each distinct product name the customer ordered (cancelled items
    excluded), the ``top_k`` catalogue products closest to it in embedding
    space are looked up, and the matching catalogue rows are stacked.

    Parameters
    ----------
    customer_id : int
        Customer to build recommendations for.
    model : SentenceTransformer
        Text-embedding model used for both product names and queries.
    metric
        Object exposing ``pairwise(X, Y)``, e.g. the result of
        ``DistanceMetric.get_metric('euclidean')``.
    df_orders : pd.DataFrame
        Order items, as expected by ``getQuery``.
    df_products : pd.DataFrame
        Product catalogue, as expected by ``getEmbeddings`` / ``getProducts``.
    top_k : int, default 5
        Number of neighbours kept per ordered product.
    df_embedding : pd.DataFrame, optional
        Pre-computed output of ``getEmbeddings(model, df_products)``. Pass it to
        avoid re-encoding the whole catalogue on every call; when omitted the
        embeddings are computed here.

    Returns
    -------
    pd.DataFrame
        Rows of ``df_products`` (original index kept), grouped per ordered
        product. A product recommended for several ordered products appears
        once per group. Empty, with the columns of ``df_products``, when the
        customer has no qualifying orders.
    """
    if df_embedding is None:
        df_embedding = getEmbeddings(model, df_products)

    queries = getQuery(customer_id, df_orders)

    rec_dict = getRecommendationDict(model, queries, df_embedding, metric, top_k)

    # Collect the per-query frames and concatenate once. Growing a frame inside
    # the loop is quadratic, and seeding it with an empty object-dtype frame
    # degrades the column dtypes of the result.
    rec_frames = [getProducts(rec_ids, df_products) for rec_ids in rec_dict.values()]

    if not rec_frames:
        # Nothing to recommend: an empty slice keeps the catalogue's columns
        # and dtypes without hard-coding the schema.
        return df_products.iloc[0:0].copy()

    return pd.concat(rec_frames, axis=0)

In [203]:
# Sentence-embedding model used to encode product names.
model = SentenceTransformer("all-mpnet-base-v2")
# Euclidean distance between embeddings is used to rank products.
metric = DistanceMetric.get_metric('euclidean')

# Example run for customer id 99998 with the default top_k; the returned
# DataFrame is displayed as the cell output.
getRecommendedProducts(99998, model, metric, df_orders, df_products)

Unnamed: 0,id,cost,category,name,brand,retail_price,department,sku,distribution_center_id
22606,1603,32.248,Fashion Hoodies & Sweatshirts,O'Neill Juniors Rocko Hoodie,O'Neill,69.5,Women,F3173935ED8AC4BF073C1BCD63171F8A,8
22608,1870,20.648,Fashion Hoodies & Sweatshirts,O'Neill Juniors Sunday Morning Hoodie,O'Neill,44.5,Women,D305281FAF947CA7ACADE9AD5C8C818C,8
22612,2096,26.73,Fashion Hoodies & Sweatshirts,O'Neill Juniors Nomad Hoodie,O'Neill,54.0,Women,194CF6C2DE8E00C05FCF16C498ADC7BF,8
22614,2307,21.2,Fashion Hoodies & Sweatshirts,O'Neill Juniors December Hoodie,O'Neill,50.0,Women,6211080FA89981F66B1A0C9D55C61D0F,8
22615,2365,24.7005,Fashion Hoodies & Sweatshirts,O'Neill Juniors Helix Hoodie,O'Neill,49.5,Women,9FB7B048C96D44A0337F049E0A61FF06,8
12324,12783,37.72,Swim,Speedo Women's Off the Grid Endurance+ Flyback...,Speedo,82.0,Women,7854D49BA2F35C970603FBE7B70364F9,4
12341,12862,37.879379,Swim,Speedo Women's Power Sprint Flyback Endurance ...,Speedo,81.989998,Women,E056E52C8DCD019A63E6A3F169892CC9,4
12342,12864,35.998,Swim,Speedo Women's Aqua Sites Endurance+ Flyback P...,Speedo,82.0,Women,240497D1C93F3EA543976E5F331F3F9D,4
12383,13048,18.048,Swim,Speedo Women's Breaststroke 4 Hope Graphic Dai...,Speedo,48.0,Women,2053EA869F5C78D1A98B73AE63133EA1,4
12396,13172,26.102,Swim,Speedo Womens Endurance Plus Lifeguard 2 Piece...,Speedo,62.0,Women,2A3D16448453D694B503AEEBFD710AA7,4
