## Reading Data

In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
## Sample Reward Function to get the reward corresponding to a chosen arm
def sampleReward(SelectedEventsTest, clusterLabel, armActual):
    """Sample one (visitor, item) pair for a cluster/arm and score its strongest event.

    The rows of ``SelectedEventsTest`` whose ``cluster`` equals ``clusterLabel``
    and whose ``MainCatID`` equals ``armActual`` form the candidate pool. One
    visitor is drawn at random from the pool, then one of that visitor's items.
    The rows of the sampled pair are dropped from ``SelectedEventsTest``
    **in place**, so the same pair cannot be drawn again.

    Parameters
    ----------
    SelectedEventsTest : pandas.DataFrame
        Must provide the columns ``cluster``, ``MainCatID``, ``visitorid``,
        ``itemid`` and ``event``. Mutated in place.
    clusterLabel
        Value matched against the ``cluster`` column.
    armActual
        Value matched against the ``MainCatID`` column.

    Returns
    -------
    tuple
        ``(Reward, visitoridSample, itemIdSample)``. ``Reward`` is 3 if the
        pair has a 'transaction' event, otherwise 2 if it has an 'addtocart'
        event, otherwise 1 for any other event. When no row matches the
        cluster/arm the result is ``(0, None, None)``.
    """
    sampleDataCluster = SelectedEventsTest[SelectedEventsTest.cluster == clusterLabel]
    sampleDataClusterArm = sampleDataCluster[sampleDataCluster.MainCatID == armActual]
    if len(sampleDataClusterArm) == 0:  ## Not even a View
        return 0, None, None

    visitoridSample = sampleDataClusterArm['visitorid'].sample(1).values[0]
    visitoridSampleDF = sampleDataClusterArm[sampleDataClusterArm.visitorid == visitoridSample]
    itemIdSample = visitoridSampleDF['itemid'].sample(1).values[0]
    visitoridSampleDFItem = visitoridSampleDF[visitoridSampleDF.itemid == itemIdSample]

    ## Score the pair by its most valuable event. Taking the maximum keeps the
    ## result independent of the order in which the event names are visited
    ## (a later 'view' must not overwrite an earlier 'transaction').
    eventRewards = {'addtocart': 2, 'transaction': 3}
    observedEvents = visitoridSampleDFItem['event'].value_counts().keys()
    ## value_counts() skips missing events, so a pair with no recorded event
    ## name falls back to a reward of 0.
    Reward = max((eventRewards.get(key, 1) for key in observedEvents), default=0)

    ## Sampling without replacement: remove the consumed rows from the pool.
    SelectedEventsTest.drop(visitoridSampleDFItem.index, inplace=True)

    return Reward, visitoridSample, itemIdSample

## Random Recommendation Approach

In [4]:
##################################################################################
def simulatorRandomRandom(df, n_arms, n_steps): ## Random Cluster Random Arm
    """Baseline simulation: pick a random cluster and a random arm at every step.

    Relies on the module-level ``AlphaBetaClusterDict``, ``UniqueMainCatID``
    and ``SelectedEventsTest``. ``df`` is accepted but not read; rewards are
    sampled from ``SelectedEventsTest`` (which ``sampleReward`` shrinks in
    place). The per-cluster alpha/beta counts in ``AlphaBetaClusterDict`` are
    updated even though they do not influence the random choice.

    Parameters
    ----------
    df : unused
    n_arms : int
        Number of arms; arm indices are drawn from ``range(n_arms)``.
    n_steps : int
        Number of simulated recommendations.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_steps, 5)`` whose columns are
        ``[reward, visitorid, itemid, clusterLabel, armActual]``. The two id
        columns are NaN for steps where no matching event was found.
    """
    Draws = np.zeros((n_steps, 5))
    for DrawIt in range(n_steps):

        clusterLabel = np.random.randint(11)   ## Random Cluster
        alphas, betas = AlphaBetaClusterDict[clusterLabel]

        arm = np.random.randint(n_arms)        ## Random Arm

        armActual = UniqueMainCatID[arm]
        reward, visitoridSample, itemIdSample = sampleReward(SelectedEventsTest, clusterLabel, armActual)

        ## sampleReward returns None ids when nothing matched; None cannot be
        ## written into the float array, so record the miss as NaN instead.
        Draws[DrawIt] = np.array([
            reward,
            np.nan if visitoridSample is None else visitoridSample,
            np.nan if itemIdSample is None else itemIdSample,
            clusterLabel,
            armActual,
        ])

        ##  Updating arm rewards
        alphas[arm] = alphas[arm] + reward
        betas[arm] = betas[arm] + 1 - min(reward, 1)
        AlphaBetaClusterDict[clusterLabel] = [alphas, betas]

    return Draws

## Simple Thompson Without Clustering

In [94]:
##################################################################################
## Updating prior using the train data
## Every arm starts from alpha = beta = 1 and is then updated once per
## (visitor, item) pair found in SelectedEventsTrain for that arm.
alphasModel = np.ones(n_arms)
betasModel = np.ones(n_arms)

for arm in tqdm((range(n_arms))):
    armActual =  UniqueMainCatID[arm]
    ## Work on a copy so that dropping consumed rows leaves the train set intact
    ArmData = SelectedEventsTrain[SelectedEventsTrain.MainCatID==armActual].copy()
    while len(ArmData) > 0:

        visitoridSample = ArmData['visitorid'].sample(1).values[0]
        visitoridSampleDF = ArmData[ArmData.visitorid == visitoridSample]

        itemIdSample = visitoridSampleDF['itemid'].sample(1).values[0]
        visitoridSampleDFItem = visitoridSampleDF[visitoridSampleDF.itemid==itemIdSample]
        Keys = visitoridSampleDFItem['event'].value_counts().keys()

        ## Score the pair by its most valuable event. The keys come back in
        ## frequency order, so the maximum is taken to stop a less valuable
        ## event visited later from overwriting a more valuable one.
        Reward = 0
        for key in Keys:
            if  key == 'addtocart':
                keyReward = 2
            elif key == 'transaction':
                keyReward = 3
            else: ## View
                keyReward = 1
            Reward = max(Reward, keyReward)

        alphasModel[arm] =  alphasModel[arm] + Reward
        betasModel[arm ] =  betasModel[arm] + 1 - min(Reward,1)

        ## Remove the consumed pair so the loop terminates
        ArmData.drop(visitoridSampleDFItem.index,inplace=True)


##################################################################################
def simulatorThompson(df, n_arms, n_steps):
    """Simulate Thompson-style arm selection with one shared set of arm priors.

    Relies on the module-level ``alphasModel``, ``betasModel``,
    ``UniqueMainCatID``, ``numberOfItems`` and ``SelectedEventsTest``. ``df``
    and ``n_arms`` are accepted but not read. The cluster is drawn at random
    at every step and only decides which rows ``sampleReward`` samples from;
    the arm choice ignores it.

    Parameters
    ----------
    df : unused
    n_arms : unused
    n_steps : int
        Number of simulated recommendations.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_steps, 5)`` whose columns are
        ``[reward, visitorid, itemid, clusterLabel, armActual]``. The two id
        columns are NaN for steps where no matching event was found.
    """
    ## Initialization
    Draws = np.zeros((n_steps, 5))

    ## Copy the priors: the updates below happen in place, and working on the
    ## module-level arrays directly would leak one run's posterior into the next.
    alphas = alphasModel.copy()
    betas  = betasModel.copy()

    ## Main Simulation
    for DrawIt in tqdm(range(n_steps)):

        clusterLabel = np.random.randint(11)  ## Simple TS doesn't have any cluster

        arm = chooseArmThompson(alphas, betas, numberOfItems)

        armActual = UniqueMainCatID[arm]
        reward, visitoridSample, itemIdSample = sampleReward(SelectedEventsTest, clusterLabel, armActual)

        ## sampleReward returns None ids when nothing matched; None cannot be
        ## written into the float array, so record the miss as NaN instead.
        Draws[DrawIt] = np.array([
            reward,
            np.nan if visitoridSample is None else visitoridSample,
            np.nan if itemIdSample is None else itemIdSample,
            clusterLabel,
            armActual,
        ])

        ##  Updating arm rewards
        alphas[arm] = alphas[arm] + reward
        betas[arm] = betas[arm] + 1 - min(reward, 1)

    return Draws

## Proposed UCB-TS method

In [3]:
# UCB Bandit for cluster selection
def chooseClusterUCB(ClusterDraws,t,c = 0.1):
    """Pick the cluster label with the highest upper confidence bound.

    Parameters
    ----------
    ClusterDraws : numpy.ndarray
        2-D array whose column 0 holds the cluster label of each past draw and
        whose column 1 holds the reward of that draw. Every row is counted as
        an observation.
        NOTE(review): a caller that passes a zero-initialised buffer with
        unfilled rows will have those rows counted as zero-reward draws of
        label 0 -- confirm callers pass only the filled rows.
    t : int
        Current time step used in the exploration term. Values below 1 are
        treated as 1 so that the logarithm stays non-negative.
    c : float, default 0.1
        Weight of the exploration term.

    Returns
    -------
    int
        The label (taken from column 0) of the selected cluster.
    """
    draws_Labels = ClusterDraws[:, 0]
    draws_Rewards = ClusterDraws[:, 1]

    candidateLabels = np.unique(draws_Labels)  # np.unique returns sorted values
    logT = np.log(max(t, 1))

    UCB_indices = []
    for arm in candidateLabels:
        armMask = draws_Labels == arm
        arm_obs = armMask.sum()
        avg_rew = draws_Rewards[armMask].sum() / arm_obs
        Ut = (logT / arm_obs) ** 0.5
        UCB = avg_rew + c * Ut + np.random.normal(0, 0.00001) # tiny noise to break ties

        UCB_indices.append(UCB)

    bestPosition = np.argsort(UCB_indices)[::-1][0]

    ## Map the winning position back to its label; the position only equals
    ## the label when the observed labels happen to be exactly 0..k-1.
    SelectedAtmsID = int(candidateLabels[bestPosition])

    return SelectedAtmsID

In [8]:
##################################################################################
def chooseArmThompson(alphas, betas, sample_size = 100):
    """Return the index of the arm with the highest mean of sampled Beta draws.

    For every arm ``sample_size`` values are drawn from
    ``Beta(alphas[arm], betas[arm])``; the draws are averaged per arm and the
    index of the largest average is returned.

    Parameters
    ----------
    alphas, betas : array-like
        Beta distribution parameters, one entry per arm. The number of arms is
        taken from these inputs rather than from a module-level variable.
    sample_size : int, default 100
        Number of draws averaged per arm.

    Returns
    -------
    int
        Index of the selected arm (as returned by ``np.argmax``).
    """
    alphas = np.asarray(alphas, dtype=float)
    betas = np.asarray(betas, dtype=float)
    armShape = np.broadcast(alphas, betas).shape
    samples = np.random.beta(alphas, betas, size = (sample_size,) + armShape)
    means = samples.mean(axis = 0)
    return np.argmax(means)

##################################################################################
def simulatorThompsonCL(df, n_arms, n_steps, clusterUCB_c = 0.1):
    """Simulate per-cluster Thompson arm selection.

    Each cluster is played once in label order (0..10) as a warm-up; for the
    remaining steps the cluster is drawn uniformly at random (the UCB-based
    cluster choice is left commented out below). Within the chosen cluster the
    arm is picked by ``chooseArmThompson`` from that cluster's alpha/beta
    counts.

    Relies on the module-level ``AlphaBetaClusterDict``, ``UniqueMainCatID``,
    ``numberOfItems`` and ``SelectedEventsTest``. ``AlphaBetaClusterDict`` is
    updated as rewards arrive and ``sampleReward`` shrinks
    ``SelectedEventsTest`` in place. ``df``, ``n_arms`` and ``clusterUCB_c``
    are accepted but not read.

    Parameters
    ----------
    df : unused
    n_arms : unused
    n_steps : int
        Total number of simulated recommendations, warm-up included. When it is
        smaller than 11 only the first ``n_steps`` clusters are warmed up.
    clusterUCB_c : float, default 0.1
        Exploration weight for the UCB cluster choice; unused while that
        choice is commented out.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_steps, 5)`` whose columns are
        ``[reward, visitorid, itemid, clusterLabel, armActual]``. The two id
        columns are NaN for steps where no matching event was found.
    """
    ## Initialization
    Draws = np.zeros((n_steps, 5))
    ClusterDraws = np.zeros((n_steps, 2))  # [clusterLabel, reward] per step
    DrawIt = 0

    def playRound(clusterLabel, row):
        """Play one round in ``clusterLabel`` and record it at index ``row``."""
        alphas, betas = AlphaBetaClusterDict[clusterLabel]

        arm = chooseArmThompson(alphas, betas, numberOfItems)

        armActual = UniqueMainCatID[arm]
        reward, visitoridSample, itemIdSample = sampleReward(SelectedEventsTest, clusterLabel, armActual)

        ## sampleReward returns None ids when nothing matched; None cannot be
        ## written into the float array, so record the miss as NaN instead.
        Draws[row] = np.array([
            reward,
            np.nan if visitoridSample is None else visitoridSample,
            np.nan if itemIdSample is None else itemIdSample,
            clusterLabel,
            armActual,
        ])
        ClusterDraws[row] = np.array([clusterLabel, reward])

        ##  Updating arm rewards
        alphas[arm] = alphas[arm] + reward
        betas[arm] = betas[arm] + 1 - min(reward, 1)
        AlphaBetaClusterDict[clusterLabel] = [alphas, betas]

    ## Choosing each cluster at least once (capped so a short run cannot
    ## write past the end of Draws)
    for clusterLabel in range(min(11, n_steps)):
        playRound(clusterLabel, DrawIt)
        DrawIt = DrawIt + 1

    ## Main Simulation for proposed method UCB-TS
    for i in range(n_steps - DrawIt):

        clusterLabel = np.random.randint(11)#chooseClusterUCB(ClusterDraws,t = DrawIt, c = clusterUCB_c)

        playRound(clusterLabel, DrawIt)
        DrawIt = DrawIt + 1

    return Draws

## Main Simulation
(The simulation is repeated 5 times so that a confidence interval can be estimated.)

In [11]:
ResultDict = {}

## The arm catalogue is the same for every run, so it is loaded once up front.
events  = pd.read_csv('/.../input/cleanrl/events.csv',index_col=0)
UniqueMainCatID = sorted(events.MainCatID.unique())
numberOfItems = len(UniqueMainCatID)

for I in tqdm(range(5)):
    ################################################################################################
    ## The test events and the per-cluster alpha/beta counts are consumed and
    ## updated by the simulation, so each run reloads them to start fresh.
    SelectedEventsTest  = pd.read_csv('/.../input/cleanrl/SelectedEventsTest.csv',index_col=0)
    SelectedEventsTrain = pd.read_csv('/.../input/cleanrl/SelectedEventsTrain.csv',index_col=0)
    ## Events without a cluster are assigned to the extra cluster 10
    SelectedEventsTest.cluster = SelectedEventsTest.cluster.fillna(10)
    ## NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open('/.../input/cleanrl/AlphaBetaClusterDict.pickle', 'rb') as f:
        AlphaBetaClusterDict = pickle.load(f)

    df, n_arms, n_steps = SelectedEventsTest.copy() , numberOfItems, 40000
    ################################################################################################

    DrawsRR = simulatorThompsonCL(df, n_arms, n_steps)

    ResultDict[I] = DrawsRR

    ## Rewrite the results file after every run so finished runs survive an
    ## interruption of a later one.
    with open('/.../working/simulatorThompsonCLRandomCluster.pickle', 'wb') as handle:
        pickle.dump(ResultDict, handle, protocol=pickle.HIGHEST_PROTOCOL)

100%|██████████| 5/5 [08:31<00:00, 102.36s/it]
