In [1]:
import os
import pandas as pd
import numpy as np
import random

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split

In [2]:
path = '/content/drive/MyDrive/Fellowship_Deep_and_Wide_Bandit/' ## Path to data_snippet.csv
os.chdir(path)
os.getcwd()

'/content/drive/MyDrive/Fellowship_Deep_and_Wide_Bandit'

In [4]:
## Load data
data_snippet = pd.read_csv(path+'data_snippet.csv') ## Takes about 7 seconds
data_snippet.head()

Unnamed: 0,riid,opened,rev_3dv2,unsub,campaign_type,retention_score,recency_score,frequency_score
0,242697842,1,75.0,0,Dedicated,14.0,1.637422,31
1,275469002,1,75.0,0,Dedicated,2.8,0.0,0
2,4319542,1,19.98,0,Dedicated,1.0,0.084239,4
3,166170102,1,19.98,0,Dedicated,0.756757,0.01087,1
4,6998482,1,150.0,0,Dedicated,28.0,1.845885,18


In [5]:
## Use label encoding for campaign_type
data_snippet["campaign_type"] = data_snippet["campaign_type"].astype('category')
data_snippet["campaign_type_cat"] = data_snippet["campaign_type"].cat.codes

In [6]:
## Use opened and rev_3dv2 as rewards, but set rewards to negative if unsub == 1
data_snippet['rewards'] = data_snippet['opened'] + data_snippet['rev_3dv2'] / 100.
data_snippet['rewards'][data_snippet['unsub']==1] = -1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [10]:
## Take a look at modified dataframe
data_snippet.head()

Unnamed: 0,riid,opened,rev_3dv2,unsub,campaign_type,retention_score,recency_score,frequency_score,campaign_type_cat,rewards
0,242697842,1,75.0,0,Dedicated,14.0,1.637422,31,3,1.75
1,275469002,1,75.0,0,Dedicated,2.8,0.0,0,3,1.75
2,4319542,1,19.98,0,Dedicated,1.0,0.084239,4,3,1.1998
3,166170102,1,19.98,0,Dedicated,0.756757,0.01087,1,3,1.1998
4,6998482,1,150.0,0,Dedicated,28.0,1.845885,18,3,2.5


In [17]:
## Define Dataset for data snippet
class BanditTestDataset(Dataset):
    def __init__(self, data, user_col, context_col, rewards_col, action_col=None):
        super(BanditTestDataset, self).__init__()
        ## data - pandas dataframe
        ## user_col - user IDs column name
        ## action_col - action column name
        ## context_col - context column names
        ## rewards_col - reward column name
        self.user_ids = data[user_col]
        self.context = data[context_col]
        self.rewards = data[rewards_col]
        if action_col != None:
          self.actions = data[action_col]
        
    def __len__(self):
        return len(self.user_ids)

    def __getitem__(self, index):
        ## returns numpy arrays
        user_id = self.user_ids.iloc[index].values[0]
        context = self.context.iloc[index].values
        reward = self.rewards.iloc[index].values[0]
        if action_col != None:
          action = self.actions.iloc[index].values[0]
        else:
          ## Randomly choose to send or not send email
          randnum = random.random() ## draw a random number between 0 and 1
          threshold = 0.2 ## if thresehold = 0.2, send email 80% of the time
          if randnum >= threshold:
            action = 1 ## send email if random number larger than threshold, get the associated reward
          else:
            action = 0 ## don't send email if random number smaller than threshold
            r1 = reward ## copy the reward before changing it
            reward = reward * -1. ## Get opposite reward compared to send email

            if (r1==0): 
              reward = 1 ## Set reward to 1 for not annoying recipients who would not open email

        return user_id, action, context, reward

In [18]:
## Split 80/20 into train and val. Create dataloaders.
## val not used in this version, just trying to run space-bandits at this point...

train, val = train_test_split(data_snippet, test_size=0.2)

user_col = ['riid']
context_col = ['retention_score','recency_score','frequency_score','campaign_type_cat']
rewards_col = ['rewards']
action_col = None

train_dataset = BanditTestDataset(train, user_col, context_col, rewards_col, action_col=None)
train_dataloader = DataLoader(train_dataset, batch_size=2048, shuffle=True)
del train

val_dataset = BanditTestDataset(val, user_col, context_col, rewards_col, action_col=None)
val_dataloader = DataLoader(val_dataset, batch_size=2048, shuffle=True)
del val

# Try using space-bandits neural linear model

In [None]:
## Install and import space-bandits
!pip install space-bandits
from space_bandits import NeuralBandits

In [14]:
## Initialize model
num_actions = 2
num_features = len(context_col)
model = NeuralBandits(num_actions, num_features)

In [None]:
## Load some data and train the model
## Model training gets noticeable slower as more data is added

iterator = iter(train_dataloader)
user_id, action, context, reward = iterator.next()

for i in range(1000):
  context1 = context[i]
  action1 = action[i]
  reward1 = reward[i]
  model.update(context1, action1, reward1)

In [None]:
## Load some data and see what actions the model recommends, 
## 1 for send email, 0 for don't send email, 
## seem to bias toward 1: sending email

user_id, action, context, reward = iterator.next()

for i in range(100):
  context1 = context[i]
  print(model.action(context1))