In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import gc
np.random.seed(0)

In [2]:
def load_file_from_excel(file_path):
    df = pd.ExcelFile(file_path)
    df = pd.read_excel(df, df.sheet_names[0])
    return df

In [3]:
data = load_file_from_excel("/home/dt-021/Documents/sagemaker/ID_counts1.xlsx")

In [4]:
# data.drop(['INDEX'],inplace=True,axis=1)
data.head()

Unnamed: 0,INDEX,id,clicks,conversions,cost,impressions,CTR,CPS,CLPS,IPS,Scaled CLPS
0,0,6089948464237,5,4,22.56,502,0.00996,0.177305,0.221631,22.251773,0.000498
1,1,6095907447037,2,1,18.37,502,0.003984,0.054437,0.108873,27.327164,0.000244
2,2,6097338832437,2,0,9.04,502,0.003984,0.0,0.221239,55.530973,0.000497
3,3,6095907846037,2,2,20.99,503,0.003976,0.095283,0.095283,23.963792,0.000214
4,4,6097002345437,2,1,10.45,503,0.003976,0.095694,0.191388,48.133971,0.00043


In [73]:
scaled_clps_opt=data[['id','Scaled CLPS']][:5]

In [74]:
bandit_probs = scaled_clps_opt['Scaled CLPS']  # bandit probabilities of success
identity = pd.DataFrame(scaled_clps_opt['id'])
N_experiments = 10  # number of experiments to perform
N_episodes = 10000 # number of episodes per experiment
epsilon = 0.5  # probability of random exploration (fraction)
save_fig = True  # if false -> plot, if true save as file in same directory
save_format = ".png"# ".pdf" or ".png
for i in range(N_experiments):
    identity['Experiment No {} action'.format(i)] = 0
    identity['Experiment No {} reward'.format(i)] = 0

In [75]:
class Bandit:
    def __init__(self, bandit_probs,i):
        self.N = len(bandit_probs)  # number of bandits
        self.prob = bandit_probs  # success probabilities for each bandit
        self.exp_no = i
    # Get reward (1 for success, 0 for failure)
    def get_reward(self, action):
        rand = np.random.random()  # [0.0,1.0)
        reward = 1 if (rand < self.prob[action]) else 0
#         print(self.prob[action])
        identity['Experiment No {} reward'.format(self.exp_no)] += reward
        return reward


In [80]:
class Agent:
        def __init__(self, bandit, epsilon,i):
            self.epsilon = epsilon
            self.k = np.zeros(bandit.N, dtype=np.int)  # number of times action was chosen
            self.Q = np.zeros(bandit.N, dtype=np.float)  # estimated value
            self.exp_no = i
        # Update Q action-value using:
        # Q(a) <- Q(a) + 1/(k+1) * (r(a) - Q(a))
        def update_Q(self, action, reward):
            self.k[action] += 1  # update action counter k -> k+1
            self.Q[action] += (1./self.k[action]) * (reward - self.Q[action])
            
        # Choose action using a soft max agent
        def choose_action(self, bandit):
            a = bandit_probs
            pi = np.exp(a) / np.sum(np.exp(a))
#             print(pi)
            cdf = np.cumsum(pi)
            s = np.random.random()
            act = 1 if np.where(s < cdf)[0][0] else 0
            identity['Experiment No {} action'.format(self.exp_no)] += pi
            return (act)

In [81]:
def experiment(agent, bandit, N_episodes):
    action_history = []
    reward_history = []
    for episode in range(N_episodes):
        # Choose action from agent (from current Q estimate)
        action = agent.choose_action(bandit)
        # Pick up reward from bandit for chosen action
        reward = bandit.get_reward(action)
        # Update Q action-value estimates
        agent.update_Q(action, reward)
        # Append to history
        action_history.append(action)
        reward_history.append(reward)
    return (np.array(action_history), np.array(reward_history))

In [82]:
import time
start = time.time()
action_data = data['id']
exp_reward_matrix = {}
N_bandits = len(bandit_probs)
# print("Running multi-armed bandits with N_bandits = {} and agent epsilon = {}".format(N_bandits, epsilon))
reward_history_avg = np.zeros(N_episodes)  # reward history experiment-averaged
action_history_sum = np.zeros((N_episodes, N_bandits))  # sum action history
for i in range(N_experiments):
    bandit = Bandit(bandit_probs,i)  # initialize bandits  
    agent = Agent(bandit, epsilon,i)  # initialize agent
    (action_history, reward_history) = experiment(agent, bandit, N_episodes)
#     for i in range(len(bandit_probs)):
#         (index,rew,action) = (i,bandit,agent)
#         print(index,rew,action)
        
    # perform experiment
#     print("[Experiment {}/{}]".format(i + 1, N_experiments))
#     print("  N_episodes = {}".format(N_episodes))
#     print("  bandit choice history = {}".format(action_history + 1))
#     print("  reward history = {}".format(reward_history))
#     print("  average reward = {}".format(np.sum(reward_history) / len(reward_history)))
#     print("")
        # Sum up experiment reward (later to be divided to represent an average)
    reward_history_avg += reward_history
    exp_reward_matrix[i] = (reward_history_avg, action_history)
    
for j, (a) in enumerate(action_history):
    action_history_sum[j][a] += 1
#     action_data.append(id[j], action_history_sum)
# reward_history_avg /= np.float(N_experiments)
print("reward history avg = {}".format(reward_history_avg))
end = time.time()
print("Total time take {}".format(end - start))

NameError: name 'Q' is not defined

In [57]:
bandit_selection=action_history+1
action_data = pd.DataFrame(bandit_selection)

In [58]:
sum_reward = 0
max_reward = 0
exp = 0
for key, value in exp_reward_matrix.items():
    reward_sum = np.sum(value[0])
    if reward_sum >= sum_reward:
        max_reward = sum_reward
        sum_reward = reward_sum
        exp = key
print("Max reward is {0} for experiment {1}".format(max_reward, exp))
    

Max reward is 0.0 for experiment 2


In [59]:
bandit_selection=reward_history
reward_data= pd.DataFrame(bandit_selection)

In [60]:
print(reward_data)
print(action_data)

   0
0  0
1  0
   0
0  2
1  2


In [61]:
reward_data.csv

AttributeError: 'DataFrame' object has no attribute 'csv'

In [64]:
identity

Unnamed: 0,id,Experiment No 0 action,Experiment No 0 reward,Experiment No 1 action,Experiment No 1 reward,Experiment No 2 action,Experiment No 2 reward,Experiment No 3 action,Experiment No 3 reward,Experiment No 4 action,...,Experiment No 5 action,Experiment No 5 reward,Experiment No 6 action,Experiment No 6 reward,Experiment No 7 action,Experiment No 7 reward,Experiment No 8 action,Experiment No 8 reward,Experiment No 9 action,Experiment No 9 reward
0,6089948464237,14.955117,4,14.955117,5,14.955117,6,14.955117,7,14.955117,...,14.955117,2,14.955117,4,14.955117,4,14.955117,6,14.955117,5
1,6095907447037,14.951331,4,14.951331,5,14.951331,6,14.951331,7,14.951331,...,14.951331,2,14.951331,4,14.951331,4,14.951331,6,14.951331,5
2,6097338832437,14.955104,4,14.955104,5,14.955104,6,14.955104,7,14.955104,...,14.955104,2,14.955104,4,14.955104,4,14.955104,6,14.955104,5
3,6095907846037,14.950875,4,14.950875,5,14.950875,6,14.950875,7,14.950875,...,14.950875,2,14.950875,4,14.950875,4,14.950875,6,14.950875,5
4,6097002345437,14.954102,4,14.954102,5,14.954102,6,14.954102,7,14.954102,...,14.954102,2,14.954102,4,14.954102,4,14.954102,6,14.954102,5
5,6097011912837,14.956314,4,14.956314,5,14.956314,6,14.956314,7,14.956314,...,14.956314,2,14.956314,4,14.956314,4,14.956314,6,14.956314,5
6,6096204398437,14.965482,4,14.965482,5,14.965482,6,14.965482,7,14.965482,...,14.965482,2,14.965482,4,14.965482,4,14.965482,6,14.965482,5
7,6097002350837,14.953901,4,14.953901,5,14.953901,6,14.953901,7,14.953901,...,14.953901,2,14.953901,4,14.953901,4,14.953901,6,14.953901,5
8,6097341606237,14.970196,4,14.970196,5,14.970196,6,14.970196,7,14.970196,...,14.970196,2,14.970196,4,14.970196,4,14.970196,6,14.970196,5
9,6096204363237,14.960591,4,14.960591,5,14.960591,6,14.960591,7,14.960591,...,14.960591,2,14.960591,4,14.960591,4,14.960591,6,14.960591,5


In [72]:
identity['Experiment No 8 reward'].value_counts()

6    1337
Name: Experiment No 8 reward, dtype: int64