In [3]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

import random
import numpy as np
import json
from datetime import datetime
from pprint import pprint
from sklearn.model_selection import train_test_split
import os
import collections

"""
    data_path : path for dataset
    output_column_name : Column Name with labels
    stochastic_param : list of 2 values float[p1,p2]-> p1 is the probability that right arm is user's choice,
                        p2 is the probability that the user might have chosen the wrong arm
"""

class Evaluator():
    """Replay a logged CSV dataset against a bandit policy and report hit rates.

    The rows are split in file order (``shuffle=False``) into train,
    validation and test partitions. ``evaluate`` walks the three partitions
    in that order; for every row it picks one arm (a random arm with
    probability ``self.e``, otherwise the first arm returned by
    ``policy.get_action``), gives reward 1 when the arm equals the logged
    label and 0 otherwise, and passes the reward to ``policy.update_model``.
    """

    def __init__(self, data_path, output_column_name, print_params=None, use_non_stationarity=False, sampling_fraction=0.1, stochastic_param=None):
        """Load the dataset and build the train / validation / test partitions.

        Args:
            data_path: path of the CSV file to load.
            output_column_name: column holding the label (the arm); it is cast
                to ``str``. Every other column is used as context.
            print_params: free-form dict stored on the instance (defaults to
                a new empty dict).
            use_non_stationarity: when true, ``evaluate`` bumps
                ``non_stationarity_param`` every ``non_stationarity_steps`` rows.
            sampling_fraction: accepted for compatibility; not used here.
            stochastic_param: ``[p1, p2]`` pair stored on the instance
                (defaults to ``[1, 0]``); not used by ``evaluate``.
        """
        # Build fresh objects per instance rather than sharing mutable defaults.
        self.print_params = {} if print_params is None else print_params
        self.stochastic_param = [1, 0] if stochastic_param is None else stochastic_param

        df_original = pd.read_csv(data_path)
        # 'user_id' is an identifier, not a context feature; tolerate its absence.
        df_original = df_original.drop(columns=['user_id'], errors='ignore')
        df_original[output_column_name] = df_original[output_column_name].astype(str)

        input_columns = list(df_original.columns)
        input_columns.remove(output_column_name)
        print('input cols : ', len(input_columns))

        features = pd.DataFrame(df_original[input_columns].values, columns=input_columns)
        labels = df_original[output_column_name].values

        x_tr, x_test, y_tr, y_test = train_test_split(features, labels, test_size=0.3, shuffle=False)
        x_train, x_valid, y_train, y_valid = train_test_split(x_tr, y_tr, test_size=0.3, shuffle=False)

        # The label frames are built from arrays and therefore indexed from 0;
        # reset the feature frames too so features and labels share one index.
        self.x_tr = x_tr.reset_index(drop=True)
        self.y_tr = pd.DataFrame(y_tr, columns=[output_column_name])

        self.df = x_train.reset_index(drop=True)
        self.y_train = pd.DataFrame(y_train, columns=[output_column_name])

        self.df_valid = x_valid.reset_index(drop=True)
        self.y_valid = pd.DataFrame(y_valid, columns=[output_column_name])

        self.df_test = x_test.reset_index(drop=True)
        self.y_test = pd.DataFrame(y_test, columns=[output_column_name])

        self.total_train = self.df.copy(deep=True)
        self.total_train[output_column_name] = self.y_train[output_column_name].to_numpy()

        self.total_test = self.df_test.copy(deep=True)
        self.total_test[output_column_name] = self.y_test[output_column_name].to_numpy()

        print("Train Shape : ", self.df.shape, self.y_train.shape)
        print("Valid Shape :", self.df_valid.shape, self.y_valid.shape)
        print("Test Shape :", self.df_test.shape, self.y_test.shape)
        self.arm_column_name = output_column_name
        self.logging_frequency = 1000
        self.number_features_in_context = len(input_columns)
        self.list_arms = df_original[self.arm_column_name].unique().tolist()

        self.num_arms = len(self.list_arms)
        print(self.num_arms)

        random.seed(9001)

        # Initially, the correct arm is the right arm, later this will change to simulate non stationarity
        self.dict_true_arm_to_right_arm = {arm: arm for arm in self.list_arms}
        self.use_non_stationarity = use_non_stationarity
        self.non_stationarity_steps = 50000
        self.non_stationarity_param = 0

        # Probability of drawing a random arm in evaluate(), and its
        # multiplicative decay applied after every random draw.
        self.e = 0.2
        self.decay = 0.998

    def get_list_arms(self):
        """Return the list of distinct label values found in the dataset."""
        return self.list_arms

    def get_number_features_in_context(self):
        """Return the number of context (non-label) columns."""
        return self.number_features_in_context

    def get_dataset_shape(self):
        """Return the shape of the training feature frame."""
        return self.df.shape

    @staticmethod
    def _ratio(numerator, denominator):
        """Return numerator / denominator as a float, or 0.0 for an empty denominator."""
        if not denominator:
            return 0.0
        return float(numerator) / denominator

    def _run_phase(self, policy, features, labels, counters, iterative_reward, impressions=None, clicks=None):
        """Replay one partition row by row, updating the policy after each row.

        ``counters`` carries the running totals shared by all partitions.
        ``impressions`` / ``clicks`` are per-arm tallies filled only when given.
        """
        # Read labels by position so a partition whose index does not start
        # at 0 still lines up with its features.
        label_values = labels.iloc[:, 0].to_numpy()

        for (_, row), y_value in zip(features.iterrows(), label_values):
            counters['rows_seen'] += 1
            if counters['rows_seen'] % 100 == 0:
                # Close the current 100-row window before handling this row.
                iterative_reward.append(counters['window_reward'])
                counters['window_reward'] = 0

            state = row.tolist()

            if random.uniform(0, 1) < self.e:
                self.e = self.e * self.decay
                action = random.choice(self.list_arms)
            else:
                selected_action, _score_sum = policy.get_action(state)
                # Only the top-ranked arm of the policy is evaluated.
                action = selected_action[0]

            if impressions is not None:
                impressions[action] += 1

            matched = action == y_value
            reward = 1 if matched else 0
            if matched:
                counters['window_reward'] += 1
                counters['rewards'] += 1
                if clicks is not None:
                    clicks[action] += 1

            policy.update_model(reward, action, state)
            counters['evaluated'] += 1
            evaluated = counters['evaluated']

            if self.use_non_stationarity and evaluated % self.non_stationarity_steps == 0:
                self.non_stationarity_param += 1
                print(self.non_stationarity_param)

            if evaluated % self.logging_frequency == 0 and evaluated != 0:
                print("---------------------------------------------------------------------------------------------------------- ")
                print('evaluated and frequency reached %d' % evaluated)
                print('accuracy %f' % (float(counters['rewards']) / evaluated))
                print("----------------------------------------------------------------------------------------------------------- ")

    def evaluate(self, policy):
        """Run the policy over train, validation and test rows and print metrics.

        ``policy`` must provide ``get_action(state) -> (arms, score)`` and
        ``update_model(reward, arm, state)``. The per-100-row reward counts
        are pickled to ``iterative_reward.pickle`` in the working directory.

        Returns:
            dict with ``precision`` (test hits / test rows), ``ctr`` (mean
            over all arms of clicks / impressions on the test rows),
            ``accuracy`` (hits / rows over all partitions), ``test_accuracy``
            and ``iterative_reward``.
        """
        # Imported here because the module-level import lives in a later cell.
        import pickle

        start_time = datetime.now()
        counters = {'rewards': 0, 'evaluated': 0, 'rows_seen': 0, 'window_reward': 0}
        iterative_reward = []

        # Training and validation rows: online updates, shared counters.
        self._run_phase(policy, self.df, self.y_train, counters, iterative_reward)
        self._run_phase(policy, self.df_valid, self.y_valid, counters, iterative_reward)

        # Test rows: additionally track per-arm impressions and clicks.
        dict_impressions = {arm: 0 for arm in self.list_arms}
        dict_clicks = {arm: 0 for arm in self.list_arms}
        rewards_before_test = counters['rewards']
        evaluated_before_test = counters['evaluated']
        self._run_phase(policy, self.df_test, self.y_test, counters, iterative_reward,
                        impressions=dict_impressions, clicks=dict_clicks)
        rewards_test = counters['rewards'] - rewards_before_test
        evaluated_test = counters['evaluated'] - evaluated_before_test

        with open('iterative_reward.pickle', 'wb') as handle:
            pickle.dump(iterative_reward, handle)

        ctr_sum = 0
        for arm, shown in dict_impressions.items():
            if shown != 0:
                ctr_sum += dict_clicks[arm] / shown

        # One arm is evaluated per row, so precision is test hits over test rows.
        precision = self._ratio(rewards_test, self.df_test.shape[0])
        ctr = self._ratio(ctr_sum, len(dict_impressions))
        # Rewards are collected on every partition, so divide by every row
        # replayed (the previous valid+test denominator overstated accuracy).
        accuracy = self._ratio(counters['rewards'], counters['evaluated'])
        test_accuracy = self._ratio(rewards_test, evaluated_test)

        print("precision")
        print(precision)

        print("CTR")
        print(ctr)

        print("accuracy", accuracy)

        print("---------------------------------")

        print("accuracy", test_accuracy)

        end_time = datetime.now()
        print("Total time taken by ", str(policy), " = ", end_time - start_time)

        return {
            'precision': precision,
            'ctr': ctr,
            'accuracy': accuracy,
            'test_accuracy': test_accuracy,
            'iterative_reward': iterative_reward,
        }


In [None]:
import pickle
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import sklearn
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import matplotlib as mpl
import seaborn as sns
import random
#from eval_3_modified import Evaluator
from sklearn.tree import DecisionTreeRegressor
import itertools
import operator
from sklearn import linear_model
from sklearn.neural_network import MLPRegressor
#from imblearn.over_sampling import SMOTE

import tensorflow as tf
import sys


class TFNeuralNetworkRegressor():
    """Two-hidden-layer regressor (TensorFlow 1.x graph API) with a replay buffer.

    Each instance builds its own placeholders, variables, Adam training op
    and ``tf.Session``. ``fit`` stores every sample it receives in
    ``self.experience`` and trains on random mini-batches drawn from it.
    """

    def multilayer_perceptron(self, x, weights, biases, keep_prob):
        """Return the output tensor of a tanh -> relu -> linear network.

        ``keep_prob`` is accepted for interface compatibility, but no dropout
        op is built from it, so the dropout values fed at run time have no
        effect on the predictions.
        """
        # Hidden fully connected layer 1
        layer_1 = tf.nn.tanh(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
        # Hidden fully connected layer 2
        layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, weights['h2']), biases['b2']))
        # Output layer: a single linear unit (the predicted reward)
        return tf.matmul(layer_2, weights['out']) + biases['out']

    def __init__(self, n_input, arm_name, model_number, training_dropout = 1.0, prediction_dropout = 1.0):
        """Build the graph and start a session.

        Args:
            n_input: number of context features per sample.
            arm_name: arm this model belongs to.
            model_number: identifier of this model among the arm's models.
            training_dropout: value fed to ``keep_prob`` while training.
            prediction_dropout: value fed to ``keep_prob`` while predicting.
        """
        self.n_input = n_input
        self.experience = []
        # Keep both identifiers (previously arm_name overwrote model_number).
        self.arm_name = arm_name
        self.model_number = model_number
        self.training_dropout = training_dropout
        self.prediction_dropout = prediction_dropout

        # Network Parameters
        n_hidden_1 = 60  # 1st layer number of neurons
        n_hidden_2 = 40  # 2nd layer number of neurons

        weights = {
            'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
            'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
            'out': tf.Variable(tf.random_normal([n_hidden_2, 1]))
        }
        biases = {
            'b1': tf.Variable(tf.random_normal([n_hidden_1])),
            'b2': tf.Variable(tf.random_normal([n_hidden_2])),
            'out': tf.Variable(tf.random_normal([1]))
        }

        self.keep_prob = tf.placeholder("float", name="keep_prob")

        self.x = tf.placeholder("float", [None, n_input])
        self.y = tf.placeholder("float", [None, 1])

        self.predictions = self.multilayer_perceptron(self.x, weights, biases,
                                                      self.keep_prob)

        # Mean squared error between predicted and observed reward.
        self.cost = tf.reduce_mean(tf.square(self.predictions - self.y))

        self.train = tf.train.AdamOptimizer(0.008).minimize(self.cost)

        self.sess = tf.Session()

        self.sess.run(tf.global_variables_initializer())

    def get_samples_from_experience(self, must_have_row, must_have_column, n_samples=50):
        """Return a training batch: the given sample plus a random draw from the buffer.

        Args:
            must_have_row: feature vector that is always placed first in the batch.
            must_have_column: its target value.
            n_samples: maximum number of buffered samples to add.

        Returns:
            ``(X, y)`` arrays of shape ``(k, n_input)`` and ``(k, 1)``. When more
            than 500 buffered samples are drawn and imbalanced-learn is
            installed the batch is over-sampled with SMOTE; otherwise it is
            returned as drawn.
        """
        num_to_sample = min(len(self.experience), n_samples)
        sample = random.sample(self.experience, num_to_sample)

        x = [must_have_row]
        y = [must_have_column]
        for row in sample:
            x.append(row[0])
            y.append(row[1])
        x = np.array(x).reshape(num_to_sample + 1, self.n_input)
        y = np.array(y).reshape(num_to_sample + 1, 1)

        if num_to_sample <= 500:
            return x, y

        # SMOTE is optional: its module-level import is disabled in this file,
        # so import it lazily and fall back to the raw batch when unavailable.
        try:
            from imblearn.over_sampling import SMOTE
        except ImportError:
            import warnings
            warnings.warn("imbalanced-learn is not installed; skipping SMOTE over-sampling")
            return x, y

        sm = SMOTE(random_state=42)
        # fit_sample was renamed to fit_resample in newer imbalanced-learn releases.
        resample = getattr(sm, 'fit_resample', None) or sm.fit_sample
        X_res, y_res = resample(x, y.ravel())
        y_res = y_res.reshape(y_res.shape[0], 1)
        return X_res, y_res

    def fit(self, X, Y):
        """Store ``(X[0], Y)`` in the buffer and run 50 training steps.

        Every step trains on a new batch that always contains the incoming
        sample together with a random draw from the buffer.
        """
        self.experience.append((X[0], Y))
        for _ in range(50):
            x, y = self.get_samples_from_experience(X[0], Y)
            self.sess.run([self.train],
                          feed_dict={self.x: x, self.y: y, self.keep_prob: self.training_dropout})

    def partial_fit(self, X, Y):
        """Incremental update; delegates to ``fit``."""
        self.fit(X, Y)

    def batch_fit(self, X, Y, iter = 1):
        """Run ``iter`` training steps on the batch ``(X, Y)`` without touching the buffer."""
        for _ in range(0, iter):
            self.sess.run([self.train],
                          feed_dict={self.x: X, self.y: Y, self.keep_prob: self.training_dropout})

    def predict(self, X, verbose=False):
        """Return the session output for ``X``: a one-element list holding the predictions array."""
        preds = self.sess.run([self.predictions],
                              feed_dict={self.x: X, self.keep_prob: self.prediction_dropout})
        if verbose:
            print(preds)
        return preds
	
class PerArmModelPolicy():
    """Bandit policy that keeps one or more reward regressors per arm.

    ``get_action`` scores every arm with one of its models and returns the
    five best arms, or five random arms when exploring. ``update_model``
    feeds an observed reward back to the model of the chosen arm.
    """

    def __init__(self, list_arms, number_features_in_context, training_dropout, prediction_dropout, epsilon, number_models_per_arm=1,
                 switch_off_epsilon_exploration=True, use_epsilon_decay=True):
        """Create ``number_models_per_arm`` regressors for every arm.

        Args:
            list_arms: arms the policy can choose from.
            number_features_in_context: length of the state vectors.
            training_dropout: forwarded to each regressor.
            prediction_dropout: forwarded to each regressor.
            epsilon: exploration probability used when decay is disabled.
            number_models_per_arm: regressors to build per arm.
            switch_off_epsilon_exploration: stored on the instance; not read
                by ``get_action``.
            use_epsilon_decay: use ``get_epsilon()`` instead of ``epsilon``.
        """
        self.list_arms = list_arms
        self.last_action = None
        self.last_reward = None
        self.last_state = None
        self.last_model = None

        self.experience = {}
        self.dict_arm_to_model = {}
        # If Using Batch Training
        self.batch_size = 0
        self.mini_batch = 500
        self.train_batch_size = 50000

        self.iter = 0
        self.dict_update_hist = {}
        for arm in list_arms:
            self.dict_arm_to_model[arm] = [
                TFNeuralNetworkRegressor(number_features_in_context, arm, str(i), training_dropout, prediction_dropout)
                for i in range(number_models_per_arm)
            ]

        # for epsilon decay
        self.num_feedback_received = 0
        self.decay = 50
        self.switch_off_epsilon_exploration = switch_off_epsilon_exploration
        self.use_epsilon_decay = use_epsilon_decay
        self.default_epsilon = epsilon

    def get_best_arm(self, state):
        """Score every arm and return the five best as ``(scores, arms, models)``.

        Each list has exactly five entries, best first; unused slots hold
        ``0`` (score) or ``None`` (arm, model). Ties keep the arm that comes
        first in ``dict_arm_to_model``. If any model fails to predict, all
        slots are returned empty so that the caller falls back to exploring.
        """
        top_k = 5
        ranked = []

        for arm, models in self.dict_arm_to_model.items():
            try:
                # randomly sample a model from the list of models
                model = random.choice(models)
                score = model.predict([state])[0]
                # Scores may be scalars or one-element arrays; rank on the number.
                rank_key = float(np.ravel(score)[0])
            except Exception:
                print('model %s not ready yet' % arm)
                print('----------------------------------')
                return [0] * top_k, [None] * top_k, [None] * top_k
            ranked.append((rank_key, score, arm, model))

        # Stable sort: equal scores keep their insertion order.
        ranked.sort(key=lambda entry: entry[0], reverse=True)
        best = ranked[:top_k]
        padding = top_k - len(best)

        s = [entry[1] for entry in best] + [0] * padding
        a = [entry[2] for entry in best] + [None] * padding
        m = [entry[3] for entry in best] + [None] * padding
        return s, a, m

    def get_action(self, state):
        """Return ``(arms, score_sum)`` for ``state``.

        ``arms`` holds five arms: the best-scoring ones, or five random arms
        when exploring or when no model could predict. ``score_sum`` is the
        sum of the five best scores.
        """
        # Use the policy's own arm list rather than a module-level global.
        explore_action = [random.choice(self.list_arms) for _ in range(5)]

        best_score, best_action, best_model = self.get_best_arm(state)
        self.num_feedback_received += 1
        if self.use_epsilon_decay:
            epsilon = self.get_epsilon()
        else:
            epsilon = self.default_epsilon

        if np.random.random() <= epsilon or best_action[0] is None:
            action = explore_action
        else:
            action = best_action

        bs = sum(best_score)

        self.last_state = state
        self.last_model = best_model
        return action, bs

    def update_model(self, reward, last_action = None, last_state = None, update_in_train = False):
        """Feed ``reward`` for ``(last_action, last_state)`` back to the arm's first model.

        Args:
            reward: observed reward.
            last_action: arm that was played; ``None`` falls back to
                ``self.last_action``.
            last_state: state the arm was played in; ``None`` or an empty
                sequence falls back to ``self.last_state``.
            update_in_train: when false the model is updated immediately; when
                true the sample is buffered per arm and the models are
                batch-trained every ``train_batch_size`` calls.
        """
        # Explicit None checks so that falsy arms (e.g. 0) are not discarded.
        if last_action is None:
            last_action = self.last_action
        if last_state is None or len(last_state) == 0:
            last_state = self.last_state
        self.last_reward = reward

        # Immediate Updates for incremental training
        if self.batch_size == 0 and not update_in_train and last_action is not None:
            self.dict_arm_to_model[last_action][0].partial_fit([last_state], [self.last_reward])

        # Batch Updates for Li Hong Matching
        if update_in_train:
            if self.iter == 0 or self.iter % self.train_batch_size != 0:
                # history[0] collects states, history[1] the matching rewards.
                history = self.dict_update_hist.setdefault(last_action, [[], []])
                history[0].append(last_state)
                history[1].append([self.last_reward])
            else:
                # NOTE(review): the sample passed on this call is not buffered
                # and the history is never cleared - confirm this is intended.
                print("Update Number: ", self.iter)
                for iteration in range(1, 7001):
                    if iteration % 1000 == 0:
                        print("Iteration Number : ", iteration)
                    for offer, update in self.dict_update_hist.items():
                        num_to_sample = min(len(update[0]), self.mini_batch)
                        random_indices = random.sample(range(0, len(update[0])), num_to_sample)
                        x_train = [update[0][index] for index in random_indices]
                        y_train = [update[1][index] for index in random_indices]
                        self.dict_arm_to_model[offer][0].batch_fit(x_train, y_train)
            self.iter += 1

    def get_epsilon(self):
        """Produce epsilon"""
        total = self.num_feedback_received
        return float(self.decay) / (total + float(self.decay))



# Experiment configuration.
training_dropout = 0.5
prediction_dropout = 0.5
epsilon = 0.0000001
use_non_stationarity = False

# Settings recorded alongside the run.
# "Sampling" and "Number of times fit": change these in the function
# get_samples_from_experience too.
print_params = {
    "training_dropout": training_dropout,
    "prediction_dropout": prediction_dropout,
    "epsilon": epsilon,
    "Sampling": 50,
    "Number of times fit": 10,
    "use_non_stationarity": use_non_stationarity,
}

switch_off_epsilon_exploration = True

# Load the dataset, build one model per arm, then replay the data.
forest_cover_evaluator = Evaluator(
    'news_user_context_timestamp.csv',
    'article_id',
    print_params,
    use_non_stationarity,
)
list_arms = forest_cover_evaluator.get_list_arms()
number_features_in_context = forest_cover_evaluator.get_number_features_in_context()
per_arm_model_policy = PerArmModelPolicy(
    list_arms,
    number_features_in_context,
    training_dropout,
    prediction_dropout,
    epsilon,
    1,
    switch_off_epsilon_exploration,
    False,
)
forest_cover_evaluator.evaluate(per_arm_model_policy)

input cols :  37
Train Shape :  (89421, 37) (89421, 1)
Valid Shape : (38324, 37) (38324, 1)
Test Shape : (54749, 37) (54749, 1)
40
Instructions for updating:
Colocations handled automatically by placer.
---------------------------------------------------------------------------------------------------------- 
evaluated and frequency reached 1000
accuracy 0.021000
----------------------------------------------------------------------------------------------------------- 
---------------------------------------------------------------------------------------------------------- 
evaluated and frequency reached 2000
accuracy 0.021000
----------------------------------------------------------------------------------------------------------- 
---------------------------------------------------------------------------------------------------------- 
evaluated and frequency reached 3000
accuracy 0.022667
------------------------------------------------------------------------------------------

---------------------------------------------------------------------------------------------------------- 
evaluated and frequency reached 30000
accuracy 0.032900
----------------------------------------------------------------------------------------------------------- 
---------------------------------------------------------------------------------------------------------- 
evaluated and frequency reached 31000
accuracy 0.033290
----------------------------------------------------------------------------------------------------------- 
---------------------------------------------------------------------------------------------------------- 
evaluated and frequency reached 32000
accuracy 0.033188
----------------------------------------------------------------------------------------------------------- 
---------------------------------------------------------------------------------------------------------- 
evaluated and frequency reached 33000
accuracy 0.033182
-----------------