# COMP0124: Multi-Agent Artificial Intelligence

# Group project: Real-time bidding auctions

**Group #7: Oliviero Balbinetti, Mauricio Caballero, Paul Melkert**

Importing libraries.

In [1]:
import os
import sys
import math
import random
import operator
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from glob import glob
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error

Defining functions.

In [2]:
#Drawing a random starting weight.
def Initialize_weight(Initial_weight):
    """Return a random weight centred on zero and scaled by Initial_weight.

    One sample of random.random(), which lies in [0, 1), is shifted by -0.5
    and multiplied by Initial_weight, so for a positive Initial_weight the
    result lies in [-Initial_weight/2, Initial_weight/2).
    """
    centred_draw = random.random() - 0.5
    return centred_draw*Initial_weight

#Sigmoid function.
def sigmoid_function(value):
    """Return the logistic sigmoid 1/(1 + exp(-value)) of a real number.

    The computation is split on the sign of value so that math.exp is only
    ever called with a non-positive argument. The direct formula raises
    OverflowError for strongly negative inputs (below about -709), because
    exp(-value) then exceeds the float range.
    """
    if value >= 0:
        return 1.0/(1.0 + math.exp(-value))

    #Equivalent form exp(value)/(1 + exp(value)): for negative inputs the
    #exponential can only underflow towards 0, never overflow.
    exp_value = math.exp(value)
    return exp_value/(1.0 + exp_value)

Importing data in pandas DataFrames.

In [3]:
#Folder containing the feature-engineered CSV files.
Path = ('/Users/olivierobalbinetti/Desktop/University College London/Term 2/'
        'Multi Agents Artificial Intelligence/Courseworks/Group coursework/'
        'Data/Feature engineering')

#Loading every CSV file of that folder. Each DataFrame is stored under the
#title-cased file name without its extension (e.g. 'train.csv' -> 'Train').
Datasets = dict()
os.chdir(Path)
for Filename in glob('*.csv'):
    Datasets[os.path.splitext(Filename)[0].title()] = pd.read_csv(Filename,
                                                                  sep=',')

#Binding the training and validation sets to their own names.
Train, Validation = Datasets['Train'], Datasets['Validation']

Click-through rate estimation.

In [None]:
#Logistic regression model, trained with one gradient step per bid request.
random.seed(10)
print('Click-throught rate estimation: logistic regression model.\n')

#Defining directory: the two output files below are written into it.
#Writing them here (rather than into the current working directory, which
#is the input folder) keeps them from being picked up as datasets by the
#'*.csv' import on a later run. The folder is assumed to exist already.
Path = '/Users/olivierobalbinetti/Desktop/University College London/Term 2/'\
       'Multi Agents Artificial Intelligence/Courseworks/Group coursework/'\
       'Data/Predicted CTR'

#Defining general variables.
Eta = 0.01              #Step size of each weight update.
Epoch = 10              #Number of passes over the training set.
Lambda = 1*10**(-6)     #Shrinkage applied to a weight at each update.
Batch_size = 1000000    #Number of requests sliced off per batch.

Feature_weights = {}
Initial_weight = 0.05

#Predicting the click-through rate of a single request.
def predict_ctr(feature, weights):
    """Return the sigmoid of the summed weights of the known feature codes.

    feature: iterable of integer feature codes of one request.
    weights: dictionary mapping feature codes to their weights.
    Codes that are missing from weights are skipped, so a request made only
    of unknown codes gets sigmoid(0) = 0.5.
    """
    return sigmoid_function(sum(weights[feat] for feat in feature
                                if feat in weights))

#Extracting the columns once: they do not change between epochs, so the
#feature strings are not re-split at every epoch. Plain arrays are used for
#both sets so that requests are always addressed by position.
Train_clicks = Train['click'].values
Train_features = Train['feature'].str.split(' ').values
Validation_clicks = Validation['click'].values
Validation_features = Validation['feature'].str.split(' ').values

#Looping on epoch.
for epoch in range(0, Epoch):
    Flag = 0

    #Looping for training.
    while (Flag*Batch_size < len(Train_features)):
        Train_click = Train_clicks[Flag*Batch_size:(Flag+1)*Batch_size]
        Train_feature = Train_features[Flag*Batch_size:(Flag+1)*Batch_size]

        #Looping on batches.
        for clk, tokens in zip(Train_click, Train_feature):
            feature = [int(x) for x in tokens]

            #Forward pass; unseen features get a random weight on first use.
            prediction = 0.
            for feat in feature:
                if feat not in Feature_weights:
                    Feature_weights[feat] = Initialize_weight(Initial_weight)

                prediction += Feature_weights[feat]
            prediction = sigmoid_function(prediction)

            #Every feature of the request receives the same update:
            #shrinkage by (1-Lambda) plus a step along the prediction error.
            for feat in feature:
                Feature_weights[feat] = Feature_weights[feat]*(1-Lambda)\
                                        + Eta*(clk-prediction)
        Flag += 1

    #Looping for testing: scoring the validation set with current weights.
    True_values = list(Validation_clicks)
    Predicted_values = [predict_ctr([int(x) for x in tokens], Feature_weights)
                        for tokens in Validation_features]

    #Estimating performance.
    auc = roc_auc_score(True_values, Predicted_values)
    rmse = math.sqrt(mean_squared_error(True_values, Predicted_values))
    print('[Epoch: %d]: auc = %.4f, rmse = %.4f' %(epoch, auc, rmse))

#Saving weights (one 'feature code,weight' row per feature, sorted by code).
Filename = 'Logistic regression weights' + '.csv'
Header = 'feature code,weight'

with open(os.path.join(Path, Filename), 'w') as file:
    file.write(Header + '\n')
    Feature_values = sorted(Feature_weights.items(), key=operator.itemgetter(0))

    for item in Feature_values:
        file.write(str(item[0]) + ',' + str(item[1]) + '\n')

    print('• [File %s]: Process completed!' %Filename)

#Saving predictions (one 'bidid,pCTR' row per validation request).
Filename = 'CTR estimation (Validation)' + '.csv'
Header = 'bidid,pCTR'

with open(os.path.join(Path, Filename), 'w') as file:
    file.write(Header + '\n')

    for bidid, tokens in zip(Validation['bidid'].values, Validation_features):
        prediction = predict_ctr([int(x) for x in tokens], Feature_weights)
        file.write(str(bidid) + ',' + str(prediction) + '\n')

    print('• [File %s]: Process completed!' %Filename)

Click-throught rate estimation: logistic regression model.

