In [1]:
# Question: Create a network that is able to predict the likelihood that a given customer purchases from the website
# given if it is their pay day and if they visited the website?
# base method to create customer information
def decide(customer_id, pay_day, visit_site):
    if pay_day:
        if visit_site:
            return 1
        else:
            return 0
    else:
        return 0    

In [2]:
# appropriate import statements
import random
import pandas as pd
from pybrain.tools.shortcuts import buildNetwork #network
from pybrain.datasets import SupervisedDataSet #dataset
from pybrain.supervised.trainers import BackpropTrainer #training

In [3]:
# randonmly creating dummy data to train the model
random.seed(333)
data = []
for i in range (100):
    customer_id = random.randint(0,100)
    pay_day = random.randint(0,1)
    visit_site = random.randint(0,1)
    decision = decide(customer_id, pay_day, visit_site)
    data.append({'CustomerID': customer_id, 'Pay_Day': pay_day, 'Visited_Site': visit_site, 'Buy_or_Not_Buy': decision})

In [4]:
# convert data into a data frame
df = pd.DataFrame(data)
df.head(20)

Unnamed: 0,CustomerID,Pay_Day,Visited_Site,Buy_or_Not_Buy
0,71,1,1,1
1,27,1,0,0
2,36,1,1,1
3,32,0,1,0
4,71,0,1,0
5,11,1,0,0
6,33,1,1,1
7,76,1,1,1
8,29,1,1,1
9,4,1,0,0


In [5]:
# creating a list of inputs and outputs to feed into our network
inputs = list(zip(df.Pay_Day, df.Visited_Site))
outputs = list(df['Buy_or_Not_Buy'])

In [6]:
# building our network
hidden_layer = len(inputs[0])*2
input_layer = len(inputs[0])

net = buildNetwork(input_layer, hidden_layer, 1, bias=True)

In [7]:
# creating the data set that we will feed into our trainer
ds = SupervisedDataSet(input_layer, 1)

In [8]:
# adding samples to the dataset
for i, j in zip(inputs, outputs):
    ds.addSample(i, j)


In [15]:
#training the network and dataset
trainer = BackpropTrainer(net, ds)

# epochs how many times we will try to fit this dataset ~3000
for epoch in range(3000):
    print(trainer.train()) # prints error, shows how this is learning the data

for i, j in zip(inputs, outputs):
    compute = net.activate(i)
    

1.8013861119422423
0.12245577261168661
0.09877144061305625
0.0857975879562208
0.07457540843152002
0.06503305568219925
0.05768188316479272
0.05107380604653382
0.046083127600811416
0.04143267821596961
0.038631027866705435
0.035894502227014125
0.033502626895905434
0.031128403256599126
0.03028679917869681
0.028948877862449533
0.027865730685943446
0.02714931923628481
0.02633328866618859
0.02571065478247822
0.025109296203764585
0.02472260111976388
0.024215169928832377
0.023824284570658185
0.023500817982407197
0.02261006095306958
0.022850606842950594
0.022632772164770755
0.022490314466313176
0.02217105243545946
0.02185797515602312
0.02170655743747163
0.021545301706566208
0.02120966811999476
0.021041262495637286
0.020775342519252643
0.020575015052298376
0.02000084319426598
0.02029256718332888
0.019798449171393152
0.019800328102450897
0.019530061723131546
0.01943584265522471
0.019108933749402725
0.018503735435832152
0.018760571850302558
0.018641851002182835
0.01827829341970523
0.018273321240879

In [16]:
#creating dummy test dataset
random.seed(420)
testing_data = []
for i in range (20):
    customer_id = random.randint(0,100)
    pay_day = random.randint(0,1)
    visit_site = random.randint(0,1)
    testing_data.append({'CustomerID': customer_id, 'Pay_Day': pay_day, 'Visited_Site': visit_site})

In [17]:
#creating the dataframe of our testing data
testing_df = pd.DataFrame(testing_data)
testing_df['Inputs']=list(zip(testing_df.Pay_Day, testing_df.Visited_Site))
testing_df.head(5)

Unnamed: 0,CustomerID,Pay_Day,Visited_Site,Inputs
0,3,1,1,"(1, 1)"
1,51,0,0,"(0, 0)"
2,3,1,1,"(1, 1)"
3,83,0,0,"(0, 0)"
4,69,1,1,"(1, 1)"


In [18]:
# answering question, computing how likely a customer will buy a product from the website
for i in range(len(testing_df)):
    customer = testing_df['CustomerID'][i]
    compute = net.activate(testing_df['Inputs'][i])
    prop = compute[0] * 100
    print('Customer', customer, ' is ', prop, ' % likely to buy.')

Customer 3  is  99.99999999999764  % likely to buy.
Customer 51  is  -4.218847493575595e-12  % likely to buy.
Customer 3  is  99.99999999999764  % likely to buy.
Customer 83  is  -4.218847493575595e-12  % likely to buy.
Customer 69  is  99.99999999999764  % likely to buy.
Customer 34  is  2.8199664825478976e-12  % likely to buy.
Customer 34  is  -4.218847493575595e-12  % likely to buy.
Customer 33  is  2.8976820942716586e-12  % likely to buy.
Customer 61  is  -4.218847493575595e-12  % likely to buy.
Customer 63  is  2.8199664825478976e-12  % likely to buy.
Customer 78  is  -4.218847493575595e-12  % likely to buy.
Customer 86  is  -4.218847493575595e-12  % likely to buy.
Customer 74  is  2.8199664825478976e-12  % likely to buy.
Customer 17  is  2.8199664825478976e-12  % likely to buy.
Customer 94  is  2.8199664825478976e-12  % likely to buy.
Customer 90  is  -4.218847493575595e-12  % likely to buy.
Customer 87  is  -4.218847493575595e-12  % likely to buy.
Customer 50  is  2.897682094271