In [1]:
import numpy as np
import pandas as pd
dataset = pd.read_excel("p1-customers.xlsx")
dataset.head()

Unnamed: 0,Name,Customer_Segment,Customer_ID,Address,City,State,ZIP,#_Years_as_Customer,Store_Number,Responded_to_Last_Catalog,Avg_Num_Products_Purchased,Avg_Sale_Amount
0,Pamela Wright,Store Mailing List,2,376 S Jasmine St,Denver,CO,80224,6,100,No,1,227.9
1,Danell Valdez,Store Mailing List,7,12066 E Lake Cir,Greenwood Village,CO,80111,6,105,Yes,1,55.0
2,Jessica Rinehart,Store Mailing List,8,7225 S Gaylord St,Centennial,CO,80122,3,101,No,1,212.57
3,Nancy Clark,Store Mailing List,9,4497 Cornish Way,Denver,CO,80239,6,105,Yes,1,195.31
4,Andrea Brun,Store Mailing List,10,2316 E 5th Ave,Denver,CO,80206,2,100,Yes,1,110.55


In [2]:
dataset = dataset.drop(["Name", "Customer_ID", "Address", "State", "#_Years_as_Customer", "City", "ZIP", "Store_Number", "Responded_to_Last_Catalog"], axis=1)
dataset.head()

Unnamed: 0,Customer_Segment,Avg_Num_Products_Purchased,Avg_Sale_Amount
0,Store Mailing List,1,227.9
1,Store Mailing List,1,55.0
2,Store Mailing List,1,212.57
3,Store Mailing List,1,195.31
4,Store Mailing List,1,110.55


In [3]:
dataset_dv = pd.get_dummies(dataset, columns = ["Customer_Segment"], drop_first=True)
dataset_dv.head()

Unnamed: 0,Avg_Num_Products_Purchased,Avg_Sale_Amount,Customer_Segment_Loyalty Club Only,Customer_Segment_Loyalty Club and Credit Card,Customer_Segment_Store Mailing List
0,1,227.9,0,0,1
1,1,55.0,0,0,1
2,1,212.57,0,0,1
3,1,195.31,0,0,1
4,1,110.55,0,0,1


In [4]:
corr_mat = dataset_dv.corr()
features = corr_mat["Avg_Sale_Amount"].sort_values(ascending = False)
features

Avg_Sale_Amount                                  1.000000
Avg_Num_Products_Purchased                       0.855754
Customer_Segment_Loyalty Club and Credit Card    0.591488
Customer_Segment_Loyalty Club Only              -0.005746
Customer_Segment_Store Mailing List             -0.666655
Name: Avg_Sale_Amount, dtype: float64

In [5]:
from sklearn.model_selection import train_test_split
train, test = train_test_split(dataset_dv, test_size = 0.3, random_state = 0) 

In [6]:
train_targets = train.loc[:,"Avg_Sale_Amount"].values
train = train.drop("Avg_Sale_Amount", axis=1)
train_data = (train.iloc[:,:].values).astype('float64')

test_targets = test.loc[:,"Avg_Sale_Amount"].values
test = test.drop("Avg_Sale_Amount", axis=1)
test_data = (test.iloc[:,:].values).astype('float64')

In [7]:
train_targets

array([ 657.55, 2455.9 ,   16.25, ...,  235.44,   13.66,  548.84])

In [8]:
train_data

array([[ 5.,  0.,  0.,  0.],
       [16.,  0.,  1.,  0.],
       [ 1.,  0.,  0.,  1.],
       ...,
       [ 1.,  0.,  0.,  1.],
       [ 1.,  0.,  0.,  1.],
       [ 5.,  1.,  0.,  0.]])

In [12]:
test_targets

array([ 172.71,   80.25,  138.49, 1492.45,  112.99,  176.09,  738.73,
         89.53,   14.86,  153.96,  309.87, 1049.69,  423.21,  322.03,
        192.43,  985.71,  187.17,  217.01,   54.  ,  333.97,  374.67,
         34.72,   55.08,  828.47,  194.25,  145.72,  643.66,  992.74,
         46.65,  181.21,  106.92,  649.31,  477.18,  105.24,  335.13,
        681.82,  184.44,  193.81,  260.01,  281.13,  653.9 ,  623.49,
        654.58,  248.98,  914.58,  181.92,  541.29,    6.5 ,  455.78,
        244.86, 1058.91,  854.87,  381.99,  451.28,  192.04,  891.75,
        261.84,  153.26,  202.77,  421.91,  311.84,  249.44,  290.25,
        285.32,   46.89,  232.17, 1472.38,  252.94,   75.24,  984.13,
        817.61,  775.66,  123.12,  374.83,   81.57,  235.54,  182.97,
        803.54,  231.88,   66.88,  206.53,  211.1 ,  264.84,  551.53,
        540.68,  629.32,  814.24,  160.13,    8.5 ,   88.43,  161.85,
        209.08,  347.54,  359.18,  378.31,  406.86,  209.3 ,  225.38,
        190.1 ,  124

In [10]:
test_data

array([[1., 0., 0., 1.],
       [1., 0., 0., 1.],
       [1., 0., 0., 1.],
       ...,
       [3., 0., 0., 1.],
       [2., 0., 0., 1.],
       [1., 0., 0., 1.]])

In [13]:
# Normalizing the data
mean = train_data.mean(axis=0)
train_data -= mean
std = train_data.std(axis=0)
train_data /= std

test_data -= mean
test_data /= std

In [14]:
from keras import models
from keras import layers

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [15]:
def build_model():
    model = models.Sequential()
    model.add(layers.Dense(16, activation='relu', input_shape = (train_data.shape[1],)))
    model.add(layers.Dense(16, activation='relu'))
    model.add(layers.Dense(1))
    
    model.compile(optimizer='rmsprop', 
                  loss = 'mse',
                  metrics = ['mae'])
    return model

In [16]:
model = build_model()
model.fit(train_data, train_targets, epochs=750, batch_size=16, verbose=0)

<keras.callbacks.History at 0x7fca62156e10>

In [17]:
test_mse_score, test_mae_score = model.evaluate(test_data, test_targets)



In [18]:
test_mse_score

12413.59487756673

In [19]:
test_mae_score

77.85959301021624

In [20]:
y_pred = model.predict(test_data)

In [21]:
from sklearn.metrics import r2_score, mean_squared_error
r2 = r2_score(test_targets, y_pred)
mse = mean_squared_error(test_targets, y_pred)

In [22]:
r2, mse

(0.8752595942385236, 12413.595096532956)

In [23]:
new_customers = pd.read_excel("p1-mailinglist.xlsx")
scores = new_customers.loc[:, ["Score_Yes"]].values

In [24]:
new_customers = new_customers.drop(["Name", "Customer_ID", "Address", "State", "#_Years_as_Customer","City", "ZIP", "Store_Number", "Score_No", "Score_Yes"], axis=1)
new_customers = pd.get_dummies(new_customers, columns = ["Customer_Segment"], drop_first=True)
new_customers_pred = new_customers.loc[:,:].values

In [25]:
predictions = model.predict(new_customers_pred)

In [26]:
predicted_profit = (np.sum(np.multiply(scores,predictions))/2)-(6.5*250)

#predicted_profit = (np.dot(scores,predictions)/2)-(6.5*250)

print("The predicted profit from sending out the catalogs to the new customer is: %.2f" % predicted_profit)

The predicted profit from sending out the catalogs to the new customer is: 45648.59
