## Libraries

In [29]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

import pickle

from sklearn.linear_model import LogisticRegression

## Data Preparation

In [30]:
def _load_pickle(path):
    """Unpickle and return the object stored at `path`.

    The file is opened in a `with` block so the handle is closed as soon as
    the object has been read.

    NOTE(security): unpickling can execute arbitrary code; only load
    artifacts that come from a trusted source.
    """
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


# Load Data
df_purchase = pd.read_csv('dataset/purchase_data.csv')

# Import Scaler
scaler = _load_pickle('dataset/scaler.pickle')

# Import PCA
pca = _load_pickle('dataset/pca.pickle')

# Import K-means
kmeans_pca = _load_pickle('dataset/kmeans_pca.pickle')

# Standardization: run the seven customer attributes through the loaded scaler
features = df_purchase[['Sex','Marital status', 'Age', 'Education', 'Income', 'Occupation', 'Settlement size']]
df_purchase_segm_std = scaler.transform(features)

# Apply PCA to the standardized attributes
df_purchase_segm_pca = pca.transform(df_purchase_segm_std)

# Segment data: one K-means label per row of the purchase data
purchase_segm_kmeans_pca = kmeans_pca.predict(df_purchase_segm_pca)

# Create a copy of the data frame so df_purchase itself stays untouched
df_purchase_predictors = df_purchase.copy()

# Add the segment label, plus one indicator column per label named Segment_<label>
df_purchase_predictors['Segment'] = purchase_segm_kmeans_pca
segment_dummies = pd.get_dummies(purchase_segm_kmeans_pca, prefix = 'Segment', prefix_sep = '_')
df_purchase_predictors = pd.concat([df_purchase_predictors, segment_dummies ], axis =1)

# Working frame used by the rest of the analysis
df_pa = df_purchase_predictors.copy()

In [None]:
# Display the rows of df_pa whose 'Segment' label is 1 or greater.
df_pa[df_pa['Segment'] >= 1]

In [None]:
# Display the array of segment labels predicted by the K-means model.
purchase_segm_kmeans_pca

## Purchase Probability Model

In [None]:
# Target variable for the logistic regression: the 'Incidence' column.
Y = df_pa['Incidence']

In [None]:
# Single predictor: the average of the five price columns, row by row.
price_total = (
    df_pa['Price_1']
    + df_pa['Price_2']
    + df_pa['Price_3']
    + df_pa['Price_4']
    + df_pa['Price_5']
)
X = (price_total / 5).to_frame(name='Mean_Price')

In [None]:
# Scatter the mean price (x-axis) against the Segment_3 indicator column (y-axis).
plt.scatter(X, df_pa['Segment_3'])

In [None]:
# Fit a logistic regression of Y on the mean price using the SAG solver.
# fit() returns the estimator itself, so construction and fitting can be chained.
model_purchase = LogisticRegression(solver='sag').fit(X, Y)
model_purchase

In [None]:
# Display the fitted coefficient of the single predictor (Mean_Price).
model_purchase.coef_

## Price Elasticity of Purchase Probability

In [None]:
# Summary statistics of the five price columns (Price_1 ... Price_5).
price_columns = [f'Price_{idx}' for idx in range(1, 6)]
df_pa[price_columns].describe()

In [None]:
# Grid of candidate price points: from 0.5 up to (but excluding) 3.5 in steps of 0.01.
PRICE_START, PRICE_STOP, PRICE_STEP = 0.5, 3.5, 0.01
price_range = np.arange(PRICE_START, PRICE_STOP, PRICE_STEP)
price_range

In [None]:
# Wrap the price grid in a one-column frame for predict_proba.
# The column is named 'Mean_Price' to match the feature name the models are
# fitted on; an unnamed (0) column makes scikit-learn warn that the input
# lacks valid feature names.
df_price_range = pd.DataFrame({'Mean_Price': price_range})

In [None]:
# Predicted class probabilities of the overall model at every price point in the grid.
Y_pr = model_purchase.predict_proba(df_price_range)

In [None]:
# Probability of purchase: the second column of the predict_proba output
purchase_pr = Y_pr[:, 1]

# Plot the probabilities against their position in the price grid
plt.plot(purchase_pr);

In [None]:
# Price elasticity of purchase probability at each price point:
#   elasticity = coefficient * price * (1 - purchase probability)
price_coefficient = model_purchase.coef_[:, 0]
pe = price_coefficient * price_range * (1 - purchase_pr)

In [None]:
# Quick look at the elasticity values, plotted against their position in the price grid.
plt.plot(pe)

In [None]:
# Start the results table with the price grid as its only column (default column label 0).
df_price_elasticities = pd.DataFrame(price_range)

In [None]:
# Label the price column and attach the overall (mean) elasticity, then plot both columns.
df_price_elasticities = (
    df_price_elasticities
    .rename(columns={0: 'Price_Point'})
    .assign(Mean_PE=pe)
)
df_price_elasticities.plot()

In [None]:
# Lift the row display limit so the whole elasticity table is shown.
# NOTE(review): this changes a global pandas option, so every later cell that
# displays a frame is affected as well.
pd.options.display.max_rows = None 
df_price_elasticities

In [None]:
# Elasticity curve of the overall model, drawn against the actual price points.
fig, ax = plt.subplots(figsize=(9, 6))
ax.plot(price_range, pe, color='grey')
ax.set_xlabel('Price')
ax.set_ylabel('Elasticity')
ax.set_title('Price Elasticity of Purchase')

## Purchase Probability by Segments

### Segment 1 - Career-Focused

In [None]:
# Select the rows labelled as segment 1
df_pa_segment_1 = df_pa[df_pa['Segment'] == 1]

# Target: the 'Incidence' column for segment 1
Y = df_pa_segment_1['Incidence']

# Predictor: the average of the five price columns for segment 1.
# All of Price_1..Price_5 must appear exactly once; summing Price_5 twice and
# leaving out Price_3 would give a wrong mean.
X = pd.DataFrame()
X['Mean_Price'] = (df_pa_segment_1['Price_1'] +
                   df_pa_segment_1['Price_2'] +
                   df_pa_segment_1['Price_3'] +
                   df_pa_segment_1['Price_4'] +
                   df_pa_segment_1['Price_5']) / 5


In [None]:
# Fit the segment 1 logistic regression (SAG solver) of Y on the mean price.
# fit() returns the estimator itself, so construction and fitting can be chained.
model_incidence_segment_1 = LogisticRegression(solver='sag').fit(X, Y)
model_incidence_segment_1

In [None]:
# Display the fitted Mean_Price coefficient of the segment 1 model
model_incidence_segment_1.coef_

In [None]:
# Predicted class probabilities of the segment 1 model at every price point
Y_segment_1 = model_incidence_segment_1.predict_proba(df_price_range)

# Probability of buying: second column of the predict_proba output
purchase_pr_segment_1 = Y_segment_1[:, 1]

# Price elasticity for segment 1: coefficient * price * (1 - purchase probability)
coef_segment_1 = model_incidence_segment_1.coef_[:, 0]
pe_segment_1 = coef_segment_1 * price_range * (1 - purchase_pr_segment_1)

### Result

In [None]:
# Store the segment 1 elasticities in the results table and plot them
# next to the overall (mean) elasticities.
df_price_elasticities['PE_Segment_1'] = pe_segment_1
df_price_elasticities[['Mean_PE','PE_Segment_1']].plot()

### Segment 2 - Fewer Opportunities

In [None]:
# Select only the rows labelled as segment 2
df_pa_segment_2 = df_pa[df_pa['Segment'] == 2]

# Target: the 'Incidence' column for segment 2
Y = df_pa_segment_2['Incidence']

# Predictor: the average of the five price columns for segment 2.
# All of Price_1..Price_5 must appear exactly once; summing Price_5 twice and
# leaving out Price_3 would give a wrong mean.
X = pd.DataFrame()
X['Mean_Price'] = (df_pa_segment_2['Price_1'] +
                   df_pa_segment_2['Price_2'] +
                   df_pa_segment_2['Price_3'] +
                   df_pa_segment_2['Price_4'] +
                   df_pa_segment_2['Price_5']) / 5

# Fit the logistic regression model for segment 2
model_incidence_segment_2 = LogisticRegression(solver='sag')
model_incidence_segment_2.fit(X, Y)

# Show the fitted Mean_Price coefficient of the segment 2 model
print(model_incidence_segment_2.coef_)

# Predicted class probabilities of the segment 2 model at every price point
Y_segment_2 = model_incidence_segment_2.predict_proba(df_price_range)

# Probability of buying: second column of the predict_proba output
purchase_pr_segment_2 = Y_segment_2[:, 1]

# Price elasticity for segment 2: coefficient * price * (1 - purchase probability)
pe_segment_2 = model_incidence_segment_2.coef_[:,0]*price_range*(1 - purchase_pr_segment_2)


### Segment 3 - Well to do

In [None]:
# Select only the rows labelled as segment 3
df_pa_segment_3 = df_pa[df_pa['Segment'] == 3]

# Target: the 'Incidence' column for segment 3
Y = df_pa_segment_3['Incidence']

# Predictor: the average of the five price columns for segment 3.
# All of Price_1..Price_5 must appear exactly once; summing Price_5 twice and
# leaving out Price_3 would give a wrong mean.
X = pd.DataFrame()
X['Mean_Price'] = (df_pa_segment_3['Price_1'] +
                   df_pa_segment_3['Price_2'] +
                   df_pa_segment_3['Price_3'] +
                   df_pa_segment_3['Price_4'] +
                   df_pa_segment_3['Price_5']) / 5

# Fit the logistic regression model for segment 3
model_incidence_segment_3 = LogisticRegression(solver='sag')
model_incidence_segment_3.fit(X, Y)

# Show the fitted Mean_Price coefficient of the segment 3 model
print(model_incidence_segment_3.coef_)

# Predicted class probabilities of the segment 3 model at every price point
Y_segment_3 = model_incidence_segment_3.predict_proba(df_price_range)

# Probability of buying: second column of the predict_proba output
purchase_pr_segment_3 = Y_segment_3[:, 1]

# Price elasticity for segment 3: coefficient * price * (1 - purchase probability)
pe_segment_3 = model_incidence_segment_3.coef_[:,0]*price_range*(1 - purchase_pr_segment_3)


In [None]:
# Add the segment 2 and segment 3 elasticities to the results table,
# then plot the three per-segment curves together.
for column_name, elasticities in (('PE_Segment_2', pe_segment_2),
                                  ('PE_Segment_3', pe_segment_3)):
    df_price_elasticities[column_name] = elasticities

segment_pe_columns = ['PE_Segment_1', 'PE_Segment_2', 'PE_Segment_3']
df_price_elasticities[segment_pe_columns].plot(figsize=(10, 9))