In [18]:
%matplotlib inline

import numpy as np

from sklearn import datasets
from sklearn import cross_validation
from sklearn import preprocessing


In [19]:
import pandas as pd

In [20]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.noise import GaussianNoise
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.utils import np_utils

In [21]:
columns = [
    'state',
    'account length', 
    'area code', 
    'phone number', 
    'international plan', 
    'voice mail plan', 
    'number vmail messages',
    'total day minutes',
    'total day calls',
    'total day charge',
    'total eve minutes',
    'total eve calls',
    'total eve charge',
    'total night minutes',
    'total night calls',
    'total night charge',
    'total intl minutes',
    'total intl calls',
    'total intl charge',
    'number customer service calls',
    'churn']

df = pd.read_csv('churn.data.txt', header=None, names=columns)
mapping = {'no': 0., 'yes':1., 'False.':0., 'True.':1.}
df.replace({'international plan' : mapping, 'voice mail plan' : mapping, 'churn':mapping}, regex=True, inplace=True)

df.drop('phone number', axis=1, inplace=True)
df.drop('area code', axis=1, inplace=True)
df.drop('state', axis=1, inplace=True)

print("Dataset shape" + str(df.shape))


Dataset shape(3333, 18)


In [37]:
df['churn'].value_counts()

1.0    483
0.0    400
Name: churn, dtype: int64

In [22]:
#balancing the dataset
d_1 = df[df['churn']==1] #churned users
d_2 = df[df['churn']==0] #loyal users

df = d_1.append(d_2[:400])

In [23]:
# split train - test 90% 10%
X = df.loc[:, df.columns != 'churn']
Y = df['churn']

X_train, X_test, Y_train, Y_test = cross_validation.train_test_split(X, Y, test_size=0.1, random_state=0)

In [24]:
#churn or not churn
nb_classes = 1 

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

#scale the inputs for NN
scaler = preprocessing.MinMaxScaler((-1,1))
scaler.fit(X)

XX_train = scaler.transform(X_train.values)
XX_test  = scaler.transform(X_test.values) #changing the shape of the distribution

YY_train = Y_train.values 
YY_test  = Y_test.values 

In [25]:
print (X_train.shape, YY_train.shape)
print (X_test.shape, YY_test.shape)

(794, 17) (794,)
(89, 17) (89,)


### Building the model

In [26]:
# 10 epochs, gradient batched each 100 samples

batch_size = 100


### A few Defination


Sequential -  linear stack of layers.

specifying the input shape
Activations - Before training a model you need to configure the learning process using compile 
when choosing an optimizer consider the depth of your network and how quickly you get your results

Optimizers - Optimisation functions usually calculate the gradient i.e. the partial derivative of loss function 
with respect to weights, and the weights are modified in the opposite direction of the calculated gradient.
This cycle is repeated until we reach the minima of loss function.
a non liner layer employed to the summation of the linear layer in this case we use sigmoid to 
restric the results between 0 and 1, others Relu, tahn, leaky Relu

Relu- non-linear function. It gives an output x if x is positive and 0 otherwise.

Loss - loss function is how you are penalizing you output, the magnitude of error your model made on the output so
that you can adjust your weights e.g mean squared error (L2), mean absolute error(L1), cross entrophy done
using backprop

BatchNormalization - normalise the inputs of each layer in such a way that they have a mean output activation of zero 
and standard deviation of one. speed up learning. Deals with change in distribution 

Dropout -refers to dropping out units (both hidden and visible) in a neural network. Not considered during a particular forward or 
backward pass. prevents overfiting.






In [53]:
# For a single-input model with 2 classes (binary classification):
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(17,)))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])


# model = Sequential()

# model.add(Dense(50, input_shape=(17,)))
# model.add(Activation("hard_sigmoid"))
# model.add(BatchNormalization())
# model.add(Dropout(0.1))

# model.add(Dense(10))
# model.add(Activation("hard_sigmoid"))
# model.add(BatchNormalization())
# model.add(Dropout(0.1))

# model.add(Dense(1))
# model.add(Activation('sigmoid'))

# model.compile(loss='binary_crossentropy', optimizer='adam')

# model.fit(XX_test, YY_test, epochs=10)


In [54]:
score = model.evaluate(XX_test, YY_test) #ealuating the models accuracy or loss,



In [55]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_14 (Dense)             (None, 32)                576       
_________________________________________________________________
dense_15 (Dense)             (None, 1)                 33        
Total params: 609
Trainable params: 609
Non-trainable params: 0
_________________________________________________________________


In [56]:
#This can be improved
print (score) #ealuating the models accuracy and loss

#59

[0.7872681798559896, 0.5168539342585574]


In [52]:
from keras.utils.vis_utils import plot_model
plot_model(model, to_file='model_plot2.png', show_shapes=True, show_layer_names=True)

### visualizing the model
