In [9]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd


df =pd.read_csv('train.csv')

### UNCOMMENT THIS PART TO USE THE FEATURE ENGINEERING
# df['total_call'] = df['total_day_calls'] + df['total_eve_calls'] + df['total_night_calls']

# # Create 'total_charges' feature
# df['total_charges'] = df['total_day_charge'] + df['total_eve_charge'] + df['total_night_charge']

# # Create 'total_minutes' feature
# df['total_minutes'] = df['total_day_minutes'] + df['total_eve_minutes'] + df['total_night_minutes']
# df = df.drop(['total_day_calls', 'total_eve_calls', 'total_night_calls'], axis=1)

# # Delete contributing features for 'total_charges'
# df = df.drop(['total_day_charge', 'total_eve_charge', 'total_night_charge'], axis=1)

# # Delete contributing features for 'total_minutes'
# df = df.drop(['total_day_minutes', 'total_eve_minutes', 'total_night_minutes'], axis=1)


# df.drop(['state', 'area_code', 'account_length'], axis=1, inplace=True)


###################

###ONE HOT ENCODING
df = pd.get_dummies(df, columns=['area_code','state'])


### MOVING THE Y VARIABLE TO THE END
churn = df['churn']
df = df.drop('churn', axis=1)
df['churn'] = churn


data=np.array(df)


data[data=='no']=0
data[data=='yes']=1
data[data==False]=0
data[data==True]=1
print(data[0])
X=data[:,:-1]
y=data[:,-1]



### SPLITTING THE DATA INTO TRAIN, VALIDATION AND TEST SETS

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42) 

X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42) 

###DATA NORMALIZATION
def normalize(X):
    X = X.astype(float)
    X=(X-X.mean(axis=0))/X.std(axis=0)
    return X
X_train = normalize(X_train)
X_val = normalize(X_val)
X_test = normalize(X_test)

#SMOTE, oversampling the minority class (will read more about this later)
X_train_oversampled_smote = []
labels_train_oversampled_smote = []
indices_0 = np.where(y_train == 0)[0]
indices_1 = np.where(y_train == 1)[0]
indices = np.concatenate([indices_0, indices_1])
for _ in range(X_train.shape[0]):
    p = np.random.random()
    #sample from majority class
    if p < 0.5:
        X_train_oversampled_smote.append(X_train[np.random.choice(indices_0)])
        labels_train_oversampled_smote.append(0)
    #sample from minority class
    else:
        #get two random samples from minority class
        minority_samp_1 = X_train[np.random.choice(indices_1)]
        minority_samp_2 = X_train[np.random.choice(indices_1)]
        
        #get random proportion with which to mix them
        prop = np.random.random()
        
        #generate synthetic sample from minority class
        synthetic_minority_samp = prop*minority_samp_1 + (1-prop)*minority_samp_2
        X_train_oversampled_smote.append(synthetic_minority_samp)
        labels_train_oversampled_smote.append(1)
        
X_train = np.array(X_train_oversampled_smote)
y_train = np.array(labels_train_oversampled_smote)

print(y_train[y_train==0].shape)
print(X_train.shape)

####################

print(y_train)


[107 0 1 26 161.6 123 27.47 195.5 103 16.62 254.4 103 11.45 13.7 3 3.7 1 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
(1492,)
(2975, 71)
[0 1 0 ... 0 0 0]


In [21]:
def reLU(z):
    return max(0,z)

def reLU_derivative(z):
    return 1 if z>0 else 0
def sigmoid(z):
    return 1/(1+np.exp(-z))


def sigmoid_derivative(z):
    return sigmoid(z)*(1-sigmoid(z))

def dense_layer(A_in, W, b):
     units = W.shape[1]
     A_out = np.zeros(units)
     for j in range(units):               
            w = W[:,j]                                    
            z = np.dot(w, A_in) + b[j]         
            A_out[j] = reLU(z)               
     return(A_out)
 
 
layers =[X_train.shape[1], 10, 10, 1]



def initialize_parameters(layers):
    weights = []
    biases = []
    for i in range(1, len(layers)):
        weights.append(np.random.randn(layers[i-1], layers[i]))
        biases.append(np.random.randn(layers[i]))
    return weights, biases

Activation_list=[]
def forward_propagation(X, layers, weights, biases):
    A = X
    for i in range(len(layers)-1):
        A = dense_layer(A, weights[i], biases[i])
        Activation_list.append(A)
    return A


def update_parameters(weights, biases, dW, db, learning_rate):
    for i in range(len(weights)):
        weights[i] = weights[i] - learning_rate*dW[i]
        biases[i] = biases[i] - learning_rate*db[i]
    return weights, biases

def compute_cost(y, y_hat):
    return np.mean((y-y_hat)**2)

def back_propagation(X, y, y_hat, layers, weights, biases):
    dW = []
    db = []
    m = X.shape[0]
    dC_dA = (y_hat - y)
    dC_dZ = dC_dA.dot(reLU_derivative(Activation_list[-1]))
    dZ_dW = dC_dZ.dot(Activation_list[-2])
    dZ_db = dC_dZ
    dW.append(dZ_dW)
    db.append(dZ_db)
    for i in range(len(layers)-2, 0, -1):
        dC_dA = dC_dZ.dot(weights[i].T)
        dC_dZ = dC_dA*reLU_derivative(Activation_list[i-1])
        dZ_dW = dC_dZ.dot(Activation_list[i-1].T)
        dZ_db = dC_dZ
        dW.append(dZ_dW)
        db.append(dZ_db)
    return dW, db
    
    
    
def gradient_descent(X_train, y_train, layers, learning_rate, epochs):
    weights, biases = initialize_parameters(layers)
    for i in range(epochs):
        y_hat=np.zeros(y_train.shape)
        for i in range(X_train.shape[0]):
            y_hat[i] = forward_propagation(X_train[i], layers, weights, biases)
        
        cost = compute_cost(y_train, y_hat)
        print(f'Epoch {i}, Cost: {cost}')
        dW, db = back_propagation(X_train, y_train, y_hat, layers, weights, biases)
        weights, biases = update_parameters(weights, biases, dW, db, learning_rate)
        
    return weights, biases

weights, biases = gradient_descent(X_train, y_train, layers, 0.01, 1000)    
    
def predict(X, layers, weights, biases):
    y_hat = forward_propagation(X, layers, weights, biases)
    return y_hat

y_hat = predict(X_val, layers, weights, biases)
y_hat = np.round(y_hat)
print(y_hat)
 

Epoch 2974, Cost: 24.645447030102986


ValueError: shapes (2975,) and (10,) not aligned: 2975 (dim 0) != 10 (dim 0)

array([-3, -3, -3])