In [1]:
import numpy as np
from Logistic_NN import Logistic_NN

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [4]:
mobile_train = pd.read_csv('mobile_train.csv')
mobile_test = pd.read_csv('mobile_test.csv')
mobile_train.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [5]:
mobile_train_vif = mobile_train.drop(['price_range'], axis=1)

def calculate_vif(data_frame):
    features = data_frame.columns
    vif_data = pd.DataFrame()
    vif_data["Feature"] = features
    vif_data["VIF"] = [variance_inflation_factor(data_frame.values, i) for i in range(data_frame.shape[1])]
    return vif_data.sort_values(by='VIF', ascending=False)
    
def drop_high_vif_features(data_frame, threshold=5):
    while True:
        vif_results = calculate_vif(data_frame)
        max_vif_feature = vif_results.loc[vif_results['VIF'].idxmax(), 'Feature']
        max_vif_value = vif_results.loc[vif_results['VIF'].idxmax(), 'VIF']
        
        if max_vif_value > threshold:
            print(f"Dropping feature '{max_vif_feature}' with VIF {max_vif_value}")
            data_frame = data_frame.drop(columns=max_vif_feature)
        else:
            break
    return data_frame
mobile_train_vif = drop_high_vif_features(mobile_train_vif)

Dropping feature 'mobile_wt' with VIF 12.972548425819065
Dropping feature 'px_width' with VIF 11.470014131904488
Dropping feature 'sc_h' with VIF 11.086593845458365
Dropping feature 'battery_power' with VIF 7.543843177190293
Dropping feature 'pc' with VIF 6.050059878559392
Dropping feature 'three_g' with VIF 5.930418164840767


In [6]:
X_vif = mobile_train_vif
y_vif = mobile_train['price_range']
X_train_vif, X_test_vif, y_train_vif, y_test_vif = train_test_split(X_vif, y_vif, test_size=0.2, random_state=42)
## now lets standardize the input data
scaler = StandardScaler()
X_train_vif_scaled = scaler.fit_transform(X_train_vif)
X_test_vif_scaled = scaler.transform(X_test_vif)

In [7]:
# transforming the data into dimensions (n,m) where m denoted the number of examples for ease of computation
X_train_nn, X_test_nn = X_train_vif_scaled.T, X_test_vif_scaled.T
y_train_nn,y_test_nn = y_train_vif.to_numpy().reshape((1,1600)), y_test_vif.to_numpy().reshape((1,400))

In [10]:
model1 = Logistic_NN(4, 0.01, 1000)

In [11]:
model1.fit(X_train_nn,y_train_nn)

cost after 0 iterations: 1.3894752102974326
cost after 200 iterations: 1.1055747841318257
cost after 400 iterations: 0.9809741817405256
cost after 600 iterations: 0.9099985988556191
cost after 800 iterations: 0.8617887646305927


(array([[-0.03813083,  0.01488702, -0.01930316,  0.02473812],
        [ 0.03466046, -0.05259475,  0.02981844, -0.02955453],
        [-0.00518858,  0.01701326, -0.04631842,  0.01080116],
        [-0.05163818,  0.04497545,  0.02316379, -0.01860325],
        [-0.01564286,  0.01907786, -0.04279907,  0.04362951],
        [-0.02735862,  0.01698119, -0.09440751,  0.1072443 ],
        [-0.04597294,  0.07436656, -0.04738763,  0.01695859],
        [ 0.02011611, -0.08680265,  0.05423481,  0.0094136 ],
        [-0.31766383,  0.00503249,  0.04004922,  0.24880695],
        [-1.39215605, -0.47987574,  0.50469495,  1.375659  ],
        [-0.01559693, -0.01966697,  0.0035722 ,  0.0368146 ],
        [-0.05167141,  0.06333881, -0.04432356,  0.02634408],
        [ 0.02618348,  0.02758379, -0.04886565, -0.00941848],
        [-0.01626603,  0.03179406, -0.01036779,  0.00229265]]),
 array([[-0.19559436],
        [ 0.20218899],
        [ 0.19740849],
        [-0.20400312]]))

In [12]:
predictions = model1.predict(X_test_nn)
model1.accuracy(predictions, y_test_nn)

0.7225

In [None]:
class NN_2_layer():

    def __init__(self, num_classes, num_hidden, learning_rate = 0.01, num_iters = 1000):
        self.num_classes = num_classes
        self.num_hidden = num_hidden # number of hidden units in the 1 hidden layer
        self.learning_rate = learning_rate
        self.num_iters = num_iters
        self.params = {}

    def tanh(self, x):
        return np.tanh(x)

    def tanh_derivative(self, x):
        return 1 - np.tanh(x)**2
    
    def relu(self, x):
        return np.maximum(0, x)
    
    def relu_derivative(self, x):
        return np.where(x > 0, 1, 0)
    
    def softmax(self, z):
        exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))  # for numerical stability
        return exp_z / np.sum(exp_z, axis=0, keepdims=True)

    def initialize_parameters(self):
        self.params['w1'] = np.random.randn(self.num_hidden, self.n) * np.sqrt(2 / self.n)  # Xavier initialization
        self.params['b1'] = np.zeros((self.num_hidden, 1))
        self.params['w2'] = np.random.randn(self.num_classes, self.num_hidden) * np.sqrt(2 / self.num_hidden)  # Xavier initialization
        self.params['b2'] = np.zeros((self.num_classes, 1))
        return

    def forward_propagation(self, X, Y):
        Z1 = np.dot(self.params['w1'], X) + self.params['b1']
        A1 = self.relu(Z1)
        Z2 = np.dot(self.params['w2'], A1) + self.params['b2']
        A2 = self.softmax(Z2)
        

        Y_one_hot = np.eye(self.num_classes)[Y].T
        Y_one_hot = Y_one_hot.reshape(A2.shape) # Ensure shapes are compatible
       
        # avoid numerical instability
        epsilon = 1e-15
        A2 = np.maximum(epsilon, A2)
        cost = -np.sum(Y_one_hot * np.log(A2)) / self.m
        
        return A1, A2, cost

    def backward_propagation(self, A1, A2, X, Y):

        # this is a way to calculate the dz for softmax activation since the output is similar to a one hot encoded label
        dz2 = A2.copy()
        dz2[Y, np.arange(self.m)] -= 1
        
        dw2 = 1/self.m * np.dot(dz2, A1.T)
        db2 = 1/self.m * np.sum(dz2, axis=1, keepdims=True)
        
        dz1 = np.dot(self.params['w2'].T, dz2) * self.relu_derivative(A1)
        dw1 = 1/m * np.dot(dz1, X.T)
        db1 = 1/m * np.sum(dz1, axis=1, keepdims=True)
        
        grads = {"dw1": dw1, "db1": db1, "dw2": dw2, "db2": db2}
        
        return grads

    def update_parameters(self, grads):
        self.params['w1'] -= self.learning_rate * grads["dw1"]
        self.params['b1'] -= self.learning_rate * grads["db1"]
        self.params['w2'] -= self.learning_rate * grads["dw2"]
        self.params['b2'] -= self.learning_rate * grads["db2"]
        return

    def fit(self, x, y):

        self.n, self.m = x.shape

        self.initialize_parameters() # just call the function and it stores the initialized parameters in the params attribute

        for i in range(self.num_iters):
            A1, A2, cost = self.forward_propagation(
                self.params['w1'],
                self.params['b1'], 
                self.params['w2'],
                self.params['b2'],
                x, y
            )
            grads = self.backward_propagation(
                A1, A2, 
                self.params['w2'], 
                x, y
            )
            self.update_parameters(grads) # just calling the function will update the parameters stored in the params attribute
    
            if i%1000==0:
                print(f"cost after {i} iterations: {cost}")

    
    def predict(self, w1, b1, w2, b2, X, Y):
        _, A2, _ = self.forward_propagation(w1, b1, w2, b2, X,Y)
        predictions = np.argmax(A2, axis=0)
        return predictions