In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor
import tensorflow as tf

In [3]:
mobile_train = pd.read_csv('mobile_train.csv')
mobile_test = pd.read_csv('mobile_test.csv')
mobile_train.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [4]:
mobile_train.describe()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,...,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0
mean,1238.5185,0.495,1.52225,0.5095,4.3095,0.5215,32.0465,0.50175,140.249,4.5205,...,645.108,1251.5155,2124.213,12.3065,5.767,11.011,0.7615,0.503,0.507,1.5
std,439.418206,0.5001,0.816004,0.500035,4.341444,0.499662,18.145715,0.288416,35.399655,2.287837,...,443.780811,432.199447,1084.732044,4.213245,4.356398,5.463955,0.426273,0.500116,0.500076,1.118314
min,501.0,0.0,0.5,0.0,0.0,0.0,2.0,0.1,80.0,1.0,...,0.0,500.0,256.0,5.0,0.0,2.0,0.0,0.0,0.0,0.0
25%,851.75,0.0,0.7,0.0,1.0,0.0,16.0,0.2,109.0,3.0,...,282.75,874.75,1207.5,9.0,2.0,6.0,1.0,0.0,0.0,0.75
50%,1226.0,0.0,1.5,1.0,3.0,1.0,32.0,0.5,141.0,4.0,...,564.0,1247.0,2146.5,12.0,5.0,11.0,1.0,1.0,1.0,1.5
75%,1615.25,1.0,2.2,1.0,7.0,1.0,48.0,0.8,170.0,7.0,...,947.25,1633.0,3064.5,16.0,9.0,16.0,1.0,1.0,1.0,2.25
max,1998.0,1.0,3.0,1.0,19.0,1.0,64.0,1.0,200.0,8.0,...,1960.0,1998.0,3998.0,19.0,18.0,20.0,1.0,1.0,1.0,3.0


In [5]:
mobile_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   battery_power  2000 non-null   int64  
 1   blue           2000 non-null   int64  
 2   clock_speed    2000 non-null   float64
 3   dual_sim       2000 non-null   int64  
 4   fc             2000 non-null   int64  
 5   four_g         2000 non-null   int64  
 6   int_memory     2000 non-null   int64  
 7   m_dep          2000 non-null   float64
 8   mobile_wt      2000 non-null   int64  
 9   n_cores        2000 non-null   int64  
 10  pc             2000 non-null   int64  
 11  px_height      2000 non-null   int64  
 12  px_width       2000 non-null   int64  
 13  ram            2000 non-null   int64  
 14  sc_h           2000 non-null   int64  
 15  sc_w           2000 non-null   int64  
 16  talk_time      2000 non-null   int64  
 17  three_g        2000 non-null   int64  
 18  touch_sc

Drawing parallels from the previous notebook we will be removing the correlated input variables using VIF

In [7]:
mobile_train_vif = mobile_train.drop(['price_range'], axis=1)

def calculate_vif(data_frame):
    features = data_frame.columns
    vif_data = pd.DataFrame()
    vif_data["Feature"] = features
    vif_data["VIF"] = [variance_inflation_factor(data_frame.values, i) for i in range(data_frame.shape[1])]
    return vif_data.sort_values(by='VIF', ascending=False)
    
def drop_high_vif_features(data_frame, threshold=5):
    while True:
        vif_results = calculate_vif(data_frame)
        max_vif_feature = vif_results.loc[vif_results['VIF'].idxmax(), 'Feature']
        max_vif_value = vif_results.loc[vif_results['VIF'].idxmax(), 'VIF']
        
        if max_vif_value > threshold:
            print(f"Dropping feature '{max_vif_feature}' with VIF {max_vif_value}")
            data_frame = data_frame.drop(columns=max_vif_feature)
        else:
            break
    return data_frame
mobile_train_vif = drop_high_vif_features(mobile_train_vif)

Dropping feature 'mobile_wt' with VIF 12.972548425819065
Dropping feature 'px_width' with VIF 11.470014131904488
Dropping feature 'sc_h' with VIF 11.086593845458365
Dropping feature 'battery_power' with VIF 7.543843177190293
Dropping feature 'pc' with VIF 6.050059878559392
Dropping feature 'three_g' with VIF 5.930418164840767


In [8]:
X_vif = mobile_train_vif
y_vif = mobile_train['price_range']
X_train_vif, X_test_vif, y_train_vif, y_test_vif = train_test_split(X_vif, y_vif, test_size=0.2, random_state=42)
## now lets standardize the input data
scaler = StandardScaler()
X_train_vif_scaled = scaler.fit_transform(X_train_vif)
X_test_vif_scaled = scaler.transform(X_test_vif)

### Logistic Regression

We will predict results using a base case logistic regression model from sk learn library and use it for benchmarking as we move forward

In [9]:
model_vif = LogisticRegression()
model_vif.fit(X_train_vif_scaled, y_train_vif)

# Make predictions
y_pred_vif = model_vif.predict(X_test_vif_scaled)

# Check accuracy of model
accuracy_lr = accuracy_score(y_test_vif, y_pred_vif)
conf_matrix_lr = confusion_matrix(y_test_vif, y_pred_vif)
classification_rep_lr = classification_report(y_test_vif, y_pred_vif)

print(f"Accuracy: {accuracy_lr}")
print(f"Confusion Matrix:\n{conf_matrix_lr}")
print(f"Classification Report:\n{classification_rep_lr}")

Accuracy: 0.79
Confusion Matrix:
[[93 12  0  0]
 [12 61 18  0]
 [ 0 10 68 14]
 [ 0  0 18 94]]
Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.89      0.89       105
           1       0.73      0.67      0.70        91
           2       0.65      0.74      0.69        92
           3       0.87      0.84      0.85       112

    accuracy                           0.79       400
   macro avg       0.79      0.78      0.78       400
weighted avg       0.79      0.79      0.79       400



### Logistic Regression - Neural Network implementation

In [11]:
# transforming the data into dimensions (n,m) where m denoted the number of examples for ease of computation
X_train_nn, X_test_nn = X_train_vif_scaled.T, X_test_vif_scaled.T
y_train_nn,y_test_nn = y_train_vif.to_numpy().reshape((1,1600)), y_test_vif.to_numpy().reshape((1,400))

In [12]:
def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))  # for numerical stability
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

def initialize_parameters(n, num_classes):
    w = np.random.randn(n, num_classes) * 0.01
    b = np.zeros((num_classes, 1))
    return w, b

def propagate(w, b, X, Y):
    m = X.shape[1]
    
    # Forward propagation
    Z = np.dot(w.T, X) + b
    A = softmax(Z)
    cost = -1/m * np.sum(np.log(A[Y, np.arange(m)]))
    
    # Backward propagation
    dz = A.copy()
    dz[Y, np.arange(m)] -= 1
    dw = 1/m * np.dot(X, dz.T)
    db = 1/m * np.sum(dz, axis=1, keepdims=True)
    
    grads = {"dw": dw, "db": db}
    
    return grads, cost

def optimize(w, b, X, Y, num_iterations, learning_rate):
    for i in range(num_iterations):
        grads, cost = propagate(w, b, X, Y)
        
        # Update parameters
        w -= learning_rate * grads["dw"]
        b -= learning_rate * grads["db"]
    
    return w, b

def predict(w, b, X):
    Z = np.dot(w.T, X) + b
    A = softmax(Z)
    predictions = np.argmax(A, axis=0)
    return predictions

# we will try to encompass all of the above into one function called lr_nn_model

def lr_nn_model(X_train, Y_train, num_classes, num_iterations=1000, learning_rate=0.01):
    w, b = initialize_parameters(X_train.shape[0], num_classes)
    w, b = optimize(w, b, X_train, Y_train, num_iterations, learning_rate)
    return w, b
    
def accuracy(predictions, actual_labels):
    correct_predictions = np.sum(predictions == actual_labels)
    total_examples = len(actual_labels[0])
    acc = correct_predictions / total_examples
    return acc

In [15]:
w, b = lr_nn_model(X_train_nn,y_train_nn, 4)
test_predictions = predict(w, b, X_test_nn)
accuracy_scores_test = accuracy(test_predictions, y_test_nn)

cm = confusion_matrix(np.squeeze(y_test_nn), np.squeeze(test_predictions))
class_metrics = classification_report(np.squeeze(y_test_nn), np.squeeze(test_predictions))

print(f"Accuracy: {accuracy_scores_test}")
print("Confusion Matrix:")
print(cm)

print("\nClassification Report:")
print(class_metrics)

Accuracy: 0.7225
Confusion Matrix:
[[105   0   0   0]
 [ 33  40  17   1]
 [  0  10  39  43]
 [  0   0   7 105]]

Classification Report:
              precision    recall  f1-score   support

           0       0.76      1.00      0.86       105
           1       0.80      0.44      0.57        91
           2       0.62      0.42      0.50        92
           3       0.70      0.94      0.80       112

    accuracy                           0.72       400
   macro avg       0.72      0.70      0.68       400
weighted avg       0.72      0.72      0.70       400



### Neural Network with 1 Hidden layer

Now lets build further on the neural network implementation of logistic regression and add one hidden layer to the neural network

In [35]:
def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x)**2

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def softmax(z):
    exp_z = np.exp(z - np.max(z, axis=0, keepdims=True))  # for numerical stability
    return exp_z / np.sum(exp_z, axis=0, keepdims=True)

def initialize_parameters(n, n_hidden, num_classes):
    w1 = np.random.randn(n_hidden, n) * np.sqrt(1 / n)  # Xavier initialization
    b1 = np.zeros((n_hidden, 1))
    w2 = np.random.randn(num_classes, n_hidden) * np.sqrt(1 / n_hidden)  # Xavier initialization
    b2 = np.zeros((num_classes, 1))
    return w1, b1, w2, b2

def forward_propagation(w1, b1, w2, b2, X):
    Z1 = np.dot(w1, X) + b1
    A1 = tanh(Z1)
    Z2 = np.dot(w2, A1) + b2
    A2 = softmax(Z2)
    return A1, A2

def backward_propagation(A1, A2, w2, X, Y):
    m = X.shape[1]
    
    # dz2 = A2 - Y
    dz2 = A2.copy()
    dz2[Y, np.arange(m)] -= 1
    dw2 = 1/m * np.dot(dz2, A1.T)
    db2 = 1/m * np.sum(dz2, axis=1, keepdims=True)
    
    dz1 = np.dot(w2.T, dz2) * tanh_derivative(A1)
    dw1 = 1/m * np.dot(dz1, X.T)
    db1 = 1/m * np.sum(dz1, axis=1, keepdims=True)
    
    grads = {"dw1": dw1, "db1": db1, "dw2": dw2, "db2": db2}
    
    return grads

def update_parameters(w1, b1, w2, b2, grads, learning_rate):
    w1 -= learning_rate * grads["dw1"]
    b1 -= learning_rate * grads["db1"]
    w2 -= learning_rate * grads["dw2"]
    b2 -= learning_rate * grads["db2"]
    return w1, b1, w2, b2

def predict(w1, b1, w2, b2, X):
    _, A2 = forward_propagation(w1, b1, w2, b2, X)
    predictions = np.argmax(A2, axis=0)
    return predictions

def nn_1_layer_model(X_train, Y_train, X_test, Y_test, num_classes, n_hidden, num_iterations=1000, learning_rate=0.01):
    # Initialize parameters
    w1, b1, w2, b2 = initialize_parameters(X_train.shape[0], n_hidden, num_classes)

    # Train the neural network
    for i in range(num_iterations):
        A1, A2 = forward_propagation(w1, b1, w2, b2, X_train)
        grads = backward_propagation(A1, A2, w2, X_train, Y_train)
        w1, b1, w2, b2 = update_parameters(w1, b1, w2, b2, grads, learning_rate)

    # Make predictions on the training set
    train_predictions = predict(w1, b1, w2, b2, X_train)

    # Make predictions on the test set
    test_predictions = predict(w1, b1, w2, b2, X_test)

    # Evaluate the model on the training set
    print("Training Set Evaluation:")
    print("Confusion Matrix:")
    print(confusion_matrix(np.squeeze(Y_train), np.squeeze(train_predictions)))

    print("\nClassification Report:")
    print(classification_report(np.squeeze(Y_train), np.squeeze(train_predictions)))

    # Evaluate the model on the test set
    print("\nTest Set Evaluation:")
    print("Confusion Matrix:")
    print(confusion_matrix(np.squeeze(Y_test), np.squeeze(test_predictions)))

    print("\nClassification Report:")
    print(classification_report(np.squeeze(Y_test), np.squeeze(test_predictions)))

In [36]:
nn_1_layer_model(X_train_nn, y_train_nn, X_test_nn, y_test_nn, num_classes=4, n_hidden=4, num_iterations=10000, learning_rate=0.001)

Training Set Evaluation:
Confusion Matrix:
[[386   9   0   0]
 [193 148  50  18]
 [ 13  46 183 166]
 [  0   0  68 320]]

Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.98      0.78       395
           1       0.73      0.36      0.48       409
           2       0.61      0.45      0.52       408
           3       0.63      0.82      0.72       388

    accuracy                           0.65      1600
   macro avg       0.66      0.65      0.62      1600
weighted avg       0.66      0.65      0.62      1600


Test Set Evaluation:
Confusion Matrix:
[[102   3   0   0]
 [ 39  32  18   2]
 [  1   8  51  32]
 [  0   0  19  93]]

Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.97      0.83       105
           1       0.74      0.35      0.48        91
           2       0.58      0.55      0.57        92
           3       0.73      0.83      0.78       112

    accu