In [1]:
# Importing necessary libraries
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from ml_from_scratch import LogisticRegression
from evaluation import confusion_matrix

In [5]:
# Build a reproducible synthetic binary-classification dataset:
# 1000 samples, 10 features, seeded so every run yields the same arrays
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)

# Preview the first five feature rows under their heading
print("X (features):", X[:5], sep="\n")

# Preview the matching first five labels under their heading
print("\ny (labels):", y[:5], sep="\n")

X (features):
[[ 0.96479937 -0.06644898  0.98676805 -0.35807945  0.99726557  1.18189004
  -1.61567885 -1.2101605  -0.62807677  1.22727382]
 [-0.91651053 -0.56639459 -1.00861409  0.83161679 -1.17696211  1.82054391
   1.75237485 -0.98453405  0.36389642  0.20947008]
 [-0.10948373 -0.43277388 -0.4576493   0.79381847 -0.26864575 -1.83635978
   1.23908594 -0.2463834  -1.05814521 -0.29737608]
 [ 1.75041163  2.02360622  1.68815935  0.00679984 -1.60766103  0.18474058
  -2.61942676 -0.35744542 -1.47312719 -0.19003904]
 [-0.22472606 -0.71130323 -0.22077758  0.11712422  1.53606118  0.59753771
   0.34864462 -0.93915557  0.17591477  0.23622365]]

y (labels):
[0 1 1 0 1]


In [6]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Show the dimensions of every partition, one labelled line each
for caption, part in (
    ("X train shape:", X_train),
    ("y train shape:", y_train),
    ("X test shape :", X_test),
    ("y test shape :", y_test),
):
    print(caption, part.shape)

X train shape: (800, 10)
y train shape: (800,)
X test shape : (200, 10)
y test shape : (200,)


In [8]:
# Configure the from-scratch logistic regression model
clf = LogisticRegression(
    learning_rate=0.01,
    num_iterations=1000,
    tol=1e-5,
)

# Train on the training split
clf.fit(X_train, y_train)

# Score the held-out split: predicted values first, then predicted probabilities
y_pred = clf.predict(X_test)
y_pred_proba = clf.predict_proba(X_test)

# Preview the first five probabilities, followed by the first five predicted values
print("Predicted probabilities:\n", y_pred_proba[:5])
print("Predicted values:\n", y_pred[:5])

Predicted probabilities:
 [0.24576915 0.9432837  0.38067184 0.85584416 0.06634151]
Predicted values:
 [0 1 0 1 0]


In [9]:
# Score the test-set predictions with the project's evaluation helper, which
# hands back the confusion matrix along with four summary metrics
cm, accuracy, sens, prec, f_score = confusion_matrix(y_test, y_pred)

# Report each result under its label, matrix first
report = (
    ("Confusion Matrix:\n", cm),
    ("Accuracy:", accuracy),
    ("Sensitivity (Recall):", sens),
    ("Precision:", prec),
    ("F1 Score:", f_score),
)
for caption, value in report:
    print(caption, value)

Confusion Matrix:
 [[78 11]
 [24 87]]
Accuracy: 0.824999999995875
Sensitivity (Recall): 0.7837837837767226
Precision: 0.8877551020317576
F1 Score: 0.8325358846614317
