In [1]:
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, roc_curve


In [2]:
# Load the pickled lr object
with open('log_reg_model.pkl', 'rb') as lr_file:
    scaler, lr = pickle.load(lr_file)

In [3]:
# Creating new unknown data for input variables, using numpy. 
#The variables are in the range of min and max of these variables
#generating 154 random values, to match the Test cases 



unknown_data = {
    
    'Pregnancies': np.random.randint(1, 17, size=154),
    'Glucose': np.random.randint(44, 199, size=154),
    'BloodPressure': np.random.randint(24, 122, size=154),
    'SkinThickness': np.random.randint(7, 99, size=154),
    'Insulin': np.random.randint(18, 846, size=154),
    'BMI': np.random.randint(18, 68, size=154),
    'DiabetesPedigreeFunction': np.random.randint(0.078, 2.42, size=154),
    'Age': np.random.randint(21, 81, size=154)
    }



In [4]:
#converting the dictionar into df
unknown_df = pd.DataFrame(unknown_data)
unknown_df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
count,154.0,154.0,154.0,154.0,154.0,154.0,154.0,154.0
mean,7.461039,116.24026,74.402597,52.428571,446.948052,43.149351,0.474026,50.785714
std,4.592027,46.746858,27.705623,27.52868,243.124401,14.406236,0.500954,17.443453
min,1.0,44.0,24.0,7.0,26.0,18.0,0.0,21.0
25%,3.0,76.0,49.25,28.5,205.5,31.25,0.0,35.0
50%,7.5,111.0,76.5,50.0,487.0,43.5,0.0,50.5
75%,11.0,159.75,98.0,75.75,649.5,57.0,1.0,66.0
max,16.0,198.0,120.0,98.0,839.0,67.0,1.0,80.0


In [5]:
#scaling the data according to similar scaling done in main file 

# Use the scaler to transform new data
new_scaled_data = scaler.transform(unknown_df)

In [6]:
# Make predictions using the logistic regression model
predictions = lr.predict(new_scaled_data)


In [7]:
#opening the Y_test cases for evaluation of model
with open('y_test.pkl', 'rb') as file:
    y_test = pickle.load(file)

In [8]:
# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)
roc_auc = roc_auc_score(y_test, predictions)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
print("ROC AUC Score:", roc_auc)

Accuracy: 0.487012987012987
Precision: 0.35365853658536583
Recall: 0.5272727272727272
F1 Score: 0.42335766423357657
ROC AUC Score: 0.49595959595959593
