In [None]:
# Mount Google Drive at /content/drive so files stored there can be opened by path
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Path of the diabetes CSV under the /content/drive mount point
filepath = (
    '/content/drive/My Drive/Colab Notebooks/Datasets/diabetes.csv'
)

In [None]:
#import the libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [None]:
# Load the dataset
# header=None tells pandas that no row of the file is a header, so every row
# is parsed as data and the labels below are applied to the columns instead.
column_names = [
    "Pregnancies",
    "Glucose",
    "BloodPressure",
    "SkinThickness",
    "Insulin",
    "BMI",
    "DiabetesPedigreeFunction",
    "Age",
    "Outcome",
]
# Read the file once; a separate header-inferring read is not needed because
# its result would be replaced by this frame anyway.
data = pd.read_csv(filepath, names=column_names, header=None)

# Display the first few rows
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [None]:
# Separate the label column from the predictors
y = data.loc[:, "Outcome"]             # target: the "Outcome" column
X = data.drop(columns=["Outcome"])     # features: every remaining column

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# Build the logistic regression classifier and fit it on the training split
# (fit() returns the estimator itself, so construction and training can be chained)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = model.predict(X_test)

In [None]:
# Score the test-set predictions against the true labels
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Show each metric under its own heading
print("Confusion Matrix:", cm, sep="\n")
print("\nAccuracy:", acc)
print("\nClassification Report:", report, sep="\n")

Confusion Matrix:
[[120  31]
 [ 30  50]]

Accuracy: 0.7359307359307359

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.79      0.80       151
           1       0.62      0.62      0.62        80

    accuracy                           0.74       231
   macro avg       0.71      0.71      0.71       231
weighted avg       0.74      0.74      0.74       231



In [None]:
# Ask the user to provide input values for each feature
print("Please enter the values for each feature:")
pregnancies = int(input("Pregnancies: "))
glucose = float(input("Glucose: "))
blood_pressure = float(input("Blood Pressure: "))
skin_thickness = float(input("Skin Thickness: "))
insulin = float(input("Insulin: "))
bmi = float(input("BMI: "))
diabetes_pedigree_function = float(input("Diabetes Pedigree Function: "))
age = int(input("Age: "))

# Create a one-row frame labelled with the training feature columns, so the
# values reach the model under the same names and in the same order it was
# fitted with.
new_sample = pd.DataFrame(
    [[
        pregnancies,
        glucose,
        blood_pressure,
        skin_thickness,
        insulin,
        bmi,
        diabetes_pedigree_function,
        age,
    ]],
    columns=X_train.columns,
)

# The logistic regression fitted earlier in the notebook was trained on the
# raw (unscaled) features, and no scaler has been created at this point, so
# the sample is passed to the model as entered.
new_prediction = model.predict(new_sample)

# Display the prediction (predict returns an array with one label per row)
if new_prediction[0] == 1:
    print("The model predicts diabetes.")
else:
    print("The model predicts no diabetes.")


Please enter the values for each feature:
Pregnancies: 0
Glucose: 200
Blood Pressure: 70
Skin Thickness: 40
Insulin: 150
BMI: 40
Diabetes Pedigree Function: .6
Age: 27
The model predicts diabetes.




In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Load the dataset
# header=None: no row of the file is treated as a header; the labels below
# are applied to the columns instead. The file is read once.
column_names = [
    "Pregnancies",
    "Glucose",
    "BloodPressure",
    "SkinThickness",
    "Insulin",
    "BMI",
    "DiabetesPedigreeFunction",
    "Age",
    "Outcome",
]
data2 = pd.read_csv(filepath, names=column_names, header=None)

# Prepare the data: "Outcome" is the target, all other columns are features
X = data2.drop("Outcome", axis=1)
y = data2["Outcome"]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Perform feature scaling
# The scaler is fitted on the training split only and then reused for the test split
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Set up the logistic regression model with GridSearchCV for hyperparameter tuning
params = {"C": [0.001, 0.01, 0.1, 1, 10, 100],
          "solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"]}

# random_state fixes the data shuffling done by the stochastic solvers in the
# grid (sag, saga), so repeated runs select the same hyperparameters.
model = GridSearchCV(
    LogisticRegression(max_iter=1000, random_state=42),
    params,
    cv=5,
    scoring="accuracy",
)
model.fit(X_train_scaled, y_train)

# Print the best hyperparameters
print("Best hyperparameters:", model.best_params_)

# Make predictions on the test set
y_pred = model.predict(X_test_scaled)

# Compute the confusion matrix, accuracy, and classification report
cm = confusion_matrix(y_test, y_pred)
acc = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Print the results
print("\nConfusion Matrix:")
print(cm)
print("\nAccuracy:", acc)
print("\nClassification Report:")
print(report)


Best hyperparameters: {'C': 0.1, 'solver': 'newton-cg'}

Confusion Matrix:
[[123  28]
 [ 32  48]]

Accuracy: 0.7402597402597403

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.81      0.80       151
           1       0.63      0.60      0.62        80

    accuracy                           0.74       231
   macro avg       0.71      0.71      0.71       231
weighted avg       0.74      0.74      0.74       231

