<a href="https://colab.research.google.com/github/jorden17/Prediction_Sysytem/blob/main/diabtes_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Importing the Dependencies
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [2]:
# Load the Dataset
# Replace this with your actual dataset
from sklearn.datasets import load_diabetes
data = load_diabetes(as_frame=True)
X = data.data
y = (data.target > data.target.mean()).astype(int)  # Binary classification: 1 if above mean, else 0


In [3]:
# Splitting the Data into Training and Testing Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [4]:
# Feature Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Hyperparameter Tuning for SVC
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto']
}

In [6]:
svc = SVC(probability=True, random_state=42)
grid_search = GridSearchCV(svc, param_grid, cv=5, scoring='accuracy', verbose=2)
grid_search.fit(X_train, y_train)


Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END ..................C=0.1, gamma=scale, kernel=linear; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.1s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.1s
[CV] END .....................C=0.1, gamma=scale, kernel=rbf; total time=   0.0s
[CV] END ....................C=0.1, gamma=scale, kernel=poly; total time=   0.1s
[CV] END ....................C=0.1, gamma=scale

In [7]:
# Best Parameters and Model
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)


Best Parameters: {'C': 1, 'gamma': 'scale', 'kernel': 'linear'}


In [8]:
# Model Evaluation
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy on Test Data:", accuracy)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Accuracy on Test Data: 0.7752808988764045

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.80      0.80        50
           1       0.74      0.74      0.74        39

    accuracy                           0.78        89
   macro avg       0.77      0.77      0.77        89
weighted avg       0.78      0.78      0.78        89


Confusion Matrix:
[[40 10]
 [10 29]]


In [12]:
X.head()


Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641


In [14]:
# Input data (replace with actual values or use feature means)
input_data = (50, 1, 25.8, 80, 150, 100, 50, 4.5, 5.2, 85)  # Example input

# Convert the input data to a NumPy array
input_data_as_numpy_array = np.asarray(input_data)

# Reshape the array as we are predicting for one instance
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# Scale the input data using the saved scaler
input_data_scaled = scaler.transform(input_data_reshaped)

# Make a prediction
prediction = best_model.predict(input_data_scaled)

# Output the result
if prediction[0] == 0:
    print("The person is not diabetic")
else:
    print("The person is diabetic")

The person is diabetic




In [17]:
import numpy as np
import joblib

# Save the Model and Scaler
joblib.dump(best_model, "diabetes1_model.sav")
joblib.dump(scaler, "scaler1.sav")
print("Model and scaler saved as 'diabetes_model.sav' and 'scaler.sav'.")

Model and scaler saved as 'diabetes_model.sav' and 'scaler.sav'.
