<a href="https://colab.research.google.com/github/abubakar-ahmed/Formative_1_Databases/blob/main/Task3_Fetch_Data_From_API_For_Predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
!pip install keras-tuner --upgrade



In [26]:
pip install imbalanced-learn scikit-learn



In [63]:
import pandas as pd
import numpy as np
import requests
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from imblearn.over_sampling import SMOTE
from tensorflow.keras import layers, models, optimizers, regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import load_model
from sklearn.metrics import confusion_matrix, classification_report

# --- Step 1: Fetch Data from API ---
api_url = "https://formative-1-databases.onrender.com/patients/"
# A timeout stops the cell from hanging indefinitely if the service never answers.
response = requests.get(api_url, timeout=60)

# Fail loudly on a bad response: the cells below need `df`, `features`,
# `scaler` and `model`, so silently skipping the pipeline only defers the error.
if response.status_code != 200:
    raise RuntimeError(f"Failed to fetch data: {response.status_code}")

data = response.json()
df = pd.DataFrame(data)
print("Data fetched successfully!")
print(df.head())

features = [
    'Age', 'Gender', 'Education_Level', 'Marital_Status',
    'Occupation', 'Income_Level', 'Live_Area'
]

# NOTE(review): the label below is derived from 'Age', which is also an input
# feature, so the network can read the answer directly off its input. The
# feature list is left unchanged to keep the saved model's input shape;
# confirm what the intended target column is.
target = 'Age'

# Raise instead of calling exit(): in a notebook exit() acts on the kernel
# rather than cleanly stopping this cell.
if target not in df.columns:
    raise KeyError(f"Error: {target} column is missing in the dataset.")

X = df[features]
y = df[target]

# Encode target as binary (Age >= 45 considered as 1, Age < 45 as 0)
y_encoded = (y >= 45).astype(int)  # 1 for age >= 45, 0 for age < 45
print("Classes in the target variable:", np.unique(y_encoded))

# --- Train-test Split ---
# Split BEFORE any resampling so the test set contains only real records and
# no synthetic sample interpolated from a test row ends up in training.
class_counts = y_encoded.value_counts()
can_stratify = class_counts.min() >= 2  # stratification needs >= 2 rows per class
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42,
    stratify=y_encoded if can_stratify else None, shuffle=True
)

# --- Balance the training split only ---
# SMOTE needs at least k_neighbors + 1 minority samples, so the neighbour
# count is capped by what the training split actually contains.
train_counts = y_train.value_counts()
minority_count = int(train_counts.min())
if len(train_counts) > 1 and minority_count > 1:
    smote = SMOTE(random_state=42, k_neighbors=min(2, minority_count - 1))
    X_resampled, y_resampled = smote.fit_resample(X_train, y_train)
    # Shuffle the resampled rows so that Keras' validation_split (which takes
    # the tail of the data) does not receive an unrepresentative slice.
    order = np.random.default_rng(42).permutation(len(X_resampled))
    X_resampled = X_resampled.iloc[order]
    y_resampled = y_resampled.iloc[order]
    print("SMOTE applied: Training set is now balanced.")
else:
    print("Dataset too small for SMOTE. Using the original data.")
    X_resampled, y_resampled = X_train, y_train  # Use original training data if SMOTE cannot be applied

# --- Feature Scaling ---
# Fit on training data only; the same fitted scaler is reused for the test
# split here and for single-row predictions in a later cell.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_resampled)
X_test_scaled = scaler.transform(X_test)

# --- Step 2: Calculate Class Weights ---
# After SMOTE the weights come out equal; they only matter when SMOTE was
# skipped. Keys are the real class labels rather than positional indices.
train_classes = np.unique(y_resampled)
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=train_classes,
    y=y_resampled
)
class_weights = {
    int(label): float(weight)
    for label, weight in zip(train_classes, balanced_weights)
}
print(f"Class weights: {class_weights}")

# --- Step 3: Define and Train the Model ---
model = models.Sequential()
model.add(layers.Input(shape=(X_train_scaled.shape[1],)))  # Input layer
model.add(layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))  # Dropout layer
model.add(layers.Dense(64, activation='relu'))  # Additional dense layer
model.add(layers.Dense(1, activation='sigmoid'))  # Sigmoid for binary classification

# Compile the model
model.compile(
    optimizer=optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Define early stopping to prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model
model.fit(
    X_train_scaled, y_resampled,
    epochs=50, validation_split=0.2,
    callbacks=[early_stop], class_weight=class_weights
)

# --- Step 4: Evaluate the Model ---
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Make predictions
y_pred = (model.predict(X_test_scaled) > 0.5).astype("int32")  # Convert probabilities to binary (0 or 1)

# Print confusion matrix
print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
print(cm)

# Print classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred, labels=[0, 1], zero_division=0))

# Save the model
model.save('trained_model.keras')
print('Model saved to trained_model.keras')



Data fetched successfully!
   Age  Gender  Education_Level  Marital_Status  Occupation  Income_Level  \
0   15       1                4               1           2             3   
1   30       1                5               1          50            40   
2   45       1                3               3           3             3   
3   45       1                4               1           6             7   
4   45       1                4               1           2             3   

   Live_Area  Patient_ID  
0          0           1  
1          5           2  
2          0           3  
3          0           4  
4          0           6  
Classes in the target variable: [0 1]
SMOTE applied: Dataset is now balanced.
Class weights: {0: np.float64(1.0), 1: np.float64(1.0)}
Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.4167 - loss: 1.0586 - val_accuracy: 1.0000 - val_loss: 0.7025
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [65]:
# --- Predict the latest entry based on Patient_ID ---
# NOTE(review): the ID is hard-coded; confirm it really is the most recent
# record, or derive it from the data instead.
last_patient_id = 15
latest_entry = df[df['Patient_ID'] == last_patient_id]

# Ensure there's a valid entry for the specified Patient_ID
if latest_entry.empty:
    print(f"\nNo data found for Patient_ID {last_patient_id}")
else:
    print(f"\nLatest entry for Patient_ID {last_patient_id}:\n{latest_entry}")

    # Select the model's input columns (this drops Patient_ID). The selection
    # stays a DataFrame so the scaler receives the same column names it was
    # fitted with; converting to a bare array would strip them and would also
    # break the row shape if more than one record matched.
    latest_entry_features = latest_entry[features]
    latest_entry_scaled = scaler.transform(latest_entry_features)

    # Make prediction (sigmoid output thresholded at 0.5)
    # NOTE(review): the model in the training cell is fitted on an
    # "Age >= 45" label, so the Schizophrenia wording in the message below
    # does not match what is actually predicted — confirm the intended target.
    prediction = (model.predict(latest_entry_scaled) > 0.5).astype("int32")
    print(f"\nPrediction for Patient_ID {last_patient_id} (0 = No Schizophrenia, 1 = Schizophrenia): {prediction[0][0]}")




Latest entry for Patient_ID 15:
    Age  Gender  Education_Level  Marital_Status  Occupation  Income_Level  \
10   95       0                8               1           2             3   

    Live_Area  Patient_ID  
10          1          15  
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step

Prediction for Patient_ID 15 (0 = No Schizophrenia, 1 = Schizophrenia): 1




In [60]:
# List every distinct Patient_ID present in the frame loaded from the API
available_patient_ids = df['Patient_ID'].unique()
print("Available Patient_IDs from the API:", available_patient_ids)

# Show the first rows of the frame as a quick sanity check of its contents
dataset_preview = df.head()
print("Full dataset:\n", dataset_preview)


Available Patient_IDs from the API: [ 1  2  3  4  6  7  8 10 13 14 15 16 17 18 19 20 21 24 25]
Full dataset:
    Age  Gender  Education_Level  Marital_Status  Occupation  Income_Level  \
0   15       1                4               1           2             3   
1   45       0                4               1           2             3   
2   45       1                3               3           3             3   
3   45       1                4               1           6             7   
4   45       1                4               1           2             3   

   Live_Area  Patient_ID  
0          0           1  
1          1           2  
2          0           3  
3          0           4  
4          0           6  


In [62]:
# Checking for pagination and fetching next pages
# Walks the `page` query parameter until the wanted Patient_ID shows up, the
# endpoint returns an empty page, or the pages stop changing.
patients_url = "https://formative-1-databases.onrender.com/patients/"
target_patient_id = 25
max_pages = 100  # hard stop so a misbehaving endpoint cannot loop forever

page_number = 1
previous_page_data = None
patient_found = False
while page_number <= max_pages:
    # Page-local names are used on purpose: reusing `df`/`data`/`response`
    # would overwrite the dataset the other cells work with.
    page_response = requests.get(
        patients_url, params={"page": page_number}, timeout=60
    )
    if page_response.status_code != 200:
        print(f"Failed to fetch data: {page_response.status_code}")
        break

    page_data = page_response.json()
    # Stop on an empty page, or when the same payload comes back again
    # (e.g. if the endpoint ignores the `page` parameter), which would
    # otherwise keep this loop running forever.
    if not page_data or page_data == previous_page_data:
        break

    page_df = pd.DataFrame(page_data)
    if 'Patient_ID' in page_df.columns and target_patient_id in page_df['Patient_ID'].values:
        print(f"Patient_ID {target_patient_id} is found.")
        patient_found = True
        break

    previous_page_data = page_data
    page_number += 1

if not patient_found:
    print(f"Patient_ID {target_patient_id} was not found.")


Patient_ID 25 is found.
