In [None]:
# Importing necessary libraries for the project

import numpy as np

# Importing necessary libraries for the project

import pandas as pd

# Importing necessary libraries for the project

import seaborn as sns

# Importing necessary libraries for the project

import matplotlib.pyplot as plt



# Read the health-indicator survey extract into a DataFrame.
# NOTE(review): this cell reads the *2021* file while the training cell further
# down reads the *2015* file — confirm that the difference is intentional.
data = pd.read_csv('/content/diabetes_012_health_indicators_BRFSS2021.csv')

# Pairwise correlation between all columns of the table.
correlation_matrix_new = data.corr()

# Draw the matrix as an annotated heatmap (two decimals per cell).
plt.figure(figsize=(12, 10))
sns.heatmap(
    correlation_matrix_new,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
)
plt.title('Correlation Matrix for Diabetes Health Indicators Dataset')
plt.show()

# Also dump the raw numbers so they can be read without the figure.
print(correlation_matrix_new)



!pip install imbalanced-learn



# Importing necessary libraries for the project

import pandas as pd

# Importing necessary libraries for the project

import numpy as np

# Importing necessary libraries for the project

from sklearn.model_selection import train_test_split

# Importing necessary libraries for the project

from sklearn.preprocessing import StandardScaler

# Importing necessary libraries for the project

from imblearn.combine import SMOTEENN  # Import SMOTE for oversampling

# Importing necessary libraries for the project

from tensorflow.keras.models import Sequential

# Importing necessary libraries for the project

from tensorflow.keras.layers import Dense, Dropout, LeakyReLU, BatchNormalization

# Importing necessary libraries for the project

from tensorflow.keras.optimizers import Adam

# Importing necessary libraries for the project

from sklearn.metrics import accuracy_score

# Importing necessary libraries for the project

from tensorflow.keras.callbacks import ReduceLROnPlateau

# Importing necessary libraries for the project

from tensorflow.keras.utils import to_categorical



# Load the BRFSS 2015 three-class diabetes table.
data = pd.read_csv('/content/diabetes_012_health_indicators_BRFSS2015.csv')

# Features are every column except the target; the target is 'Diabetes_012'.
X = data.drop(columns=['Diabetes_012'])
y = data['Diabetes_012']

# Column groups. Only the continuous/ordinal group is standardised below;
# `binary_features` is kept for reference and is not used in this cell.
continuous_features = ['BMI', 'GenHlth', 'MentHlth', 'PhysHlth', 'Age', 'Education', 'Income']
binary_features = ['HighBP', 'HighChol', 'CholCheck', 'Smoker', 'Stroke', 'HeartDiseaseorAttack',
                   'PhysActivity', 'Fruits', 'Veggies', 'HvyAlcoholConsump', 'AnyHealthcare',
                   'NoDocbcCost', 'DiffWalk', 'Sex']

# One-hot encode the target so it matches the softmax/categorical_crossentropy
# model built by build_model().
y = to_categorical(y)

# Hold out 20% of the rows *before* fitting anything, so that no statistic of
# the test rows can influence training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and apply the same transform to the
# test rows. (Previously the scaler was fitted on the full table, which leaked
# test-set means/variances into training.) Copies avoid writing into views of X.
scaler = StandardScaler()
X_train = X_train.copy()
X_test = X_test.copy()
X_train[continuous_features] = scaler.fit_transform(X_train[continuous_features])
X_test[continuous_features] = scaler.transform(X_test[continuous_features])

# Rebalance the training split only: SMOTEENN combines SMOTE over-sampling with
# Edited-Nearest-Neighbours cleaning. The test split is left untouched.
smote_enn = SMOTEENN(random_state=42)
X_resampled, y_resampled = smote_enn.fit_resample(X_train, y_train)









# Function to build a neural network model

def build_model(input_dim, units_1, units_2, dropout_rate):
    """Build and compile the three-hidden-block softmax classifier.

    Each hidden block is Dense (he_uniform) -> BatchNormalization ->
    LeakyReLU(alpha=0.01); the first two blocks are followed by Dropout, the
    third (fixed at 32 units) is not. The output layer has 3 softmax units.

    Args:
        input_dim: Number of input features for the first Dense layer.
        units_1: Width of the first hidden Dense layer.
        units_2: Width of the second hidden Dense layer.
        dropout_rate: Dropout rate applied after the first two blocks.

    Returns:
        A compiled Sequential model (Adam, lr=0.001, categorical
        cross-entropy, accuracy metric).
    """
    # (layer width, extra Dense keyword arguments, whether Dropout follows)
    hidden_blocks = (
        (units_1, {'input_dim': input_dim}, True),
        (units_2, {}, True),
        (32, {}, False),
    )

    model = Sequential()
    for width, dense_extras, with_dropout in hidden_blocks:
        model.add(Dense(units=width, kernel_initializer='he_uniform', **dense_extras))
        model.add(BatchNormalization())
        model.add(LeakyReLU(alpha=0.01))
        if with_dropout:
            model.add(Dropout(dropout_rate))

    # Three-way softmax head for the multi-class target.
    model.add(Dense(units=3, activation='softmax'))

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model



import os
from itertools import product

# Best score seen so far and the file name of the model that achieved it.
best_accuracy = 0
best_model_filename = 'best_model.h5'

# Directory that receives the best checkpoints. It is created up front because
# saving an .h5 file into a missing directory fails.
model_dir = '/content/all_features_batchnormal_lr_scheduler'
os.makedirs(model_dir, exist_ok=True)

# Halve the learning rate when the validation loss has not improved for 3 epochs.
lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1, min_lr=1e-6)

# Hyperparameter grid
units_1_options = [64, 128]
units_2_options = [32, 64]
dropout_rates = [0.2, 0.3]
epochs = 100
batch_size = 32

# product() visits the combinations in the same order as three nested loops.
for units_1, units_2, dropout_rate in product(units_1_options, units_2_options, dropout_rates):
    # Build a fresh model and train it on the resampled training data;
    # 20% of that data is held back by Keras for validation.
    model = build_model(input_dim=X_train.shape[1], units_1=units_1, units_2=units_2, dropout_rate=dropout_rate)
    model.fit(X_resampled, y_resampled, epochs=epochs, batch_size=batch_size, validation_split=0.2,
              callbacks=[lr_schedule], verbose=1)

    # Score on the held-out test split: compare arg-max class indices.
    # NOTE(review): the test split is also what selects the best model below,
    # so the reported best accuracy is optimistically biased.
    y_pred = model.predict(X_test)
    y_pred_class = np.argmax(y_pred, axis=1)
    y_test_class = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_test_class, y_pred_class)
    print(f"Model with {units_1} and {units_2} units, dropout {dropout_rate}: Accuracy = {accuracy}")

    # Keep the checkpoint only when it beats every earlier combination.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model_filename = f"best_model_units1_{units_1}_units2_{units_2}_dropout_{dropout_rate}.h5"
        model.save(os.path.join(model_dir, best_model_filename))
        print(f"New best model saved as {best_model_filename} with accuracy: {accuracy}")

print(f"Best model saved as {best_model_filename} with accuracy: {best_accuracy}")





# Importing necessary libraries for the project

import tensorflow as tf

# Importing necessary libraries for the project

from tensorflow.keras.models import load_model, Sequential

# Importing necessary libraries for the project

from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, LeakyReLU

# Importing necessary libraries for the project

from tensorflow.keras.optimizers import Adam

# Importing necessary libraries for the project

from sklearn.model_selection import train_test_split

# Importing necessary libraries for the project

from sklearn.preprocessing import StandardScaler

# Importing necessary libraries for the project

import pandas as pd



# Load the 8-feature dataset used for the transfer step.
data = pd.read_csv('/content/gestational_diabetes_dataset.csv')

# Load the checkpoint produced by the grid search.
# NOTE(review): the grid search saves under
# '/content/all_features_batchnormal_lr_scheduler/'; this path points at
# '/content/' directly — confirm the file is copied there.
pretrained_model = load_model('/content/best_model_units1_128_units2_64_dropout_0.2.h5')

# The new dataset has 8 input columns (see the column list below).
input_shape = (8,)

# New model: a fresh first block sized for the new inputs ...
model = Sequential()
model.add(Dense(128, input_shape=input_shape))
model.add(BatchNormalization())
model.add(LeakyReLU())
model.add(Dropout(0.2))

# ... followed by the pretrained layers *after* the pretrained first block.
# Assuming the checkpoint was produced by build_model(), its first four layers
# are Dense, BatchNormalization, LeakyReLU, Dropout. The fresh block above
# replaces all four, so reuse starts at index 4 (slicing from 1 stacked a
# second BatchNorm/LeakyReLU/Dropout right after the new ones). The final
# softmax layer is dropped as before.
for layer in pretrained_model.layers[4:-1]:
    model.add(layer)

# Single sigmoid unit for the binary 'Outcome' target.
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

X = data[['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']]
y = data['Outcome']

# Split first, then fit the scaler on the training rows only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the reassembled model. The reused layers are not frozen, so all weights
# are updated. NOTE(review): the test split doubles as validation data here.
history = model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_data=(X_test_scaled, y_test))

# Persist the result for the later cells that load it by this name.
model.save('fine_tuned_model_with_modified_input.h5')







# NOTE: an earlier, disabled variant of the transfer-learning step used to be
# kept here inside a string literal, so it never executed. It tried to rewire
# the pretrained model's layers onto a new 8-feature Input with the functional
# API, freeze all but the last layer, and swap the head by assigning to
# `new_model.layers[-1]` (which only changes the returned list, not the model).
# It also used `layers`, `Model` and a learning-rate callback without importing
# them in this cell. The working approach is the Sequential rebuild that saves
# 'fine_tuned_model_with_modified_input.h5'; recover the old attempt from
# version control if it is ever needed.



















# Importing necessary libraries for the project

import numpy as np



# Reload the model written by the transfer-learning cell.
fine_tuned_model = load_model('fine_tuned_model_with_modified_input.h5')

# Sigmoid probabilities for the scaled test rows.
y_pred = fine_tuned_model.predict(X_test_scaled)

# Threshold at 0.5 to obtain hard 0/1 predictions.
y_pred_binary = (y_pred > 0.5).astype(int)

# Side-by-side view of the true and predicted labels (first rows only).
predictions_comparison = pd.DataFrame(
    {
        'True Outcome': y_test,
        'Predicted Outcome': y_pred_binary.flatten(),
    }
)
print(predictions_comparison.head())

# Loss and accuracy as computed by Keras on the same test rows.
test_loss, test_accuracy = fine_tuned_model.evaluate(X_test_scaled, y_test)

print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")





# Importing necessary libraries for the project

import pandas as pd



# Load the BIT_2019 survey table. The path is relative, so the file is looked
# up in the current working directory.

file_path = 'BIT_2019.csv'

data = pd.read_csv(file_path)



# Function to classify diabetes status based on Diabetic, Pdiabetes, and Pregancies columns

def classify_diabetes(row):
    """Derive a diabetes-status label from one record.

    Rules, checked in this order:
      1. Diabetic == 'no' and Pdiabetes == 0  -> 'No Diabetes'
      2. Pdiabetes == 1                       -> 'Prediabetes'
      3. Diabetic == 'yes', Pregancies == 0   -> 'Type-2 Diabetes'
      4. Diabetic == 'yes', Pregancies > 0    -> 'Gestational Diabetes'
      5. anything else                        -> 'Unknown'

    Args:
        row: Mapping-like record exposing 'Diabetic', 'Pdiabetes' and
            'Pregancies' (spelled as in the source data).

    Returns:
        One of the five label strings listed above.
    """
    diabetic = row['Diabetic']
    prediabetic = row['Pdiabetes']

    if diabetic == 'no' and prediabetic == 0:
        return 'No Diabetes'
    if prediabetic == 1:
        return 'Prediabetes'
    if diabetic == 'yes':
        pregnancies = row['Pregancies']
        if pregnancies == 0:
            return 'Type-2 Diabetes'
        if pregnancies > 0:
            return 'Gestational Diabetes'
    # Exact string matches only: other spellings of yes/no, or a missing
    # pregnancy count, end up here.
    return 'Unknown'



# Fill missing values, treating "missing" as "no". The result is assigned back
# instead of using `Series.fillna(..., inplace=True)` on a column selection:
# that chained in-place form is deprecated in recent pandas and does not update
# the DataFrame once copy-on-write is enabled.
data['Pdiabetes'] = data['Pdiabetes'].fillna('0')
data['Diabetic'] = data['Diabetic'].fillna('no')

# Normalise 'Pdiabetes' to integer 0/1. Values other than 'yes'/'no'/'0' are
# left as they are by replace(), so astype(int) raises if they are not numeric.
data['Pdiabetes'] = data['Pdiabetes'].replace({'yes': 1, 'no': 0, '0': 0}).astype(int)

# Label every row with the rule set in classify_diabetes().
# NOTE(review): classify_diabetes() matches 'yes'/'no' exactly; if 'Diabetic'
# contains variants with stray whitespace or capitals, those rows become
# 'Unknown' and are dropped below — confirm against the value counts.
data['Diabetes_Status'] = data.apply(classify_diabetes, axis=1)

# Label distribution before filtering.
print(data['Diabetes_Status'].value_counts())

# Discard rows that could not be classified.
data_cleaned = data[data['Diabetes_Status'] != 'Unknown']

# Label distribution after filtering.
print(data_cleaned['Diabetes_Status'].value_counts())

# Preview of the full (unfiltered) table, including the new column.
print(data.head())



# Importing necessary libraries for the project

from sklearn.model_selection import train_test_split

# Importing necessary libraries for the project

from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Importing necessary libraries for the project

from sklearn.compose import ColumnTransformer

# Importing necessary libraries for the project

from imblearn.over_sampling import SMOTE

# Importing necessary libraries for the project

from sklearn.impute import SimpleImputer

# Importing necessary libraries for the project

from collections import Counter

# Importing necessary libraries for the project

from sklearn.preprocessing import LabelEncoder

# Importing necessary libraries for the project

import joblib



# Target: the derived status label. Features: everything except the label and
# the raw 'Diabetic' flag it was derived from.
# NOTE(review): 'Pregancies' stays in the features although the labelling rule
# uses it to separate Type-2 from Gestational — confirm this is intended.
y = data_cleaned['Diabetes_Status']
X = data_cleaned.drop(columns=['Diabetic', 'Diabetes_Status'])



# Define a function to convert age ranges to numeric values (e.g., using the midpoint of the range)

def convert_age_range(age_range):
    """Convert an age value or an 'a-b' age range to a single number.

    A string of the form '<int>-<int>' is mapped to the integer midpoint
    (floor division). Anything else is passed to ``pd.to_numeric`` with
    ``errors='coerce'``, so plain numbers are returned as numbers and
    unparseable text becomes NaN.

    Strings that contain '-' but are not a two-integer range (for example
    '-5' or '10-20-30') used to raise ValueError; they now take the numeric
    fallback as well.

    Args:
        age_range: A number, a numeric string, or a range string like '40-49'.

    Returns:
        The midpoint for a range, the numeric value for a number-like input,
        or NaN when the input cannot be interpreted.
    """
    if isinstance(age_range, str):
        bounds = age_range.split('-')
        if len(bounds) == 2:
            try:
                return (int(bounds[0]) + int(bounds[1])) // 2
            except ValueError:
                # Not two integers (e.g. '-5' splits into '' and '5');
                # fall through to the generic numeric conversion.
                pass
    return pd.to_numeric(age_range, errors='coerce')



# Replace age ranges by a single numeric value (see convert_age_range).
X['Age'] = X['Age'].apply(convert_age_range)

# Split before fitting any transformer so nothing is learned from the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Column groups handled by the preprocessor; columns not listed are dropped
# (ColumnTransformer's default remainder behaviour).
categorical_features = ['Gender', 'Family_Diabetes', 'highBP', 'PhysicallyActive', 'Smoking', 'Alcohol', 'BPLevel']
numerical_features = ['Age', 'BMI', 'Pregancies', 'Sleep', 'SoundSleep']

# Scale numeric columns and one-hot encode categorical ones.
# handle_unknown='ignore' makes a category that was absent from the training
# split encode as all zeros instead of raising at transform time.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ])

# Fit on the training split only, then apply to both splits.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Persist the fitted preprocessor for the Streamlit app (file name kept as is,
# including its spelling, because it is loaded by that name).
joblib.dump(preprocessor, 'preprocesser.pkl')

# Mean-impute remaining missing values. The imputer is fitted on the training
# matrix and also applied to the test matrix so both splits are NaN-free.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train_processed)
X_test_imputed = imputer.transform(X_test_processed)

# Integer-encode the training labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

print("Class distribution before SMOTE:", Counter(y_train_encoded))

# Over-sample the minority classes of the training split only.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train_imputed, y_train_encoded)

print("Class distribution after SMOTE:", Counter(y_train_smote))





# Importing necessary libraries for the project

import matplotlib.pyplot as plt

# Importing necessary libraries for the project

import seaborn as sns



# Wrap the resampled training matrix in a DataFrame whose columns carry the
# feature names emitted by the fitted preprocessor.
X_train_smote_df = pd.DataFrame(
    X_train_smote,
    columns=preprocessor.get_feature_names_out(),
)

# Pairwise correlation between the processed features.
corr_matrix = X_train_smote_df.corr()

# Annotated heatmap of that matrix.
plt.figure(figsize=(12, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap='coolwarm',
    fmt='.2f',
    linewidths=0.5,
)
plt.title('Correlation Matrix')
plt.show()

# Raw values for reference.
print(corr_matrix)



# Importing necessary libraries for the project

from sklearn.ensemble import RandomForestClassifier



# Fit a random forest on the resampled training data purely to rank features.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_smote, y_train_smote)

# Impurity-based importances, aligned with the processed feature names.
importances = rf_model.feature_importances_
feature_names = X_train_smote_df.columns

# Table of features sorted from most to least important.
feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': importances})
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

print(feature_importance_df)

# Keep the 15 highest-ranked features.
top_n = 15
top_features = feature_importance_df.head(top_n)['Feature']

# Training matrix restricted to those features (columns in importance order).
X_train_top_features = X_train_smote_df[top_features]

# Build the test matrix with the *same* columns in the *same* order.
# The previous positional slice `X_test_processed[:, :len(top_features)]` took
# the first 15 processed columns, which are not the top-ranked ones, and it
# skipped the imputation applied to the training data.
X_test_df = pd.DataFrame(imputer.transform(X_test_processed), columns=feature_names)
X_test_top_features = X_test_df[top_features].to_numpy()



''' Top Features '''

'''

num__Pregancies (0.205519)

num__BMI (0.095190)

num__SoundSleep (0.081070)

num__Sleep (0.071806)

cat__Gender_Male (0.058135)

num__Age (0.053376)

cat__Gender_Female (0.052271)

cat__Family_Diabetes_yes (0.044955)

cat__PhysicallyActive_less than half an hr (0.044192)

cat__PhysicallyActive_none (0.038670)

cat__Family_Diabetes_no (0.037938)

cat__BPLevel_high (0.037674)

cat__highBP_no (0.034218)

cat__highBP_yes (0.032206)

cat__BPLevel_normal (0.027395)



'''





# Importing necessary libraries for the project

from tensorflow.keras.models import Sequential

# Importing necessary libraries for the project

from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU

# Importing necessary libraries for the project

from tensorflow.keras.optimizers import Adam

# Importing necessary libraries for the project

from tensorflow.keras import layers

# Importing necessary libraries for the project

from tensorflow.keras.models import load_model

# Importing necessary libraries for the project

from tensorflow.keras.callbacks import ReduceLROnPlateau





# Reload the binary model; only its layer *configuration* is reused below.
fine_tuned_model = load_model('fine_tuned_model_with_modified_input.h5')

# No Diabetes, Prediabetes, Type-2 Diabetes, Gestational Diabetes
num_classes = 4

# Fresh Sequential model whose first layer accepts the 15 selected features.
model = Sequential()
model.add(Dense(128, input_shape=(15,), activation='relu'))
model.add(Dropout(0.2))

# Mirror every layer between the first and the last of the loaded model with a
# newly constructed layer of the same type. Each copy gets a unique name
# ("<original name>_<position>"). The copies are created by their constructors
# only — no weights are transferred from the loaded model.
for i, layer in enumerate(fine_tuned_model.layers[1:-1]):
    unique_name = f"{layer.name}_{i}"
    if isinstance(layer, BatchNormalization):
        replacement = BatchNormalization(name=unique_name)
    elif isinstance(layer, Dropout):
        replacement = Dropout(rate=layer.rate, name=unique_name)
    elif isinstance(layer, LeakyReLU):
        # Constructed with the default slope; the original slope is not copied.
        replacement = LeakyReLU(name=unique_name)
    elif isinstance(layer, Dense):
        replacement = Dense(units=layer.units, activation=layer.activation, name=unique_name)
    else:
        # Any other layer type is skipped.
        continue
    model.add(replacement)

# Softmax head with one unit per class.
model.add(Dense(num_classes, activation='softmax'))

# Sparse categorical cross-entropy because the targets are integer-encoded.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()



# Sanity checks before training: column dtypes, then feature-matrix shapes
# (the model expects 15 columns).
print(X_train_top_features.dtypes)

for _matrix in (X_train_top_features, X_test_top_features):
    print(_matrix.shape)

import numpy as np

# Convert features and labels to plain NumPy arrays.
X_train_top_features = np.array(X_train_top_features)
X_test_top_features = np.array(X_test_top_features)
y_train_smote = np.array(y_train_smote)
y_test = np.array(y_test)

# Label vector shapes.
for _labels in (y_train_smote, y_test):
    print(_labels.shape)

# All four shapes once more after the conversion.
for _array in (X_train_top_features, X_test_top_features, y_train_smote, y_test):
    print(_array.shape)

# Flatten the label arrays to 1-D.
y_train_smote = y_train_smote.reshape(-1)
y_test = y_test.reshape(-1)

# Architecture overview (the output layer should show 4 softmax units).
model.summary()

# Distinct label values in each split.
for _labels in (y_train_smote, y_test):
    print(np.unique(_labels))





# Importing necessary libraries for the project

from sklearn.preprocessing import LabelEncoder



# Encode the test labels with the encoder that was fitted on the training
# labels (`label_encoder`, created where y_train was encoded). Re-fitting a new
# encoder on a hard-coded class list could assign different integer codes than
# the ones in y_train_smote if a class is missing from the training split,
# silently corrupting validation and test metrics. Reusing the fitted encoder
# keeps the mapping identical and fails loudly on a label it has never seen.
y_test_encoded = label_encoder.transform(y_test)

# Train on the resampled, integer-encoded training data.
# NOTE(review): the test split is used as validation data here as well.
history = model.fit(X_train_top_features, y_train_smote,
                    validation_data=(X_test_top_features, y_test_encoded),
                    epochs=30, batch_size=32)

# Final score on the test split.
test_loss, test_accuracy = model.evaluate(X_test_top_features, y_test_encoded)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

# Persist the trained model.
model.save('fine_tuned_model_on_new_dataset.h5')



# Importing necessary libraries for the project

import pandas as pd

# Importing necessary libraries for the project

import numpy as np

# Importing necessary libraries for the project

from sklearn.preprocessing import MinMaxScaler

# Importing necessary libraries for the project

from sklearn.model_selection import train_test_split

# Importing necessary libraries for the project

from sklearn.metrics import confusion_matrix

# Importing necessary libraries for the project

import seaborn as sns

# Importing necessary libraries for the project

import matplotlib.pyplot as plt

# Importing necessary libraries for the project

import tensorflow as tf

# Importing necessary libraries for the project

from tensorflow.keras.models import Sequential

# Importing necessary libraries for the project

from tensorflow.keras.layers import LSTM, Dense, Dropout, Input

# Importing necessary libraries for the project

from tensorflow.keras.callbacks import EarlyStopping

# Importing necessary libraries for the project

from tensorflow.keras.regularizers import l2

# Importing necessary libraries for the project

from tensorflow.keras.optimizers import Adam

# Importing necessary libraries for the project

from tensorflow.keras.callbacks import ReduceLROnPlateau

# Importing necessary libraries for the project

import joblib





# Read the merged CGM + clinical table.
df = pd.read_csv('merged_CGM_clinical_data.csv')

# 'Hora' is parsed as a timedelta and expressed in seconds; rows with any
# missing value are then discarded.
df['Hora'] = pd.to_timedelta(df['Hora']).dt.total_seconds()
df = df.dropna()

numerical_cols = ['Hora', 'Glucemia', 'BMI', 'age', 'HbA1c', 'follow.up']

# Min-max scale the numeric columns and keep the fitted scaler on disk so the
# Streamlit app can apply the same transform.
# NOTE(review): the scaler is fitted on all rows, before the train/test split.
scaler = MinMaxScaler()
df[numerical_cols] = scaler.fit_transform(df[numerical_cols])
joblib.dump(scaler, 'time_series_scaler.pkl')

from sklearn.utils import resample

# Rows with T2DM == False are treated as the majority class and rows with
# T2DM == True as the minority class.
df_majority = df[df['T2DM'] == False]
df_minority = df[df['T2DM'] == True]

# Draw minority rows with replacement until both classes have the same size.
df_minority_upsampled = resample(
    df_minority,
    replace=True,
    n_samples=len(df_majority),
    random_state=42,
)

# Majority rows first, then the resampled minority rows.
# NOTE(review): the windows built later slide over this concatenated order, so
# they do not follow the original time order of the recordings — confirm.
df_upsampled = pd.concat([df_majority, df_minority_upsampled])

# Feature matrix and integer (0/1) target as NumPy arrays.
X_data = df_upsampled[numerical_cols].to_numpy()
y_data = df_upsampled['T2DM'].astype(int).to_numpy()

# Number of consecutive rows per input sequence.
window_size = 10



# Define a function to create sequences of data (sliding window approach)

def create_sequences(data, target, window_size):
    """Slice `data` into overlapping windows paired with the next target.

    For every start index ``s`` in ``range(len(data) - window_size)`` the
    window is ``data[s:s + window_size]`` and its label is
    ``target[s + window_size]``, i.e. the target of the row immediately
    after the window.

    Args:
        data: Indexable sequence of rows (e.g. a 2-D array).
        target: Indexable sequence of labels aligned with `data`.
        window_size: Number of consecutive rows per window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays holding the windows and their
        labels; both are empty when `data` has no more than `window_size`
        rows.
    """
    starts = range(len(data) - window_size)
    windows = [data[s:s + window_size] for s in starts]
    labels = [target[s + window_size] for s in starts]
    return np.array(windows), np.array(labels)



# Windowed samples: X has one (window_size, n_features) block per sample.
X, y = create_sequences(X_data, y_data, window_size)

# Random 80/20 split of the windows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Two stacked LSTM layers with L2 weight penalties and dropout, a small dense
# layer, and a sigmoid output for the binary target.
model = Sequential()
for _stage in (
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(64, return_sequences=True, kernel_regularizer=l2(0.01)),
    Dropout(0.3),
    LSTM(64, kernel_regularizer=l2(0.01)),
    Dropout(0.3),
    Dense(25, activation='relu', kernel_regularizer=l2(0.01)),
    Dense(1, activation='sigmoid'),
):
    model.add(_stage)

model.compile(
    optimizer=Adam(learning_rate=0.00001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop after 5 epochs without validation-loss improvement and restore the best
# weights; halve the learning rate after 3 such epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6, verbose=1)

# Train; the test split is passed as validation data, so both callbacks
# monitor the test loss.
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping, reduce_lr],
)

# Loss / accuracy on the test split.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

# Hard 0/1 predictions from the sigmoid output (threshold 0.5).
y_pred = (model.predict(X_test) > 0.5).astype(int)

# Learning curves: one figure for accuracy, one for loss. The "test" curves
# are the validation metrics recorded during fit().
for _metric, _title, _ylabel in (
    ('accuracy', 'Accuracy Curve', 'Accuracy'),
    ('loss', 'Loss Curve', 'Loss'),
):
    plt.plot(history.history[_metric], label=f'train {_metric}')
    plt.plot(history.history[f'val_{_metric}'], label=f'test {_metric}')
    plt.title(_title)
    plt.xlabel('Epochs')
    plt.ylabel(_ylabel)
    plt.legend()
    plt.show()

# Confusion matrix of the thresholded predictions.
cm = confusion_matrix(y_test, y_pred)
_class_names = ['No T2DM', 'T2DM']
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=_class_names, yticklabels=_class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Persist the trained time-series model.
model.save('/content/time_series_model.h5')



# Importing necessary libraries for the project

from tensorflow import keras



# Paths of the two trained component models. Each path matches the string that
# was passed to model.save() for that model: the time-series model is saved
# with an absolute path, the four-class tabular model with a relative one
# (previously an absolute '/content/...' path was used here, which only exists
# when the working directory is /content).
model_1_path = '/content/time_series_model.h5'
model_2_path = 'fine_tuned_model_on_new_dataset.h5'

# Load both models.
model_1 = keras.models.load_model(model_1_path)
model_2 = keras.models.load_model(model_2_path)

# Print both architectures for inspection.
model_1.summary()
model_2.summary()

# Notebook-style bare expression: shows the tabular model's input shape when
# run interactively.
model_2.input_shape





# Importing necessary libraries for the project

from tensorflow.keras.layers import Input, concatenate, Dense

# Importing necessary libraries for the project

from tensorflow.keras.models import Model



# Per-sample input shapes of the two component models (batch axis removed).
time_series_input_shape, fine_tuned_input_shape = (
    component.input_shape[1:] for component in (model_1, model_2)
)

# One fresh Input per component, in the order (time series, tabular).
time_series_input = Input(shape=time_series_input_shape)
fine_tuned_input = Input(shape=fine_tuned_input_shape)

# Run each component model on its own input.
time_series_output = model_1(time_series_input)
fine_tuned_output = model_2(fine_tuned_input)

# Join the two model outputs into a single feature vector and put a 4-class
# softmax layer on top of it.
combined_features = concatenate([time_series_output, fine_tuned_output])
new_output = Dense(4, activation='softmax')(combined_features)

# Two-input, one-output model wrapping both components.
combined_model = Model(
    inputs=[time_series_input, fine_tuned_input],
    outputs=new_output,
)

# NOTE(review): no fit() call follows in the code shown here, so the new Dense
# head is saved with its initial weights. The loss is categorical
# cross-entropy, which expects one-hot targets if this model is trained later.
combined_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

combined_model.summary()

combined_model.save('/content/final_deep_learning_model.h5')







# Importing necessary libraries for the project

from tensorflow.keras.models import load_model



# Reload the combined model from the exact path it was saved to. The previous
# relative path 'final_deep_learning_model.h5' only resolved to the same file
# when the working directory happened to be /content.
combined_model = load_model('/content/final_deep_learning_model.h5')

# The combined model has two inputs: index 0 is the time-series input and
# index 1 the tabular (fine-tuned) input. Read the shape of each.
time_series_input_shape = combined_model.input[0].shape
fine_tuned_input_shape = combined_model.input[1].shape

print(f"Time Series Input Shape: {time_series_input_shape}")
print(f"Fine-Tuned Input Shape: {fine_tuned_input_shape}")





# Importing necessary libraries for the project

import pandas as pd



# Load the merged CGM + clinical CSV again, this time from an absolute path
# and into its own variable.

time_series_data = pd.read_csv('/content/merged_CGM_clinical_data.csv')



# Print the first rows to inspect the column layout.

print(time_series_data.head())




