In [3]:
# Attempt 1

In [4]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers
from tensorflow.keras.utils import plot_model

# Step 1: Load the data
# Directory that holds the supplementary-table CSVs. Set the RESEARCH_DATA_DIR
# environment variable to run this notebook on another machine; when it is not
# set, the original author's local directory is used, so existing runs are unchanged.
DATA_DIR = os.environ.get(
    'RESEARCH_DATA_DIR',
    '/Users/schoudhry/Desktop/IIT/Research/researchData',
)

csv12 = os.path.join(DATA_DIR, 'StableS12.csv')
csv13 = os.path.join(DATA_DIR, 'Stable13.csv')
csv6 = os.path.join(DATA_DIR, 'Supplementary Tables S6 S6.csv')
csv1 = os.path.join(DATA_DIR, 'Stable1.csv')  # path only; this table is not read in this cell

# Read the three tables used below (descriptions follow the original notes;
# the actual column layout is assumed, not verified here).
# S12: Gene expression data
data_s12 = pd.read_csv(csv12)
# S13: Bacterial abundance data
data_s13 = pd.read_csv(csv13)
# S6: CRC status and stability-selected associations
data_s6 = pd.read_csv(csv6)

# Build a feature table for a single patient column (B01).
# Assumption: column 'B01' and the identifier column 'Unnamed: 0' exist in both
# the S12 and S13 tables -- TODO confirm before extending to B02, B03, ...
genes_b01 = data_s12[['Unnamed: 0', 'B01']].rename(
    columns={'Unnamed: 0': 'Gene', 'B01': 'Gene Occurrence'}
)
bacteria_b01 = data_s13[['Unnamed: 0', 'B01']].rename(
    columns={'Unnamed: 0': 'Bacteria', 'B01': 'Bacteria Occurrence'}
)

# Put the two tables side by side. concat(axis=1) aligns on the row index only,
# so row i pairs the i-th gene with the i-th bacterium and the shorter table is
# padded with NaN.
# NOTE(review): this positional pairing has no biological key behind it -- confirm
# it is the intended design.
merged_b01 = pd.concat([genes_b01, bacteria_b01], axis=1)

# Attach the CRC status from S6, again matched by row index only.
# NOTE(review): presumes row i of S6 describes row i of merged_b01 -- verify.
merged_b01['CRC_Status'] = data_s6['CRC_Status']

# Step 2: Prepare the input features and target
# Only the numeric occurrence columns are model inputs. 'Gene' and 'Bacteria' are
# identifier columns (presumably names); leaving them in X makes StandardScaler
# fail when it tries to convert them to float.
feature_columns = ['Gene Occurrence', 'Bacteria Occurrence']

# Rows padded with NaN by the index alignment above would turn the training loss
# into NaN, so keep only rows where both features and the label are present.
model_rows = merged_b01.dropna(subset=feature_columns + ['CRC_Status'])

X = model_rows[feature_columns].to_numpy(dtype=float)  # Features (gene and bacteria data)
# NOTE(review): the sigmoid / binary_crossentropy model trained later needs this
# target encoded as 0/1 -- confirm how CRC_Status is stored in S6.
y = model_rows['CRC_Status'].to_numpy()  # Target (CRC status)

# Step 3: Split the data into training and testing sets
# The split happens BEFORE scaling so that the scaler never sees the test rows.
# Fitting the scaler on the full matrix would leak test-set mean/variance into
# training. random_state is unchanged, so the same rows land in each partition.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 4: Normalize the data
# Learn mean/std from the training rows only, then apply that same transform
# to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full matrix scaled with the training statistics; kept only so that any later
# cell that still refers to X_scaled keeps working.
X_scaled = scaler.transform(X)

# Step 5: Define the neural network model
# Fully connected binary classifier: hidden Dense layers of 128, 64, 32 and 16
# units (ReLU), with batch normalization and dropout after the first three, and a
# single sigmoid output unit.
n_features = X_train.shape[1]  # one input per feature column

model = tf.keras.Sequential([
    # Input declaration. tf.keras.Input(shape=...) is used instead of
    # InputLayer(input_shape=...) because the `input_shape` keyword is
    # deprecated in Keras 3.
    tf.keras.Input(shape=(n_features,)),

    # First Dense Layer
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),  # Batch normalization for stable learning
    layers.Dropout(0.3),  # Dropout to prevent overfitting

    # Second Dense Layer
    layers.Dense(64, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),

    # Third Dense Layer
    layers.Dense(32, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.2),

    # Fourth Dense Layer (no normalization/dropout before the output)
    layers.Dense(16, activation='relu'),

    # Output Layer (sigmoid for binary classification)
    layers.Dense(1, activation='sigmoid'),
])

# Step 6: Compile the model
# Adam optimizer with binary cross-entropy, matching the single sigmoid output.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 7: Train the model
# Validation data is carved out of the training arrays (Keras takes the last 20%
# of the samples passed to fit) instead of reusing the test set. That keeps
# X_test / y_test unseen until the final evaluate call, so the reported test
# accuracy is an independent estimate. history.history still contains the
# 'val_loss' and 'val_accuracy' series used by the plots.
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.2,
)

# Step 8: Write a diagram of the network (layer names and tensor shapes) to
# model_architecture.png
plot_model(
    model,
    to_file='model_architecture.png',
    show_shapes=True,
    show_layer_names=True,
)

# Step 9: Score the trained model on the held-out arrays and report accuracy.
# evaluate returns the loss followed by the compiled metrics (accuracy here).
test_loss, test_accuracy = model.evaluate(
    X_test,
    y_test,
    verbose=2,
)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Step 10: Plot training history (accuracy and loss)
# Two side-by-side panels: accuracy on the left, loss on the right, each showing
# the per-epoch training and validation curves recorded by model.fit.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: accuracy
accuracy_ax.plot(history.history['accuracy'], label='Train Accuracy')
accuracy_ax.plot(history.history['val_accuracy'], label='Val Accuracy')
accuracy_ax.set_xlabel('Epochs')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()
accuracy_ax.set_title('Training and Validation Accuracy')

# Right panel: loss
loss_ax.plot(history.history['loss'], label='Train Loss')
loss_ax.plot(history.history['val_loss'], label='Val Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.set_title('Training and Validation Loss')

plt.show()

# Step 11: Display the architecture diagram that plot_model wrote to disk
# (the file itself is saved in Step 8; nothing is saved here).
from IPython.display import Image
# filename= makes a missing file raise an error instead of the path string being
# interpreted as raw image data. As the last expression of the cell, the Image
# object is rendered inline by the notebook.
Image(filename='model_architecture.png')


  _BoolLike_co = Union[bool, np.bool]


AttributeError: module 'numpy' has no attribute 'bool'.
`np.bool` was a deprecated alias for the builtin `bool`. To avoid this error in existing code, use `bool` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.bool_` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations