In [1]:
import train_test, transformers, classifiers
import pandas as pd



In [2]:
import keras
import tensorflow as tf

In [3]:
# Location of the input files, relative to this notebook.
DATA_DIR = "../data"

# Step 1: read the feature matrix, the labels and the per-sample study labels.
X, y, study_labels = train_test.load_data(DATA_DIR)

# Step 2: filter with min_n=10.
# NOTE(review): presumably drops classes with fewer than 10 samples -- confirm
# against train_test.filter_data.
X, y, study_labels = train_test.filter_data(X, y, study_labels, min_n=10)

# Step 3: encode the labels; label_mapping is reused by later cells.
y, label_mapping = train_test.encode_labels(y)



  studies_series: 2834
  X_df: (60660, 2834)
  y_series: 2834
  Studies: 2834
  X shape: (2834, 60660)
  y: 2834


  Studies: 2376
  X shape: (2376, 60660)
  y: 2376


In [4]:
# Turn the label_mapping dictionary into a two-column table, ordered by the
# encoded value so that row order follows the encoding.
label_rows = sorted(label_mapping.items(), key=lambda item: item[1])
label_df = pd.DataFrame(
    [{'Label': name, 'Encoded': code} for name, code in label_rows]
)

# Write the table to disk, index column included
# (same format as label_mapping_all.csv).
label_df.to_csv('../data/label_mapping.csv', index=True)

# Show the table as the cell output.
label_df


Unnamed: 0,Label,Encoded
0,AML with MDS-related cytogenetic abnormalities,0
1,AML with MDS-related gene mutations,1
2,AML with in-frame bZIP CEBPA,2
3,AML with inv(16)/t(16;16)/CBFB::MYH11,3
4,AML with mutated NPM1,4
5,AML with mutated TP53,5
6,AML with t(6;9)/DEK::NUP214,6
7,AML with t(8;21)/RUNX1::RUNX1T1,7
8,AML with t(9;11)/MLLT3::KMT2A,8
9,"APL, t(15;17)/PML::RARA",9


In [4]:
# Smoke-test the TensorFlow/Keras installation: report versions and visible
# devices, build a small dense classifier sized to the loaded data, and run a
# single forward pass on a handful of samples. Nothing is trained here.
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

# Check available devices (M1 Macs can use MPS, but CPU is fine too)
gpu_devices = tf.config.list_physical_devices('GPU')
try:
    mps_devices = tf.config.list_physical_devices('MPS')
except Exception:
    # Best-effort probe: treat any failure of the query as "no MPS devices".
    # Catching Exception (instead of a bare except) keeps KeyboardInterrupt
    # and SystemExit working, so the cell can still be interrupted.
    mps_devices = []
cpu_devices = tf.config.list_physical_devices('CPU')

print("\nAvailable devices:")
if mps_devices:
    print(f"  MPS (Metal Performance Shaders): {mps_devices}")
if gpu_devices:
    print(f"  GPU: {gpu_devices}")
if cpu_devices:
    print(f"  CPU: {cpu_devices}")

# NOTE(review): the two lines below are static text printed unconditionally;
# they are not derived from the device lists queried above.
print("\nNote: On M1 Macs, TensorFlow will use CPU by default (which works great!).")
print("      MPS acceleration is optional and requires specific TensorFlow versions.")

# Size the network from the data: one output unit per entry in label_mapping,
# one input per column of X.
n_classes = len(label_mapping)
n_features = X.shape[1]

print(f"\nData shape: {X.shape}")
print(f"Number of classes: {n_classes}")
print(f"Number of features: {n_features}")

# Create a simple neural network: two ReLU hidden layers with dropout,
# followed by a softmax over the classes.
model = keras.Sequential([
    keras.layers.Input(shape=(n_features,)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(n_classes, activation='softmax')
])

# Compile the model. The sparse loss takes integer class ids as targets.
# NOTE(review): assumes y holds integer-encoded labels from the loading cell --
# confirm before training with this model.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Print model summary
print("\nModel Summary:")
model.summary()

# Test with a small sample of data (just to verify it works)
print("\nTesting model with a small sample...")
X_sample = X[:10]
y_sample = y[:10]  # not used by the forward pass below

# The model has not been fitted, so these predictions come from the initial
# weights; the call only verifies that the data flows through the network.
# NOTE(review): X is passed exactly as loaded; if the features are unscaled the
# softmax output may saturate to a one-hot vector -- confirm whether scaling is
# expected upstream.
predictions = model.predict(X_sample, verbose=0)
print(f"Predictions shape: {predictions.shape}")
print(f"Sample prediction (first sample): {predictions[0]}")
print(f"Predicted class: {predictions[0].argmax()}")

print("\n✓ TensorFlow is set up correctly!")


TensorFlow version: 2.16.2
Keras version: 3.10.0

Available devices:
  GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
  CPU: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

Note: On M1 Macs, TensorFlow will use CPU by default (which works great!).
      MPS acceleration is optional and requires specific TensorFlow versions.

Data shape: (2376, 60660)
Number of classes: 23
Number of features: 60660

Model Summary:



Testing model with a small sample...
Predictions shape: (10, 23)
Sample prediction (first sample): [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Predicted class: 1

✓ TensorFlow is set up correctly!


In [5]:
# End-to-end smoke test of the NeuralNet wrapper defined in classifiers.py:
# construct it, fit briefly on a small slice, then call both prediction methods.
print("Testing NeuralNet class from classifiers.py\n")

# Small configuration so the check finishes quickly.
nn = classifiers.NeuralNet(
    class_weight=False,
    n_neurons=[64, 32],  # Smaller network for quick testing
    learning_rate=0.001,
    dropout_rate=0.2,
    batch_size=32,
    loss_function="standard",
)

# Echo the hyperparameters back from the instance's params dictionary.
print(f"NeuralNet created with parameters:")
for param_name in ("n_neurons", "learning_rate", "dropout_rate", "batch_size"):
    print(f"  {param_name}: {nn.params[param_name]}")

# First 100 rows only. The same rows are used for fitting and for the
# predictions below, so this checks the plumbing, not model quality.
X_test, y_test = X[:100], y[:100]

print(f"\nFitting model on {len(X_test)} samples...")
print("(This may take a moment...)")

# Short training run.
nn.fit(X_test, y_test, epochs=5)

print("✓ Model fitted successfully!")

# Exercise both prediction entry points on the first five rows.
print("\nTesting predictions...")
X_head = X_test[:5]
predictions = nn.predict(X_head)
probabilities = nn.predict_proba(X_head)

print(f"Predictions shape: {predictions.shape}")
print(f"Probabilities shape: {probabilities.shape}")
print(f"Sample predictions (first 5): {predictions}")
print(f"Sample probabilities (first sample): {probabilities[0]}")

print("\n✓ NeuralNet class is working correctly!")


Testing NeuralNet class from classifiers.py

NeuralNet created with parameters:
  n_neurons: [64, 32]
  learning_rate: 0.001
  dropout_rate: 0.2
  batch_size: 32

Fitting model on 100 samples...
(This may take a moment...)
Epoch 1/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 358ms/step - accuracy: 0.1467 - loss: 80997.4688
Epoch 2/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.2521 - loss: 221292.0938
Epoch 3/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.2472 - loss: 270378.5625
Epoch 4/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.2809 - loss: 261296.9844
Epoch 5/5
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.2366 - loss: 304048.2500
✓ Model fitted successfully!

Testing predictions...
Predictions shape: (5,)
Probabilities shape: (5, 11)
Sample predictions (first 5): [1 1 1 1 1]
Sample probabilities (fi