In [None]:
import pandas as pd 

# Load the workbook; header=1 takes the column names from the second row of the sheet.
df = pd.read_excel("Simulated Data.xlsx", header=1)

# Normalise column names: trim, lower-case, spaces -> "_", parentheses removed.
# regex=False pins the patterns as literal text so the result does not depend on
# the pandas version's default for `regex` ("(" on its own is not a valid regex).
df.columns = (
    df.columns.str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
)
print(df.columns.tolist())

TARGET_COLS = ["fertilization_rate_%", "blastulation_rate_%"]
SUCCESS_THRESHOLD = 50  # a rate >= this value is labelled 1, anything else 0

# Binary targets: one 0/1 column per rate.
# NOTE(review): a missing rate compares False and is therefore labelled 0 —
# confirm the sheet has no NaN targets, or drop those rows first.
y = (df[TARGET_COLS] >= SUCCESS_THRESHOLD).astype(int)

# Features: everything except the identifier and the two target columns.
x = df.drop(columns=["patient_id", *TARGET_COLS])

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Fitted LabelEncoder for every column that gets encoded, keyed by column name,
# so integer codes can be mapped back to the original categories later.
le_col = {}

# Integer-encode object-dtype feature columns in place.
# astype(str) means missing values are encoded as their own "nan" category.
for col in x.columns:
    if x[col].dtype == 'object':
        le = LabelEncoder()
        x[col] = le.fit_transform(x[col].astype(str))
        le_col[col] = le

# Same treatment for any object-dtype target column.
for col in y.columns:
    if y[col].dtype == "object":
        le = LabelEncoder()
        # Assign to the COLUMN. `y.loc[col] = ...` would address a row labelled
        # `col` instead and leave the column itself un-encoded.
        y[col] = le.fit_transform(y[col])
        le_col[col] = le

# Standardise every feature to zero mean / unit variance.
# NOTE(review): the scaler is fitted on all rows; if x_scaled is later split into
# train/test folds, the held-out rows have contributed to the mean/std.
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

print("x Shape" , x_scaled.shape)
print("y shape" , y.shape)

In [None]:
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout,LeakyReLU
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Nadam
from tensorflow.keras.optimizers import AdamW
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import BatchNormalization
from imblearn.over_sampling import RandomOverSampler
from tensorflow.keras.metrics import AUC
from tensorflow.keras import backend as K
from tensorflow.keras.losses import BinaryCrossentropy
import numpy as np

# Bookkeeping for the cross-validation run: a 1-based fold counter and the
# list that collects each fold's evaluation output.
all_scores = []
fold_no = 1

# Targets as a plain NumPy array (one row per sample, one column per target).
y_np = y.to_numpy()

# 5-fold stratified splitter with a fixed shuffle seed.
strat_kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# One "<first>_<second>" string per row, combining both target values into a
# single label.
combined_labels = np.array([f"{fert}_{blast}" for fert, blast in y_np])

def create_model(input_dim):
    """Build the (uncompiled) two-headed binary classifier.

    Architecture: a shared trunk of two blocks — Dense(64) then Dense(32), each
    with L2(0.005) kernel regularisation and followed by BatchNormalization,
    LeakyReLU(0.1) and Dropout(0.2) — feeding two independent single-unit
    sigmoid heads named 'Fertalisation' and 'Blastulation'.

    Args:
        input_dim: number of input features per sample.

    Returns:
        A Keras ``Model`` with one input and the outputs
        ``[Fertalisation, Blastulation]`` (in that order).
    """
    # Size the input from the argument rather than from a notebook-level global,
    # so the function works for any feature matrix.
    input_layer = Input(shape=(input_dim,))

    hidden = input_layer
    for units in (64, 32):
        hidden = Dense(units, kernel_regularizer=regularizers.l2(0.005))(hidden)
        hidden = BatchNormalization()(hidden)
        hidden = LeakyReLU(0.1)(hidden)
        hidden = Dropout(0.2)(hidden)

    # The layer names double as the keys used for losses, metrics and label dicts.
    fert_output = Dense(1, activation='sigmoid', name='Fertalisation')(hidden)
    blast_output = Dense(1, activation='sigmoid', name='Blastulation')(hidden)

    return Model(inputs=input_layer, outputs=[fert_output, blast_output])
    

# Per-fold evaluation results keyed by metric name; the averages after the loop
# are read from here so they do not depend on the positional order of evaluate().
fold_results = []

for train_index, test_index in strat_kf.split(x_scaled, combined_labels):
    print(f"\n Fold {fold_no}")

    # Release the Keras global state left by the previous fold's model so memory
    # does not grow with every fold.
    K.clear_session()

    x_train, x_test = x_scaled[train_index], x_scaled[test_index]
    y_train, y_test = y_np[train_index], y_np[test_index]

    y_true_fert = y_test[:, 0]
    y_true_blast = y_test[:, 1]

    # Oversample the training fold on the joint "<fert>_<blast>" label, then
    # split that label back into its two integer targets.
    combined_train_labels = combined_labels[train_index]

    ros = RandomOverSampler(random_state=42)
    x_train_res, y_combined_res = ros.fit_resample(x_train, combined_train_labels)

    y_fert_res = np.array([int(label.split('_')[0]) for label in y_combined_res])
    y_blast_res = np.array([int(label.split('_')[1]) for label in y_combined_res])

    # Label dicts are keyed by the output-layer names of the model.
    y_train_dict = {
        'Fertalisation': y_fert_res,
        'Blastulation' : y_blast_res
    }
    val_dict = {
        'Fertalisation': y_true_fert,
        'Blastulation' : y_true_blast
    }

    # minlength=2 keeps both counts addressable even if a class is absent.
    fert_class_dist = np.bincount(y_fert_res, minlength=2)
    blast_class_dist = np.bincount(y_blast_res, minlength=2)

    print("📊 Fertilisation Class Distribution:")
    print(f"Class 0 (Negative): {fert_class_dist[0]}")
    print(f"Class 1 (Positive): {fert_class_dist[1]}")

    print("\n📊 Blastulation Class Distribution:")
    print(f"Class 0 (Negative): {blast_class_dist[0]}")
    print(f"Class 1 (Positive): {blast_class_dist[1]}")

    model = create_model(x_scaled.shape[1])

    loss_fn = BinaryCrossentropy(label_smoothing=0.05)
    opt = AdamW(learning_rate=0.001, weight_decay=1e-4)

    model.compile(
        optimizer=opt,
        loss={
            'Fertalisation': loss_fn,
            'Blastulation': loss_fn,
        },
        metrics={
            'Fertalisation': ['accuracy', AUC(name="auc_fert")],
            'Blastulation': ['accuracy', AUC(name="auc_blast")],
        },
    )

    # NOTE(review): the held-out fold drives early stopping AND is the fold that
    # is scored below, so the reported metrics are likely optimistic. Consider
    # carving a validation split out of the training fold instead.
    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    model.fit(x_train_res, y_train_dict,
              validation_data=(x_test, val_dict),
              epochs=20, batch_size=32, callbacks=[early_stopping],
              verbose=1)

    # return_dict=True yields {metric name: value}; look metrics up by name
    # instead of by list position.
    eval_results = model.evaluate(x_test, val_dict, verbose=1, return_dict=True)
    score = list(eval_results.values())

    print(eval_results)
    print("🔍 Available metric names:", list(eval_results))

    fold_results.append(eval_results)
    all_scores.append(score)
    # After the loop fold_no is (number of folds + 1).
    fold_no += 1

# Cross-validated averages, reported once after every fold has been scored.
avg_fert_acc = np.mean([res['Fertalisation_accuracy'] for res in fold_results])
avg_blast_acc = np.mean([res['Blastulation_accuracy'] for res in fold_results])

print(f"\n Avergae Fertalisation Accuracy: {avg_fert_acc:.4f}")
print(f"\n Avergae Blastulation Accuracy: {avg_blast_acc:.4f}")


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.metrics import f1_score, roc_auc_score
import matplotlib.pyplot as plt
import numpy as np

# Ground-truth columns of whichever y_test is currently in scope
# (column 0 -> fertilisation, column 1 -> blastulation, matching the output order).
true_fert, true_blast = y_test[:, 0], y_test[:, 1]

# The model returns one array per output head; flatten each to 1-D.
y_pred = model.predict(x_test)
fert_probs = y_pred[0].flatten()
blast_probs = y_pred[1].flatten()

# Overlay both probability histograms and mark the default 0.5 cut-off.
for head_probs, head_label in (
    (fert_probs, 'Fertilisation'),
    (blast_probs, 'Blastulation'),
):
    plt.hist(head_probs, bins=20, alpha=0.5, label=head_label)
plt.axvline(0.5, color='black', linestyle='--', label='Default threshold')
plt.title("Prediction Probabilities Distribution")
plt.xlabel("Probability")
plt.ylabel("Frequency")
plt.legend()
# Save before show() so the file contains the drawn figure.
plt.savefig("my_prediction_plot.png")
plt.show()


def find_best_threshold(y_true, y_probs):
    """Grid-search the decision threshold that maximises F1.

    Scans 200 evenly spaced thresholds in [0, 1]; a sample is predicted positive
    when its probability is strictly greater than the threshold.

    Args:
        y_true: binary ground-truth labels.
        y_probs: predicted probabilities (array or any sequence of floats).

    Returns:
        ``(best_thresh, best_f1)``. The first threshold reaching the highest F1
        wins; if no threshold scores above 0 the result is ``(0.5, 0)``.
    """
    # Accept plain lists as well as arrays — `list > float` is not defined.
    y_probs = np.asarray(y_probs)

    best_thresh, best_f1 = 0.5, 0
    for thresh in np.linspace(0, 1, 200):
        # zero_division=0 keeps the score at 0 for thresholds that predict no
        # positives, without an UndefinedMetricWarning on each such step.
        f1 = f1_score(y_true, y_probs > thresh, zero_division=0)
        if f1 > best_f1:
            best_f1 = f1
            best_thresh = thresh
    return best_thresh, best_f1

def calibrate_probs(y_true, y_probs):
    """Recalibrate probabilities with Platt scaling and isotonic regression.

    Both calibrators are fitted on ``(y_probs, y_true)`` and then applied to the
    same ``y_probs``.

    Args:
        y_true: binary ground-truth labels.
        y_probs: 1-D NumPy array of uncalibrated probabilities.

    Returns:
        ``(platt_probs, iso_probs)`` — the positive-class probability from the
        logistic model and the isotonic prediction, one value per input sample.

    NOTE(review): fitting and predicting on the same samples means the
    calibrated scores are in-sample; confirm this is intended.
    """
    # LogisticRegression expects a 2-D feature matrix: one column, one row per sample.
    as_column = y_probs.reshape(-1, 1)

    # Platt scaling: logistic model on the raw probability.
    sigmoid_model = LogisticRegression().fit(as_column, y_true)

    # Isotonic regression works directly on the 1-D scores; out-of-range inputs are clipped.
    isotonic_model = IsotonicRegression(out_of_bounds='clip').fit(y_probs, y_true)

    return (
        sigmoid_model.predict_proba(as_column)[:, 1],
        isotonic_model.predict(y_probs),
    )

# fold_no is incremented at the end of every CV iteration, so by now it points one
# past the fold whose model/x_test/y_test are still in scope. Report that fold.
evaluated_fold = fold_no - 1

# For each target: compare raw, Platt-calibrated and isotonic-calibrated
# probabilities by best-F1 threshold and ROC AUC.
for name, y_true, y_probs in [
    ("Fertilisation", true_fert, fert_probs),
    ("Blastulation", true_blast, blast_probs)
]:
    platt_probs, iso_probs = calibrate_probs(y_true, y_probs)

    for method_name, probs in [
        ("Original", y_probs),
        ("Platt-Calibrated", platt_probs),
        ("Isotonic-Calibrated", iso_probs)
    ]:
        thresh, f1 = find_best_threshold(y_true, probs)
        auc = roc_auc_score(y_true, probs)
        print(f"Fold {evaluated_fold} - {method_name} {name}: Threshold: {thresh:.2f}, F1: {f1:.4f}, AUC: {auc:.4f}")


In [None]:
from sklearn.calibration import calibration_curve
import matplotlib.pyplot as plt

def plot_calibration_curve(y_true, y_probs, title, filename="calibrations.png"):
    """Plot a reliability diagram and save it to ``filename``.

    Bins the predictions into 10 uniform-width bins and plots the observed
    fraction of positives against the mean predicted probability, alongside the
    diagonal of a perfectly calibrated model.

    Args:
        y_true: binary ground-truth labels.
        y_probs: predicted probabilities for the positive class.
        title: suffix for the plot title ("Calibration Curve - <title>").
        filename: where the figure is written; defaults to "calibrations.png".
    """
    prob_true, prob_pred = calibration_curve(y_true, y_probs, n_bins=10, strategy='uniform')
    plt.plot(prob_pred, prob_true, marker='o', label='Model')
    plt.plot([0, 1], [0, 1], 'k--', label='Perfectly calibrated')
    plt.title(f"Calibration Curve - {title}")
    plt.xlabel("Mean Predicted Probability")
    plt.ylabel("Fraction of Positives")
    plt.legend()
    plt.grid(True)
    # Save BEFORE show(): once the figure has been shown, a later savefig()
    # typically writes a fresh, empty figure.
    plt.savefig(filename)
    plt.show()


# Each curve gets its own file so the second call does not overwrite the first.
plot_calibration_curve(true_fert, fert_probs, "Fertilisation", filename="calibrationFert.png")
plot_calibration_curve(true_blast, blast_probs, "Blastulation")

In [1]:
!git commit -m "Update model for classification"
!git push

On branch master
Your branch is up to date with 'origin/master'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	modified:   Predict_Fertalisation.ipynb

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	.ipynb_checkpoints/
	Simulated Data.xlsx
	Untitled.ipynb
	calibrationFert.png
	calibrations.png
	my_prediction_plot.png

no changes added to commit (use "git add" and/or "git commit -a")


Everything up-to-date
