In [17]:
import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [3]:
# Read the measurements table and keep only the three columns used below,
# discarding any row that has a missing value in one of them.
data = pd.read_csv('raw_data/AllPos_acdc_output_bf_260_frames.csv')
feature_columns = ['time_minutes', 'cell_area_um2', 'division_frame_i']
relevant_data = data[feature_columns].dropna()

# Rescale the cell area to the [0, 1] range. The fitted scaler is kept in
# `scaler` because later cells reuse it.
scaler = MinMaxScaler()
scaled_area = scaler.fit_transform(relevant_data[['cell_area_um2']])
relevant_data['cell_area_um2'] = scaled_area[:, 0]

# Binary target: 0 where `division_frame_i` equals -1, 1 for any other value.
has_division_frame = relevant_data['division_frame_i'].ne(-1)
relevant_data['division_label'] = has_division_frame.astype('int64')

In [4]:
def remove_post_division(data):
    """Keep only the first row seen for each distinct division frame.

    Rows whose ``division_frame_i`` equals ``-1`` are always kept. For every
    other value, only the first row carrying that value is kept and later
    rows repeating it are dropped. Rows with a missing ``division_frame_i``
    are kept as well.

    The filter is applied as a boolean mask instead of rebuilding the frame
    row by row, so column dtypes are preserved (a row-wise rebuild coerces
    every column to a common dtype) and an empty input yields an empty frame
    that still has all of its columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing a ``division_frame_i`` column.

    Returns
    -------
    pandas.DataFrame
        Filtered copy of ``data``; original index labels, column order and
        dtypes are retained.
    """
    division_frame = data['division_frame_i']

    not_divided = division_frame == -1
    first_with_value = ~division_frame.duplicated(keep='first')
    # `duplicated` treats repeated NaNs as duplicates; keep them explicitly.
    missing = division_frame.isna()

    keep = (not_divided | first_with_value | missing).to_numpy()
    # A positional (ndarray) mask avoids index alignment issues when the
    # index contains duplicate labels.
    return data.loc[keep].copy()

# Apply the division-frame filter to the scaled, labelled table.
cleaned_data = remove_post_division(data=relevant_data)

In [5]:
def create_sequences(data, sequence_length=10):
    """Build sliding windows of cell area and the label that follows each one.

    Window ``i`` holds rows ``i .. i + sequence_length - 1`` of
    ``cell_area_um2`` (by position, not index label) and is paired with the
    ``division_label`` of row ``i + sequence_length``.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing ``cell_area_um2`` and ``division_label`` columns.
    sequence_length : int, default 10
        Number of consecutive rows in each window.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_windows, sequence_length)``. When ``data`` has too few
        rows this is an empty array of shape ``(0, sequence_length)``, so the
        second axis is consistent for downstream reshaping.
    y : numpy.ndarray
        Shape ``(n_windows,)``.

    Raises
    ------
    ValueError
        If ``sequence_length`` is negative.
    """
    if sequence_length < 0:
        raise ValueError("sequence_length must be non-negative")

    # NOTE(review): windows are taken over consecutive rows exactly as given;
    # nothing here groups rows by cell/track, so if `data` holds several
    # tracks a window can straddle two of them — confirm upstream ordering.
    areas = data['cell_area_um2'].to_numpy()
    labels = data['division_label'].to_numpy()

    n_windows = max(len(areas) - sequence_length, 0)

    # Row i of `window_index` is [i, i + 1, ..., i + sequence_length - 1];
    # indexing once with it replaces a Python loop of pandas slices.
    window_index = (
        np.arange(n_windows)[:, None] + np.arange(sequence_length)[None, :]
    )
    X = areas[window_index]
    y = labels[sequence_length:sequence_length + n_windows].copy()
    return X, y

# Window length shared by sequence creation, the model input and prediction.
sequence_length = 10

# Turn the cleaned table into (window, next-label) pairs, then hold out 20%
# of the pairs for testing with a fixed seed.
X, y = create_sequences(cleaned_data, sequence_length=sequence_length)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Binary classifier: one 64-unit LSTM over a (sequence_length, 1) window,
# followed by a single sigmoid unit.
# The input shape is declared with an explicit Input layer; passing
# `input_shape=` to the first layer of a Sequential model makes recent Keras
# versions emit a UserWarning.
model = Sequential([
    Input(shape=(sequence_length, 1)),
    LSTM(64, return_sequences=False),
    Dense(1, activation='sigmoid'),
])
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

  super().__init__(**kwargs)


In [7]:
# Add the trailing feature axis so each sample is (sequence_length, 1).
# Reshaping to a fixed target shape keeps this cell safe to re-run.
lstm_input_shape = (-1, sequence_length, 1)
X_train = X_train.reshape(lstm_input_shape)
X_test = X_test.reshape(lstm_input_shape)

# Train for 20 epochs, setting aside 10% of the training data for validation.
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1,
)

Epoch 1/20
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step - accuracy: 0.9823 - loss: 0.3702 - val_accuracy: 0.9866 - val_loss: 0.0692
Epoch 2/20
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9824 - loss: 0.0873 - val_accuracy: 0.9866 - val_loss: 0.0667
Epoch 3/20
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9862 - loss: 0.0718 - val_accuracy: 0.9866 - val_loss: 0.0667
Epoch 4/20
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9820 - loss: 0.0911 - val_accuracy: 0.9866 - val_loss: 0.0672
Epoch 5/20
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9821 - loss: 0.0885 - val_accuracy: 0.9866 - val_loss: 0.0677
Epoch 6/20
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9828 - loss: 0.0862 - val_accuracy: 0.9866 - val_loss: 0.0664
Epoch 7/20
[1m84/84[0m [32m━━━━━━━━━━

In [8]:
# Score the trained model on the test split and report both metrics.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9787 - loss: 0.1043
Test Loss: 0.09812430292367935, Test Accuracy: 0.9798927903175354


In [18]:
def predict_division(cell_size_sequence):
    """Return the model's sigmoid output for one window of cell areas.

    Relies on the notebook-level ``scaler``, ``model`` and
    ``sequence_length`` objects.

    Parameters
    ----------
    cell_size_sequence : array-like of float
        Cell-area values on the original (unscaled) scale; they are passed
        through ``scaler.transform`` here. Must contain exactly
        ``sequence_length`` values.

    Returns
    -------
    scalar
        The single value produced by ``model.predict`` for this window.

    Raises
    ------
    ValueError
        If the number of values differs from ``sequence_length``.
    """
    areas = np.asarray(cell_size_sequence, dtype=float).reshape(-1, 1)
    if areas.shape[0] != sequence_length:
        raise ValueError(
            f"expected {sequence_length} cell-area values, got {areas.shape[0]}"
        )

    # If the scaler was fit on a DataFrame it remembers the column name;
    # hand it a matching frame so scikit-learn does not warn about missing
    # feature names.
    feature_names = getattr(scaler, 'feature_names_in_', None)
    if feature_names is not None:
        areas = pd.DataFrame(areas, columns=feature_names)

    normalized_sequence = np.asarray(scaler.transform(areas))
    normalized_sequence = normalized_sequence.reshape(1, sequence_length, 1)
    return model.predict(normalized_sequence)[0][0]

# Persist the trained network and the fitted scaler next to the notebook so
# both can be reloaded together for inference.
model_path = 'lstm_cell_division_model.h5'
scaler_path = 'scaler_cellarea.pkl'

model.save(model_path)
joblib.dump(scaler, scaler_path)



['scaler_cellarea.pkl']