In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import load_model
import joblib

In [2]:
# Load the saved Keras model from its HDF5 file. The prediction function
# below reads this module-level `model`, so this cell must run first.
model = load_model('lstm_cell_division_model.h5')
# Load the persisted scaler that preprocess_csv applies to 'cell_area_um2'.
# NOTE(review): presumably the scaler fitted at training time -- confirm.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only load scaler files that come from a trusted source.
scaler = joblib.load('scaler_cellarea.pkl')



In [7]:
def preprocess_csv(csv_path, feature_scaler=None):
    """Load a tracking CSV and prepare it for sequence building.

    Steps:
      1. Read the CSV and check that the required columns are present.
      2. Scale 'cell_area_um2' with an already-fitted scaler.
      3. Add 'division_label': 0 where 'division_frame_i' is -1, else 1.
      4. Keep every row whose 'division_frame_i' is -1, and only the first
         row for each other 'division_frame_i' value.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the CSV file; passed straight to ``pandas.read_csv``.
    feature_scaler : object with a ``transform`` method, optional
        Fitted scaler applied to the 'cell_area_um2' column. When omitted,
        the notebook-level ``scaler`` is used, so existing calls behave as
        before.

    Returns
    -------
    pandas.DataFrame
        The filtered rows with their original index labels and column
        dtypes, plus the added 'division_label' column.

    Raises
    ------
    ValueError
        If any of the required columns is missing from the CSV.
    """
    data = pd.read_csv(csv_path)

    required_columns = ['time_minutes', 'cell_area_um2', 'division_frame_i']
    missing_columns = [col for col in required_columns if col not in data.columns]
    if missing_columns:
        raise ValueError(f"CSV must contain columns: {', '.join(required_columns)}")

    if feature_scaler is None:
        # Fall back to the scaler loaded at notebook level.
        feature_scaler = scaler

    # transform() returns a 2-D (n_rows, 1) result; flatten it so it is
    # assigned as an ordinary 1-D column.
    scaled_area = feature_scaler.transform(data[['cell_area_um2']])
    data['cell_area_um2'] = np.asarray(scaled_area).reshape(-1)

    division_frames = data['division_frame_i']
    data['division_label'] = (division_frames != -1).astype('int64')

    # Vectorised replacement for the row-by-row loop: rows marked -1 are
    # always kept, every other value is kept only at its first occurrence.
    # Missing values count as one repeated value, so only the first such
    # row is kept. Unlike rebuilding the frame from iterrows(), boolean
    # indexing leaves the column dtypes untouched.
    keep_mask = (division_frames == -1) | ~division_frames.duplicated(keep='first')
    return data.loc[keep_mask].copy()

In [8]:
def create_sequences(data, sequence_length=10):
    """Build overlapping fixed-length windows over 'cell_area_um2'.

    Window ``i`` holds the values at positions ``i`` to
    ``i + sequence_length - 1``. Exactly ``len(data) - sequence_length``
    windows are produced, so the window ending on the very last row is not
    emitted.
    NOTE(review): this count presumably mirrors how the training windows
    were built (target = the step after each window) -- confirm.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a 'cell_area_um2' column; rows are taken in
        positional order regardless of index labels.
    sequence_length : int, default 10
        Number of consecutive values per window; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, sequence_length)``. When ``data`` has
        too few rows for any window, the result has shape
        ``(0, sequence_length)``, so the rank is always 2.

    Raises
    ------
    ValueError
        If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be a positive integer")

    values = data['cell_area_um2'].to_numpy()
    n_windows = len(values) - sequence_length
    if n_windows <= 0:
        # Keep a 2-D shape even when there is nothing to return.
        return np.empty((0, sequence_length), dtype=values.dtype)

    # sliding_window_view yields len(values) - sequence_length + 1 windows
    # as a read-only view; trim to the original window count and copy so
    # the caller gets an independent, writable array.
    windows = np.lib.stride_tricks.sliding_window_view(values, sequence_length)
    return windows[:n_windows].copy()

In [9]:
def predict_division_probability(csv_path, sequence_length=10):
    """Run the whole inference pipeline on a single CSV file.

    The file is preprocessed with ``preprocess_csv``, turned into windows
    by ``create_sequences``, given a trailing axis of size 1, and passed to
    the notebook-level ``model``.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file to score.
    sequence_length : int, default 10
        Window length used for both the windowing and the reshape.
        NOTE(review): presumably must match the length the model was
        trained with -- confirm.

    Returns
    -------
    The value returned by ``model.predict`` for the windowed input.
    """
    windows = create_sequences(preprocess_csv(csv_path), sequence_length)

    # Reshape to (n_windows, sequence_length, 1) before handing it to the model.
    model_input = windows.reshape((-1, sequence_length, 1))

    return model.predict(model_input)

In [10]:
# Input CSV, given as a path relative to the notebook's working directory.
csv_path = 'raw_data/AllPos_acdc_output_bf_260_frames.csv'  
# Run the full pipeline (preprocess -> window -> model.predict) on that file.
probabilities = predict_division_probability(csv_path)

# Print the array returned by model.predict; the captured output below shows
# one value per window, abbreviated with "..." in the middle.
print(probabilities)

[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[[0.00776172]
 [0.0066362 ]
 [0.00881035]
 ...
 [0.02001974]
 [0.02210603]
 [0.02501289]]
