#### Boilerplate

In [None]:
# Move into the CellModeller checkout and install it in editable mode.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# NOTE(review): the `%cd` path is relative, so re-running this cell without
# restarting the kernel will look for a nested CellModeller-ingallslab directory.
%cd CellModeller-ingallslab
%pip install -e . --use-pep517
# Optional CellProfiler setup, currently disabled.
# NOTE(review): the second disabled line has a typo (`- e` should be `-e`);
# fix it before re-enabling.
#%cd CellProfilerAnalysis/
#%pip install - e . --use-pep517
#%pip install CellProfiler


In [2]:
import numpy as np
import pandas as pd


def make_numpy_array(pickle_to_dict):
    """Flatten one loaded simulation snapshot into a per-cell table of floats.

    Parameters
    ----------
    pickle_to_dict : dict
        Loaded snapshot. It must provide ``'cellStates'`` (a mapping whose
        values are cell-state objects) and ``'lineage'`` (a mapping of
        cell id -> parent id).

    Returns
    -------
    pandas.DataFrame
        One row per cell state, all columns cast to float, in this order:
        time, id, parent, label, cellType, divideFlag, cellAge, growthRate,
        LifeHistory, startVol, targetVol, pos, radius, length, dir, ends0,
        ends1, strainRate, strainRate_rolling.

    Notes
    -----
    Vector attributes are reduced to scalars: ``pos`` and the two ``ends``
    become Euclidean norms, ``dir`` becomes ``arctan2(dir[1], dir[0])``.
    A missing ``LifeHistory`` attribute, a cell id absent from the lineage
    mapping, and any NaN value are all stored as 0. Every other attribute is
    required; a missing one raises ``AttributeError``.
    """
    def euclidean_norm(vector):
        # Distance of the point from the origin
        return np.sqrt(np.sum(np.power(vector, 2)))

    states = list(pickle_to_dict['cellStates'].values())
    lineage = pickle_to_dict['lineage']
    cell_ids = [state.id for state in states]

    # Key order here fixes the column order of the returned frame.
    columns = {
        'time': [state.time for state in states],
        'id': cell_ids,
        # Structure of 'lineage' is id -> parent id; cells without an entry get 0
        'parent': [lineage[cell_id] if cell_id in lineage else 0
                   for cell_id in cell_ids],
        'label': [state.label for state in states],
        'cellType': [state.cellType for state in states],
        'divideFlag': [state.divideFlag for state in states],
        'cellAge': [state.cellAge for state in states],
        'growthRate': [state.growthRate for state in states],
        # 'LifeHistory' is the only optional attribute
        'LifeHistory': [getattr(state, 'LifeHistory', 0) for state in states],
        'startVol': [state.startVol for state in states],
        'targetVol': [state.targetVol for state in states],
        'pos': [euclidean_norm(state.pos) for state in states],
        'radius': [state.radius for state in states],
        'length': [state.length for state in states],
        'dir': [np.arctan2(state.dir[1], state.dir[0]) for state in states],
        'ends0': [euclidean_norm(state.ends[0]) for state in states],
        'ends1': [euclidean_norm(state.ends[1]) for state in states],
        'strainRate': [state.strainRate for state in states],
        'strainRate_rolling': [state.strainRate_rolling for state in states],
    }

    # NaN -> 0 first, then force a uniform float dtype
    table = pd.DataFrame(columns)
    return table.fillna(0).astype(float)


In [3]:
import os
import torch
# Smoke test: load a single snapshot and look at the shape of its feature table.
pickle_file = '/home/stormageddon/MITACS/test/why/adh_0.1_27/step-00120.pickle'

# allow_pickle=True unpickles arbitrary Python objects -- only load trusted files.
pickle_to_dict = np.load(file=pickle_file, allow_pickle=True)
df = make_numpy_array(pickle_to_dict)

# One row per cell, one column per property
tensor = torch.tensor(df.to_numpy())

# Optional CSV export for manual inspection
#df.to_csv('input features.csv', index=False)

print(tensor.shape)


torch.Size([32, 19])


In [4]:
import os
import torch
import numpy as np

# Directory holding the per-step snapshots (*.pickle) of one simulation run
directory = '/home/stormageddon/MITACS/test/why/adh_0.01_27/'

# os.listdir returns entries in arbitrary order. Sort by file name so rows are
# stacked in a reproducible order (chronological if step numbers are
# zero-padded, as in 'step-00120.pickle' -- TODO confirm for every file).
pickle_files = sorted(
    file for file in os.listdir(directory) if file.endswith('.pickle'))

# One (num_cells, num_features) tensor per snapshot
tensor_list = []

for pickle_file in pickle_files:
    # allow_pickle=True unpickles arbitrary objects -- only load trusted files
    pickle_to_dict = np.load(os.path.join(
        directory, pickle_file), allow_pickle=True)
    # Per-cell feature table for this snapshot
    df = make_numpy_array(pickle_to_dict)

    tensor = torch.tensor(df.values)
    tensor_list.append(tensor)

# Concatenate the rows of all snapshots along dim 0; the result is
# (total number of cell rows over all snapshots, num_features).
final_tensor = torch.cat(tensor_list, dim=0)

print(final_tensor.shape)


torch.Size([873, 19])


In [5]:
import os
import torch
import numpy as np

# Directory holding the per-step snapshots (*.pickle) of one simulation run
directory = '/home/stormageddon/MITACS/test/why/adh_0.01_28/'

# os.listdir returns entries in arbitrary order. Sort by file name so rows are
# stacked in a reproducible order (chronological if step numbers are
# zero-padded, as in 'step-00120.pickle' -- TODO confirm for every file).
pickle_files = sorted(
    file for file in os.listdir(directory) if file.endswith('.pickle'))

# One (num_cells, num_features) tensor per snapshot
tensor_list = []

for pickle_file in pickle_files:
    # allow_pickle=True unpickles arbitrary objects -- only load trusted files
    pickle_to_dict = np.load(os.path.join(
        directory, pickle_file), allow_pickle=True)
    # Per-cell feature table for this snapshot
    df = make_numpy_array(pickle_to_dict)

    tensor = torch.tensor(df.values)
    tensor_list.append(tensor)

# Concatenate the rows of all snapshots along dim 0; the result is
# (total number of cell rows over all snapshots, num_features).
final_tensor2 = torch.cat(tensor_list, dim=0)

print(final_tensor2.shape)


torch.Size([879, 19])


In [6]:
import os
import torch
import numpy as np

# Directory holding the per-step snapshots (*.pickle) of one simulation run
directory = '/home/stormageddon/MITACS/test/why/adh_0.01_24/'

# os.listdir returns entries in arbitrary order. Sort by file name so rows are
# stacked in a reproducible order (chronological if step numbers are
# zero-padded, as in 'step-00120.pickle' -- TODO confirm for every file).
pickle_files = sorted(
    file for file in os.listdir(directory) if file.endswith('.pickle'))

# One (num_cells, num_features) tensor per snapshot
tensor_list = []

for pickle_file in pickle_files:
    # allow_pickle=True unpickles arbitrary objects -- only load trusted files
    pickle_to_dict = np.load(os.path.join(
        directory, pickle_file), allow_pickle=True)
    # Per-cell feature table for this snapshot
    df = make_numpy_array(pickle_to_dict)

    tensor = torch.tensor(df.values)
    tensor_list.append(tensor)

# Concatenate the rows of all snapshots along dim 0; the result is
# (total number of cell rows over all snapshots, num_features).
final_tensor3 = torch.cat(tensor_list, dim=0)

print(final_tensor3.shape)


torch.Size([1121, 19])


In [7]:
# Largest row count among the three runs; the shorter runs are padded up to this.
max_num_obs = max(
    run.shape[0] for run in (final_tensor, final_tensor2, final_tensor3))

In [8]:
# Zero-pad every run along dim 0 so all three have max_num_obs rows.
# new_zeros inherits dtype and device from the tensor being padded, so the
# padding matches the data instead of relying on torch.cat to promote
# torch.zeros' default float32 to the data's dtype.
padded_tensor1, padded_tensor2, padded_tensor3 = (
    torch.cat(
        (run, run.new_zeros((max_num_obs - run.shape[0], run.shape[1]))),
        dim=0)
    for run in (final_tensor, final_tensor2, final_tensor3)
)

In [9]:
# Stack the equally sized runs along a new leading axis: (num_runs, max_num_obs, num_features)
array = torch.stack([padded_tensor1, padded_tensor2, padded_tensor3])
array.shape

torch.Size([3, 1121, 19])

In [12]:
import torch
import torch.nn as nn


class Autoencoder(nn.Module):
    """Sequence autoencoder: Conv1d -> LSTM encoder, LSTM -> ConvTranspose1d decoder.

    Parameters
    ----------
    input_shape : sequence of int
        Shape of the expected input, ``(batch, sequence_length, num_features)``.
        Only ``input_shape[2]`` (the feature count) is used to size the layers.
    latent_dim : int
        Size of the encoder LSTM hidden state, i.e. of the latent code.
    """

    def __init__(self, input_shape, latent_dim):
        super().__init__()
        num_features = input_shape[2]
        self.encoder_cnn = nn.Conv1d(
            num_features, 32, kernel_size=3, padding=1)
        self.encoder_lstm = nn.LSTM(32, latent_dim, batch_first=True)
        self.decoder_lstm = nn.LSTM(latent_dim, 32, batch_first=True)
        self.decoder_cnn = nn.ConvTranspose1d(
            32, num_features, kernel_size=3, padding=1)

    def forward(self, x):
        """Reconstruct ``x``.

        Parameters
        ----------
        x : torch.Tensor
            Shape ``(batch, sequence_length, num_features)``.

        Returns
        -------
        torch.Tensor
            Reconstruction with the same shape as ``x``.
        """
        seq_len = x.size(1)

        # Conv1d works on (batch, channels, length); swap axes around the call.
        x = self.encoder_cnn(x.transpose(1, 2)).transpose(1, 2)

        # h is (num_layers=1, batch, latent_dim): final hidden state per sample.
        _, (h, _) = self.encoder_lstm(x)

        # Give every sample its OWN latent code at each time step:
        # (batch, latent_dim) -> (batch, seq_len, latent_dim).
        # Repeating h directly along dim 1 would produce a single sequence of
        # length batch * seq_len that interleaves the codes of all samples.
        x = h[-1].unsqueeze(1).repeat(1, seq_len, 1)

        x, _ = self.decoder_lstm(x)

        # kernel_size=3 with padding=1 preserves the length, so the output is
        # already (batch, seq_len, num_features); no hard-coded reshape needed.
        x = self.decoder_cnn(x.transpose(1, 2))
        return x.transpose(1, 2)




In [None]:
import torch.optim as optim
# Train the autoencoder to reconstruct the stacked simulation tensor.

# The nn layers use float32 parameters, while the DataFrame-derived data is float64.
array = array.float()
losss = []  # training loss recorded after every epoch
input_shape = tuple(array.shape)  # (num_runs, num_rows, num_features)
latent_dim = 64
model = Autoencoder(input_shape, latent_dim)
criterion = nn.MSELoss()
# NOTE(review): lr=0.1 is high for Adam; lower it if the loss oscillates or diverges.
optimizer = optim.Adam(model.parameters(), lr=0.1)
# An autoencoder must see the data it is asked to reconstruct. Feeding random
# noise here (as before) only lets the model learn a fixed noise -> data mapping.
input_tensor = array
num_epochs = 1000

model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(input_tensor)
    loss = criterion(outputs, input_tensor)
    loss.backward()
    optimizer.step()
    losss.append(loss.item())

    # Print the loss for monitoring
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}")


In [22]:
import os
import torch
import numpy as np
import torch.nn.functional as F

# Root directory: one sub-folder per simulation run
directory = '/home/stormageddon/MITACS/test/why'

# os.listdir returns entries in arbitrary order; sort so that the run axis of
# `array` is reproducible between executions.
folders = sorted(
    folder for folder in os.listdir(directory)
    if os.path.isdir(os.path.join(directory, folder)))

# One 2-D tensor per run
final_tensors = []

for folder in folders:
    folder_path = os.path.join(directory, folder)

    # Sorted for the same reason as above (chronological if step numbers in the
    # file names are zero-padded -- TODO confirm).
    pickle_files = sorted(
        file for file in os.listdir(folder_path) if file.endswith('.pickle'))
    if not pickle_files:
        # No snapshots in this folder: skip it, since max() below would raise
        # on an empty list.
        continue

    # One (num_cells, num_features) tensor per snapshot of this run
    tensor_list = []
    for pickle_file in pickle_files:
        # allow_pickle=True unpickles arbitrary objects -- only load trusted files
        pickle_to_dict = np.load(os.path.join(
            folder_path, pickle_file), allow_pickle=True)
        df = make_numpy_array(pickle_to_dict)
        tensor_list.append(torch.tensor(df.values))

    # Pad every snapshot with zero rows up to the largest snapshot of the run.
    # F.pad's tuple is (last-dim left, last-dim right, dim-0 top, dim-0 bottom).
    max_num_obs = max(tensor.shape[0] for tensor in tensor_list)
    padded_tensors = [
        F.pad(tensor, (0, 0, 0, max_num_obs - tensor.shape[0]))
        for tensor in tensor_list
    ]

    # Rows of all (padded) snapshots of this run, one after another along dim 0
    final_tensor = torch.cat(padded_tensors, dim=0)
    final_tensors.append(final_tensor)

# Pad the runs with zero rows so they all have the same number of rows
max_num_rows = max(tensor.shape[0] for tensor in final_tensors)
# Average number of rows per run, before padding
print(sum([tensor.shape[0] for tensor in final_tensors])/len(final_tensors))
padded_final_tensors = [
    F.pad(tensor, (0, 0, 0, max_num_rows - tensor.shape[0]))
    for tensor in final_tensors
]

# Stack the runs along a new leading dimension: (num_runs, max_num_rows, num_features)
array = torch.stack(padded_final_tensors, dim=0)


5053.916417910447
0


In [24]:
import os
import torch
import numpy as np
import gzip

# Write `array` to disk through a gzip stream.
# To read it back: open the file with gzip.open(output_file, 'rb') and pass
# the handle to torch.load.
output_file = '/home/stormageddon/MITACS/test/compressed_input.pt.gz'
with gzip.open(output_file, mode='wb') as f:
    torch.save(array, f)

print(array.shape)
print("Final tensor saved in compressed format.")


torch.Size([335, 6144, 19])
Final tensor saved in compressed format.


In [25]:
import torch
import torch.nn as nn


class FeatureExtractor(nn.Module):
    """Summarise a sequence as a fixed-size vector.

    A single-layer LSTM reads the sequence and its final hidden state is
    projected by a linear layer.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Size of the LSTM hidden state.
    output_size : int
        Size of the returned feature vector.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        self.lstm = nn.LSTM(
            input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map ``x`` of shape (batch_size, sequence_length, input_size) to
        features of shape (batch_size, output_size)."""
        _, (final_hidden, _) = self.lstm(x)
        # final_hidden: (1, batch_size, hidden_size) -- drop the layer axis
        last_state = final_hidden.squeeze(0)
        return self.fc(last_state)


In [29]:
# Model dimensions:
#   input_size  - number of features per row of the input
#   hidden_size - size of the LSTM hidden state
#   output_size - size of the extracted feature vector
input_size, hidden_size, output_size = 19, 128, 64

# Build the feature extraction model
model = FeatureExtractor(
    input_size=input_size, hidden_size=hidden_size, output_size=output_size)

# Dimensions of the stacked simulation tensor, kept for reference only
# (number of simulations, rows per simulation)
batch_size, sequence_length = 335, 6144

# Use the real stacked data, cast to float32 to match the model parameters
input_data = array.float()

# Run the whole data set through the model in one batch
output = model(input_data)

print(output.shape)


torch.Size([335, 64])
