**Imports**

In [49]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
import csv
import math
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Embedding, LSTM, Dense, Flatten, Reshape
from tensorflow.keras.models import Sequential


**CUDA**

In [15]:
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"
print(tf.config.list_physical_devices('GPU'))

[]


# **Data Preparation**

**Convert .txt to .csv**

In [None]:
input_folder = "C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/concentrations_txt/S8"
output_folder = "C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/concentrations/S8"

# create output folder if it doesn't exist
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# iterate through files in input folder
for filename in os.listdir(input_folder):
    if filename.endswith(".txt"):
        try:
            # construct input and output file paths
            input_filepath = os.path.join(input_folder, filename)
            output_filepath = os.path.join(output_folder, os.path.splitext(filename)[0] + ".csv")

            # read txt file and remove leading and trailing quotation marks from each line
            with open(input_filepath, 'r') as file:
                lines = [line.strip().strip('"') for line in file.readlines()]

            # convert to df and save as csv
            df = pd.DataFrame([line.split(',') for line in lines])
            df.to_csv(output_filepath, index=False, header=False, quoting=csv.QUOTE_NONE, escapechar=' ')
            print(f"Converted {input_filepath} to {output_filepath}")
        except Exception as e:
            print(f"Error converting {input_filepath}: {e}")


**Merge data in batches**

In [40]:
# Define file paths
cellcounts_folder = 'C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/cellcounts'
base_cytokine_folder = 'C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/concentrations'
output_folder = 'C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/merged_data'

if not os.path.exists(output_folder):
    os.makedirs(output_folder)

# Define batch size
cellcounts_batch_size = 8
cytokine_batch_size = 101

# Iterate over batches
for batch_index in range(1, cellcounts_batch_size + 1):
    print(f"Processing batch {batch_index}")

    # Get cellcounts file for the current batch
    cellcounts_file = os.path.join(cellcounts_folder, f'cellcount S{batch_index}.csv')
    print("Cellcounts file:", cellcounts_file)

    # Get cytokine subfolder for the current batch
    cytokine_subfolder = f'S{batch_index}'
    cytokine_folder = os.path.join(base_cytokine_folder, cytokine_subfolder)

    # Read cellcounts data
    df_cellcounts_batch = pd.read_csv(cellcounts_file)
    print("Cellcounts batch shape:", df_cellcounts_batch.shape)

    # Initialize an empty DataFrame to store cytokine data
    df_cytokine_batch = pd.DataFrame()

    # Iterate over cytokine files in the subfolder
    for cytokine_file in os.listdir(cytokine_folder):
        if cytokine_file.endswith('.csv'):
            file_path = os.path.join(cytokine_folder, cytokine_file)
            df_cytokine = pd.read_csv(file_path)
            df_cytokine_batch = pd.concat([df_cytokine_batch, df_cytokine], ignore_index=True)

    print("Cytokine batch shape:", df_cytokine_batch.shape)

    # Merge cellcounts and cytokine data
    merged_data = pd.merge(df_cellcounts_batch, df_cytokine_batch, on='mcsteps')
    print("Merged data shape:", merged_data.shape)

    # Save merged data to a new CSV file
    output_filename = os.path.join(output_folder, f'combined_data_batch_{batch_index}.csv')
    merged_data.to_csv(output_filename, index=False)
    print(f"Saved merged data to {output_filename}")

Processing batch 1
Cellcounts file: C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/cellcounts\cellcount S1.csv
Cellcounts batch shape: (101, 11)
Cytokine batch shape: (74758, 10)
Merged data shape: (74758, 20)
Saved merged data to C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/merged_data\combined_data_batch_1.csv
Processing batch 2
Cellcounts file: C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/cellcounts\cellcount S2.csv
Cellcounts batch shape: (101, 11)
Cytokine batch shape: (83259, 10)
Merged data shape: (83259, 20)
Saved merged data to C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/merged_data\combined_data_batch_2.csv
Processing batch 3
Cellcounts file: C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/cellcounts\cellcount S3.csv
Cellcounts batch shape: (101, 11)
Cytokine batch shape: (123611, 10)
Merged data shape: (123611, 20)
Saved merged data to C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/merged_data\combined_data_batch_3.csv
Processi

**Load data to cover all initializations and drop zCOM column as we have 2D spatial data**

In [3]:
data_frames = []
for i in range(1, 8):
    file_path = f'C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/merged_data/combined_data_batch_{i}.csv'
    df = pd.read_csv(file_path)
    df.drop(columns=['zCOM'], inplace=True)  # Drop the 'zCOM' column
    print(df.shape)
    print(df.head())
    data_frames.append(df)

(74758, 19)
   mcsteps     1       2      3      4     5    6      7    8    9    10  \
0        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   
1        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   
2        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   
3        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   
4        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   

   xCOM  yCOM           il8  il1  il6  il10  tnf  tgf  
0    84   376  8.526701e-10  0.0  0.0   0.0  0.0  0.0  
1   432   181  8.526701e-10  0.0  0.0   0.0  0.0  0.0  
2   409   105  8.526701e-10  0.0  0.0   0.0  0.0  0.0  
3   247   394  8.526701e-10  0.0  0.0   0.0  0.0  0.0  
4   132   141  8.526701e-10  0.0  0.0   0.0  0.0  0.0  
(83259, 19)
   mcsteps      1       2      3      4     5    6      7    8    9    10  \
0        0  100.0  1072.0  969.0  110.0  26.0  0.0  125.0  0.0  0.0  28.0   
1        0  100.0  1072.0  96

# **Model**

In [None]:
with tf.device('/GPU:0'):
 class Model(tf.keras.Model):
    def __init__(self, model_path, train_mode=True, input_dim=19, lstm_size=500, batch_size=4, e_learning_rate=1e-5):
        super(Model, self).__init__()
        self.model_path = model_path
        self.train_mode = train_mode
        self.input_dim = input_dim
        self.lstm_size = lstm_size
        self.batch_size = batch_size
        self.e_learning_rate = e_learning_rate

        #define LSTM layer
        self.lstm_layer = tf.keras.layers.LSTM(units=self.lstm_size, return_sequences=True)

        #define output layer
        self.output_layer = tf.keras.layers.Dense(units=2, activation=None)
        # Define reshape layer
        self.reshape_layer = tf.keras.layers.Reshape((500, 500))  # Reshape to match spatial dimensions

        #define optimizer
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.e_learning_rate)

        #define MSE as loss function
        self.loss_fn = tf.keras.losses.MeanSquaredError()

        
        
    def call(self, inputs):
        # Reshape inputs to add sequence length dimension
        inputs = tf.expand_dims(inputs, axis=1)
        
        # Input shape: (batch_size, sequence_length, input_dim)
        x = self.lstm_layer(inputs)
        # Output shape: (batch_size, sequence_length, lstm_size)
        output = self.output_layer(x)
        # Output shape: (batch_size, sequence_length, 2) - 2 for outputs
        
        # Reshape output to match spatial dimensions
        output = self.reshape_layer(output)
        return output

    def train_step(self, xtrain, ytrain):
        with tf.GradientTape() as tape:
            y_pred = self(xtrain, training=True)
            loss = self.loss_fn(ytrain, y_pred)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return loss

    def train(self, train_set, valid_set, maxEpoch=10):
        x_train, y_train = train_set
        x_valid, y_valid = valid_set
        
        for epoch in range(maxEpoch):
            train_loss = self.train_step(x_train, y_train)
            valid_loss = self.loss_fn(y_valid, self(x_valid, training=False))
            print(f"Epoch {epoch + 1}, Train Loss: {train_loss}, Valid Loss: {valid_loss}")



if __name__ == "__main__":

    df1 = data_frames[0]
    
    #convert the df to numpy array and cast the data to float
    results = df1.to_numpy(dtype='float')

    #define input indices and output indices
    input_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
    output_indices = [11, 12] 

   # Split data into train and valid sets
    train_size = int(len(results) * 0.9)
    train_features = results[:train_size, input_indices]
    train_targets = results[:train_size, output_indices]
    valid_features = results[train_size:, input_indices]
    valid_targets = results[train_size:, output_indices]

    # Create train and valid sets with input features and targets
    train_set = (train_features, train_targets)
    valid_set = (valid_features, valid_targets)

    #initialize and train the model
    mymodel = Model(model_path="saved_model")
    mymodel.train(train_set, valid_set, maxEpoch=500)   


In [66]:
input_shape = (1, 19)
output_shape = 2
embedding_size = 32 
#num_unique_locations = 250000 #as 500x500 spatial data
num_unique_locations = len(set(df1.iloc[:, 11].unique()) | set(df1.iloc[:, 12].unique()))

model = Sequential([
    Embedding(input_dim=num_unique_locations, output_dim=embedding_size, input_length=input_shape[1]),
    #Flatten(), 
    LSTM(units=64, return_sequences=False),
    Dense(units=output_shape)
])
model.summary()

X = df1.iloc[:, :19]
Y = df1.iloc[:, 11:13]

X_reshaped = X.values  # Remove the extra dimension

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_reshaped, Y, epochs=100, batch_size=32, validation_split=0.1, verbose=2)

# Predict on the training data
y_pred = model.predict(X_reshaped)

# Round the predictions to the nearest integer
y_pred_rounded = np.round(y_pred)

Model: "sequential_12"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_12 (Embedding)    (None, 19, 32)            2295776   
                                                                 
 lstm_28 (LSTM)              (None, 64)                24832     
                                                                 
 dense_28 (Dense)            (None, 2)                 130       
                                                                 
Total params: 2320738 (8.85 MB)
Trainable params: 2320738 (8.85 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/100


InvalidArgumentError: Graph execution error:

Detected at node 'sequential_12/embedding_12/embedding_lookup' defined at (most recent call last):
    File "c:\Program Files\Python38\lib\runpy.py", line 192, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Program Files\Python38\lib\runpy.py", line 85, in _run_code
      exec(code, run_globals)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "c:\Program Files\Python38\lib\asyncio\base_events.py", line 563, in run_forever
      self._run_once()
    File "c:\Program Files\Python38\lib\asyncio\base_events.py", line 1844, in _run_once
      handle._run()
    File "c:\Program Files\Python38\lib\asyncio\events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\ipykernel\ipkernel.py", line 359, in execute_request
      await super().execute_request(stream, ident, parent)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\ipykernel\ipkernel.py", line 446, in do_execute
      res = shell.run_cell(
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Ioannis\AppData\Local\Temp\ipykernel_10588\3608765077.py", line 23, in <module>
      history = model.fit(X_reshaped, Y, epochs=100, batch_size=32, validation_split=0.1, verbose=2)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 1742, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 1338, in train_function
      return step_function(self, iterator)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 1322, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 1303, in run_step
      outputs = model.train_step(data)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 1080, in train_step
      y_pred = self(x, training=True)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\training.py", line 569, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\sequential.py", line 405, in call
      return super().call(inputs, training=training, mask=mask)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\engine\base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Ioannis\AppData\Roaming\Python\Python38\site-packages\keras\src\layers\core\embedding.py", line 272, in call
      out = tf.nn.embedding_lookup(self.embeddings, inputs)
Node: 'sequential_12/embedding_12/embedding_lookup'
indices[0,0] = 800000 is not in [0, 71743)
	 [[{{node sequential_12/embedding_12/embedding_lookup}}]] [Op:__inference_train_function_1369276]

In [62]:
print("Sample of input features (X):")
print(X.head())

print("\nSample of scaled target variables (Y):")
print(Y.head())

Sample of input features (X):
   mcsteps     1       2      3      4     5    6      7    8    9    10  \
0        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   
1        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   
2        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   
3        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   
4        0  10.0  1072.0  972.0  109.0  25.0  0.0  121.0  0.0  0.0  29.0   

   xCOM  yCOM           il8  il1  il6  il10  tnf  tgf  
0    84   376  8.526701e-10  0.0  0.0   0.0  0.0  0.0  
1   432   181  8.526701e-10  0.0  0.0   0.0  0.0  0.0  
2   409   105  8.526701e-10  0.0  0.0   0.0  0.0  0.0  
3   247   394  8.526701e-10  0.0  0.0   0.0  0.0  0.0  
4   132   141  8.526701e-10  0.0  0.0   0.0  0.0  0.0  

Sample of scaled target variables (Y):
   xCOM  yCOM
0    84   376
1   432   181
2   409   105
3   247   394
4   132   141
