In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import os
import random
# Make float32 the default floating-point dtype used by Keras.
tf.keras.backend.set_floatx('float32')

**Read data and create arrays**

In [None]:
# Load the concatenated cytokine table.
sorted_concatenated_csv = "50x50.csv"
data = pd.read_csv(sorted_concatenated_csv)
#data.drop(columns=['zCOM'], inplace=True)
print(data.head())

# Derive an integer 'time' index from the Monte Carlo step counter
# (mcsteps / 10000), make it the first column and drop the raw counter.
data['time'] = (data['mcsteps'] / 10000).astype(int)
data = data[['time'] + [col for col in data.columns if col not in ('time', 'mcsteps')]].copy()
print(data)

# define cytokines
cytokines = ['il8', 'il1', 'il6', 'il10', 'tnf', 'tgf']
cytokine_columns = list(cytokines)

# Remove brackets and convert to float.
# This must happen BEFORE min()/max() below: the raw columns hold strings such
# as "[0.5]", and extremes of strings are lexicographic, not numeric.
# astype(str) keeps the cell re-runnable if a column is already numeric.
for col in cytokines:
    data[col] = data[col].astype(str).str.strip('[]').astype(float)

# Numeric range of every cytokine.
# NOTE: negative concentrations are reported as-is; no clipping to zero is applied.
smallest_values = data[cytokine_columns].min()
largest_values = data[cytokine_columns].max()

print("Smallest values for each cytokine:")
print(smallest_values)
print("\nLargest values for each cytokine:")
print(largest_values)

# Distinct time points, in order of first appearance in the table.
unique_time = data['time'].unique()

# Maps each time point to a 50 x 50 x len(cytokines) grid of concentrations.
arrays = {}

for time_value in unique_time:
    # Rows recorded at this time point.
    rows_at_time = data.loc[data['time'] == time_value]

    # Lattice coordinates of those rows, truncated to integer indices.
    grid_x = rows_at_time['xCOM'].astype(int).to_numpy()
    grid_y = rows_at_time['yCOM'].astype(int).to_numpy()

    # Start from an all-zero grid; sites without a row stay at zero.
    grid = np.zeros((50, 50, len(cytokines)))

    # Write each row's cytokine vector at its (x, y) site.
    grid[grid_x, grid_y, :] = rows_at_time[cytokines].to_numpy()

    arrays[time_value] = grid

**Extract CellType from LatticeData**

In [None]:
def read_vtk_file(filename):
    """Read the ``CellType`` values of a text VTK lattice file into an array.

    The file is scanned for a line starting with ``DIMENSIONS`` (whose first
    two integers give the grid size) and then for a line starting with
    ``CellType``. The integers on the lines after ``CellType`` are collected
    until ``dimensions[0] * dimensions[1]`` values have been read. Blank lines,
    lines starting with ``FIELD`` and lines that are not purely integers are
    skipped as a whole.

    Parameters
    ----------
    filename : str or path-like
        Path of the VTK file to read.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(dimensions[0], dimensions[1], 1)``.

    Raises
    ------
    ValueError
        If the ``DIMENSIONS`` line (with at least two values) or the
        ``CellType`` line is missing, or if the number of values read does not
        equal ``dimensions[0] * dimensions[1]``.
    """
    with open(filename, 'r') as file:
        lines = file.readlines()

    dimensions = None
    cell_type_data_start = None
    for i, line in enumerate(lines):
        if line.startswith("DIMENSIONS"):
            dimensions = [int(token) for token in line.split()[1:]]
        elif line.startswith("CellType"):
            # Data begins on the line after the "CellType ..." header.
            cell_type_data_start = i + 1
            break

    if dimensions is None or len(dimensions) < 2 or cell_type_data_start is None:
        raise ValueError("Invalid VTK file format")

    expected_size = dimensions[0] * dimensions[1]

    grid_data = []
    for line in lines[cell_type_data_start:]:
        if not line.strip() or line.startswith("FIELD"):
            continue
        try:
            # Parse the whole line before touching grid_data: extending with a
            # lazy map() would keep the leading integers of a line that later
            # fails to parse, shifting every subsequent value.
            values = [int(token) for token in line.split()]
        except ValueError:
            continue  # Skip lines that can't be converted to int
        grid_data.extend(values)
        if len(grid_data) >= expected_size:
            break

    if len(grid_data) != expected_size:
        raise ValueError(f"Data size {len(grid_data)} does not match expected size {expected_size}")

    # Row-major reshape: value k lands at [k // dimensions[1], k % dimensions[1], 0].
    # NOTE(review): legacy VTK point data is usually written with x varying
    # fastest, which would make the first axis y rather than x - confirm that
    # callers indexing this array as [x, y, 0] get the intended site.
    return np.array(grid_data).reshape((dimensions[0], dimensions[1], 1))

def process_vtk_files(directory):
    """Read every ``.vtk`` file in ``directory`` with ``read_vtk_file``.

    Files are visited in natural order: runs of digits in the file name are
    compared as numbers, so ``Step_2.vtk`` comes before ``Step_10.vtk``. Plain
    lexicographic sorting would reverse those two, and callers use the list
    index as the timestep. Zero-padded names sort exactly as before.

    Parameters
    ----------
    directory : str or path-like
        Folder to scan (not recursive).

    Returns
    -------
    list
        One ``read_vtk_file`` result per ``.vtk`` file, in natural name order.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    def natural_key(name):
        # re.split with a capturing group alternates text, digits, text, ...
        # so odd positions are always digit runs; types line up across names.
        pieces = re.split(r'(\d+)', name)
        numeric_aware = [int(piece) if pos % 2 else piece for pos, piece in enumerate(pieces)]
        # The raw name breaks ties such as "a01.vtk" vs "a1.vtk" deterministically.
        return numeric_aware, name

    vtk_names = sorted(
        (name for name in os.listdir(directory) if name.endswith(".vtk")),
        key=natural_key,
    )
    return [read_vtk_file(os.path.join(directory, name)) for name in vtk_names]

# Read one cell-type grid per .vtk file found in the lattice output folder.
directory = "neural-agent-models/data/LatticeData/LatticeData(50x50)"
vtk_arrays = process_vtk_files(directory)

# Spot check: look up the cell type stored at one site of one grid.
timestep = 0  # index into vtk_arrays
x = y = 31  # site to inspect
cell_type = vtk_arrays[timestep][x][y][0]
print(f"The cell type at ({x}, {y}) at timestep {timestep} is {cell_type}")

# vtk_arrays now holds one 3D array per .vtk file, indexed by position in the list.

In [None]:
# Show the raw entry stored at site (31, 31) of the first grid.
print(vtk_arrays[0][31][31])

**Create binary variables from extracted CellType**

In [None]:
def check_cell_presence(vtk_arrays, start_idx, end_idx):
    """Flag, per timestep, whether each tracked cell-type code occurs in the grid.

    Grids of any shape are accepted; the function only tests whether a code
    occurs anywhere in the array, so no fixed lattice size is assumed.

    Parameters
    ----------
    vtk_arrays : sequence of array-like
        Cell-type grids, indexed by timestep.
    start_idx, end_idx : int
        First and last index to inspect (both inclusive).

    Returns
    -------
    tuple of five lists of int
        ``(cellpresente, cellpresentndn, cellpresentna, cellpresentm1,
        cellpresentm2)``: 0/1 flags for the cell-type codes 1, 2, 5, 8 and 9
        respectively, one entry per inspected timestep.

    Raises
    ------
    IndexError
        If an index in ``start_idx..end_idx`` is outside ``vtk_arrays``.
    """
    # Cell-type codes, in the order of the returned lists.
    type_codes = (1, 2, 5, 8, 9)
    presence = [[] for _ in type_codes]

    for i in range(start_idx, end_idx + 1):
        grid = np.asarray(vtk_arrays[i])
        for flags, code in zip(presence, type_codes):
            flags.append(int(np.any(grid == code)))

    return tuple(presence)

# Presence flags for grids 0 through 100 (inclusive).
cellpresente, cellpresentndn, cellpresentna, cellpresentm1, cellpresentm2 = check_cell_presence(vtk_arrays, 0, 100)

# The five flag lists side by side; position in each list is the timestep.
presence_columns = (cellpresente, cellpresentndn, cellpresentna, cellpresentm1, cellpresentm2)

# Print the flags of every timestep.
for t, (flag_e, flag_ndn, flag_na, flag_m1, flag_m2) in enumerate(zip(*presence_columns)):
    print(f"Timestep {t}:")
    print(f"  cellpresente: {flag_e}")
    print(f"  cellpresentndn: {flag_ndn}")
    print(f"  cellpresentna: {flag_na}")
    print(f"  cellpresentm1: {flag_m1}")
    print(f"  cellpresentm2: {flag_m2}")

# Make sure the results folder exists before writing into it.
output_dir = "neural-agent-models/data/PINN/Results"
os.makedirs(output_dir, exist_ok=True)

# One row per timestep: the timestep number followed by the five flags.
results = np.column_stack((range(0, 101),) + presence_columns)
np.savetxt(
    os.path.join(output_dir, 'cell_presence_results.csv'),
    results,
    delimiter=',',
    header='Timestep,cellpresente,cellpresentndn,cellpresentna,cellpresentm1,cellpresentm2',
    comments='',
    fmt='%d',
)