In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import os
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [5]:
# Define file paths
cellcounts_folder = 'C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/cellcounts'
cytokine_folder = 'C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/concentrations'
output_folder = 'C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/merged_data'


# Get list of cellcounts files
cellcounts_files = sorted([os.path.join(cellcounts_folder, file) for file in os.listdir(cellcounts_folder)])

# Get list of cytokine concentration files
cytokine_files = sorted([os.path.join(cytokine_folder, file) for file in os.listdir(cytokine_folder)])

# Define batch size
batch_size = 1  # 1 cellcounts file per batch
cytokine_batch_size = 10  # 10 cytokine concentration files per batch

# Iterate over batches
for batch_index in range(len(cellcounts_files)):
    start_index = batch_index * batch_size
    end_index = min(start_index + batch_size, len(cellcounts_files))
    
    # Read cellcounts data
    cellcounts_data = [pd.read_csv(cellcounts_files[i]) for i in range(start_index, end_index)]
    
    # Read corresponding cytokine concentration files
    cytokine_batch_files = cytokine_files[batch_index * cytokine_batch_size : (batch_index + 1) * cytokine_batch_size]
    cytokine_data_batch = [pd.read_csv(file) for file in cytokine_batch_files]
    
    # Merge data based on 'mcstep' column
    merged_data = cellcounts_data[0]  # Assuming one cellcounts file per batch
    for cytokine_data in cytokine_data_batch:
        merged_data = pd.merge(merged_data, cytokine_data, on='mcstep', how='inner')
    
    # Save merged data to output folder
    output_filename = os.path.join(output_folder, f'merged_data_batch_{batch_index}.csv')
    merged_data.to_csv(output_filename, index=False)


PermissionError: [Errno 13] Permission denied: 'C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/concentrations\\S1'

In [3]:
#read csvs
df1 = pd.read_csv("C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/cellcount S6.csv")
df2 = pd.read_csv("C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/mean_concentration.csv")
df2.rename(columns={'meanconcen': 'mcsteps'}, inplace=True)

#merge dfs using mcsteps as common column
csvfile1 = pd.merge(df1, df2, on='mcsteps')

#save to new csv file
csvfile1.to_csv("combined_data.csv", index=False)

In [3]:
#read csv
df2 = pd.read_csv("C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/conc0.csv")
df3 = pd.read_csv("C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/conc1.csv")
df4 = pd.read_csv("C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/conc2.csv")

csvfile1 = pd.concat([df2, df3, df4], ignore_index=True)
#print(csvfile1)
csvfile1.to_csv("testdata.csv", index=False)

#display the first few rows of df
print(csvfile1.head())
print(csvfile1.info())
print(csvfile1.describe())

First few rows of the DataFrame:
   mcsteps  xCOM  yCOM  zCOM           il8  il1  il6  il10  tnf  tgf
0        0   242   356     0  8.625129e-10  0.0  0.0   0.0  0.0  0.0
1        0    92   241     0  8.638730e-10  0.0  0.0   0.0  0.0  0.0
2        0   228   230     0  8.541914e-10  0.0  0.0   0.0  0.0  0.0
3        0    89   420     0  8.535352e-10  0.0  0.0   0.0  0.0  0.0
4        0   214   406     0  8.612076e-10  0.0  0.0   0.0  0.0  0.0

Information about the DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15748 entries, 0 to 15747
Data columns (total 10 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   mcsteps  15748 non-null  int64  
 1   xCOM     15748 non-null  int64  
 2   yCOM     15748 non-null  int64  
 3   zCOM     15748 non-null  int64  
 4   il8      15748 non-null  float64
 5   il1      15748 non-null  float64
 6   il6      15748 non-null  float64
 7   il10     15748 non-null  float64
 8   tnf      15748 non-null  

In [None]:
class Model(tf.keras.Model):
    def __init__(self, model_path, train_mode=True, input_dim=15, lstm_size=256, batch_size=10, e_learning_rate=1e-4):
        super(Model, self).__init__()
        self.model_path = model_path
        self.train_mode = train_mode
        self.input_dim = input_dim
        self.lstm_size = lstm_size
        self.batch_size = batch_size
        self.e_learning_rate = e_learning_rate

        # Define LSTM layer
        self.lstm_layer = tf.keras.layers.LSTM(units=self.lstm_size, return_sequences=True)

        # Define output layer
        self.output_layer = tf.keras.layers.Dense(units=2, activation=None)

        # Define optimizer
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.e_learning_rate)

        # Define loss function
        self.loss_fn = tf.keras.losses.MeanSquaredError()

    def call(self, inputs):
        # Input shape: (batch_size, sequence_length, input_dim)
        x = self.lstm_layer(inputs)
        # Output shape: (batch_size, sequence_length, lstm_size)
        output = self.output_layer(x)
        # Output shape: (batch_size, sequence_length, 2) - 2 for tnf_mean and tgf_mean
        return output

    def train_step(self, xtrain, ytrain):
        with tf.GradientTape() as tape:
            y_pred = self(xtrain, training=True)
            loss = self.loss_fn(ytrain, y_pred)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return loss

    def calculate_accuracy(self, y_true, y_pred):
    # Calculate mean squared error
       mse = tf.keras.losses.MeanSquaredError()
       mse_loss = mse(y_true, y_pred)

    # Calculate mean squared error of the true values
       mse_true = tf.reduce_mean(tf.square(y_true))

    # Calculate accuracy
       accuracy = 1.0 - mse_loss / mse_true
       return accuracy

    def train(self, train_set, valid_set, maxEpoch=10):
        x_train, y_train = train_set
        x_valid, y_valid = valid_set
        for epoch in range(maxEpoch):
            train_loss = self.train_step(x_train, y_train)
            valid_pred = self(x_valid, training=False)
            valid_loss = self.loss_fn(y_valid, valid_pred)
            y_valid = tf.cast(y_valid, tf.float32)  # Cast y_valid to tf.float32
            accuracy = self.calculate_accuracy(y_valid, valid_pred)
            print(f"Epoch {epoch + 1}, Train Loss: {train_loss}, Valid Loss: {valid_loss}, Accuracy: {accuracy}")


if __name__ == "__main__":
    csvfile1 = pd.read_csv('C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/combined_data.csv', skiprows=[0])
    csvfile1 = csvfile1.iloc[:, :-6]

    # Convert the DataFrame to a NumPy array and cast the data to float
    results = csvfile1.to_numpy(dtype='float')

    # Define input indices and output indices
    input_indices = list(range(15))
    output_indices = [15, 16]  # Assuming 15 is tnf_mean and 16 is tgf_mean

   # Split data into train and valid sets
    train_size = int(len(results) * 0.9)
    train_features = results[:train_size, input_indices]
    train_targets = results[:train_size, output_indices]
    valid_features = results[train_size:, input_indices]
    valid_targets = results[train_size:, output_indices]

# Reshape input features to include a sequence length dimension
    sequence_length = train_features.shape[0]  # Assuming each row is a sequence
    train_features = np.expand_dims(train_features, axis=1)  # Add sequence length dimension
    valid_features = np.expand_dims(valid_features, axis=1)  # Add sequence length dimension

# Create train and valid sets with input features and targets
    train_set = (train_features, train_targets)
    valid_set = (valid_features, valid_targets)
    

    # Initialize and train the model
    mymodel = Model(model_path="saved_model")
    mymodel.train(train_set, valid_set, maxEpoch=500)

In [4]:
class Model(tf.keras.Model):
    def __init__(self, model_path, train_mode=True, input_dim=10, lstm_size=256, batch_size=32, e_learning_rate=1e-5):
        super(Model, self).__init__()
        self.model_path = model_path
        self.train_mode = train_mode
        self.input_dim = input_dim
        self.lstm_size = lstm_size
        self.batch_size = batch_size
        self.e_learning_rate = e_learning_rate

        #define LSTM layer
        self.lstm_layer = tf.keras.layers.LSTM(units=self.lstm_size, return_sequences=True)

        #define output layer
        self.output_layer = tf.keras.layers.Dense(units=2, activation=None)

        #define optimizer
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=self.e_learning_rate)

        #define MSE as loss function
        self.loss_fn = tf.keras.losses.MeanSquaredError()

        #define scaler for target normalization
        self.target_scaler = MinMaxScaler(feature_range=(0, 500))

    def call(self, inputs):
        #reshape inputs to add sequence length dimension
        inputs = tf.expand_dims(inputs, axis=1)
        
        #input shape: (batch_size, sequence_length, input_dim)
        x = self.lstm_layer(inputs)
        #output shape: (batch_size, sequence_length, lstm_size)
        output = self.output_layer(x)
        #output shape: (batch_size, sequence_length, 2) - 2 for outputs
        return output

    def train_step(self, xtrain, ytrain):
        with tf.GradientTape() as tape:
            y_pred = self(xtrain, training=True)
            loss = self.loss_fn(ytrain, y_pred)
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return loss

    def train(self, train_set, valid_set, maxEpoch=10):
        x_train, y_train = train_set
        x_valid, y_valid = valid_set
        
        #normalize target values
        y_train_scaled = self.target_scaler.fit_transform(y_train.reshape(-1, 1))
        y_valid_scaled = self.target_scaler.transform(y_valid.reshape(-1, 1))

        for epoch in range(maxEpoch):
            train_loss = self.train_step(x_train, y_train_scaled)
            valid_loss = self.loss_fn(y_valid_scaled, self(x_valid, training=False))
            print(f"Epoch {epoch + 1}, Train Loss: {train_loss}, Valid Loss: {valid_loss}")


if __name__ == "__main__":
    csvfile1 = pd.read_csv('C:/Users/Ioannis/Documents/UvA thesis/UvA-thesis/data/testdata.csv')

    #convert the df to numpy array and cast the data to float
    results = csvfile1.to_numpy(dtype='float')

    #define input indices and output indices
    input_indices = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    output_indices = [1, 2] 

    #split data into train and valid sets
    train_size = int(len(results) * 0.9)
    train_features = results[:train_size, input_indices]
    train_targets = results[:train_size, output_indices]
    valid_features = results[train_size:, input_indices]
    valid_targets = results[train_size:, output_indices]

    #create train and valid sets with input features and targets
    train_set = (train_features, train_targets)
    valid_set = (valid_features, valid_targets)

    #initialize and train the model
    mymodel = Model(model_path="saved_model")
    mymodel.train(train_set, valid_set, maxEpoch=5000)


Epoch 1, Train Loss: 83971.2421875, Valid Loss: 93946.4453125
Epoch 2, Train Loss: 83970.6796875, Valid Loss: 93945.921875
Epoch 3, Train Loss: 83970.140625, Valid Loss: 93945.4453125
Epoch 4, Train Loss: 83969.5859375, Valid Loss: 93944.9609375
Epoch 5, Train Loss: 83969.046875, Valid Loss: 93944.53125
Epoch 6, Train Loss: 83968.5078125, Valid Loss: 93944.0859375
Epoch 7, Train Loss: 83967.96875, Valid Loss: 93943.671875
Epoch 8, Train Loss: 83967.421875, Valid Loss: 93943.28125
Epoch 9, Train Loss: 83966.890625, Valid Loss: 93942.921875
Epoch 10, Train Loss: 83966.3515625, Valid Loss: 93942.5703125
Epoch 11, Train Loss: 83965.8203125, Valid Loss: 93942.21875
Epoch 12, Train Loss: 83965.2890625, Valid Loss: 93941.8984375
Epoch 13, Train Loss: 83964.765625, Valid Loss: 93941.5703125
Epoch 14, Train Loss: 83964.2265625, Valid Loss: 93941.265625
Epoch 15, Train Loss: 83963.6953125, Valid Loss: 93940.9609375
Epoch 16, Train Loss: 83963.1796875, Valid Loss: 93940.65625
Epoch 17, Train Loss

KeyboardInterrupt: 