# Physiologically-Based Pharmacokinetic (PBPK) Model with Machine Learning
In this notebook, we implement a simplified PBPK model to simulate the concentration of a drug in different compartments of the body. We also generate data that can be used for training a machine learning model.

In [1]:
class Patient:
    def __init__(self, age, weight):
        self.age = age
        self.weight = weight
        self.volume = {'blood': 5000, 'liver': 1500, 'kidney': 200}  # volumes in mL

In [2]:
class Therapy:
    def __init__(self, drug_dose):
        self.drug_dose = drug_dose  # in mg

In [3]:
class Encoder:
    def __init__(self, patient, therapy):
        self.patient = patient
        self.therapy = therapy

In [5]:
class StiffSolver:
    def __init__(self, encoder):
        self.encoder = encoder
        self.dt = 0.1  # time step in hours
        self.time = 10  # total time in hours
        self.C = {'blood': 0, 'liver': 0, 'kidney': 0}  # initial concentrations
        self.results = {'blood': [], 'liver': [], 'kidney': []}

    def solve(self):
        ke = 0.1  # elimination rate constant in 1/hour
        F = 1000  # flow rate in mL/hour
        for t in range(int(self.time / self.dt)):
            input_rate = self.encoder.therapy.drug_dose
            for compartment in ['blood', 'liver', 'kidney']:
                dC_dt = 0
                if compartment == 'blood':
                    dC_dt += input_rate / self.encoder.patient.volume[compartment]
                    dC_dt -= F / self.encoder.patient.volume[compartment] * (self.C[compartment] - self.C['liver'])
                    dC_dt -= F / self.encoder.patient.volume[compartment] * (self.C[compartment] - self.C['kidney'])
                    dC_dt -= ke * self.C[compartment]
                else:
                    dC_dt += F / self.encoder.patient.volume[compartment] * (self.C['blood'] - self.C[compartment])

                self.C[compartment] += dC_dt * self.dt
                self.results[compartment].append(self.C[compartment])

    def get_results(self):
        return self.results

## Data Generation
The data generator remains largely the same. You would pull out the concentration values for the compartments you are interested in.

In [8]:
import pandas as pd
import random

def generate_data(num_samples):
    data = []

    for _ in range(num_samples):
        # Initialize random patient and therapy parameters
        age = random.randint(18, 80)
        weight = random.randint(50, 100)
        drug_dose = random.uniform(0.1, 1.0)

        # Run the PBPK model simulation
        patient_model = Patient(age=age, weight=weight)
        therapy_model = Therapy(drug_dose=drug_dose)
        encoder_model = Encoder(patient_model, therapy_model)
        solver_model = StiffSolver(encoder_model)
        solver_model.solve()

        # Extract the simulation results
        # Assuming the solver has a method get_results that returns a dictionary of results
        results = solver_model.get_results()

        # Create a data sample
        sample = {'age': age, 'weight': weight, 'drug_dose': drug_dose, **results}
        data.append(sample)

    return pd.DataFrame(data)


In [9]:
generate_data(1000)

Unnamed: 0,age,weight,drug_dose,blood,liver,kidney
0,57,92,0.581885,"[1.1637692658849684e-05, 2.2825394534890515e-0...","[7.758461772566456e-07, 2.24581606776557e-06, ...","[5.818846329424842e-06, 1.432212043215768e-05,..."
1,20,89,0.430369,"[8.607374129633372e-06, 1.688192979292092e-05,...","[5.738249419755581e-07, 1.661031932038582e-06,...","[4.303687064816686e-06, 1.0592808428868804e-05..."
2,38,50,0.105530,"[2.110590449079747e-06, 4.1395714007950765e-06...","[1.407060299386498e-07, 4.0729705466241164e-07...","[1.0552952245398735e-06, 2.597433312667475e-06..."
3,35,67,0.814035,"[1.628070822293198e-05, 3.193189572791059e-05,...","[1.0853805481954653e-06, 3.141814893509807e-06...","[8.140354111465992e-06, 2.003612491968829e-05,..."
4,48,70,0.677282,"[1.3545635275735054e-05, 2.656750598747502e-05...","[9.030423517156702e-07, 2.614006594099627e-06,...","[6.772817637867527e-06, 1.6670161812671275e-05..."
...,...,...,...,...,...,...
995,25,59,0.971865,"[1.943729968278837e-05, 3.812302377784226e-05,...","[1.2958199788525581e-06, 3.7509668987852048e-0...","[9.718649841394187e-06, 2.3920836809618223e-05..."
996,49,69,0.557637,"[1.115273928092337e-05, 2.1874239309651035e-05...","[7.43515952061558e-07, 2.1522308425675233e-06,...","[5.576369640461685e-06, 1.3725304475056362e-05..."
997,50,96,0.747373,"[1.494745547473113e-05, 2.9316942671105992e-05...","[9.96497031648742e-07, 2.884526740945892e-06, ...","[7.473727737365566e-06, 1.839533520423578e-05,..."
998,22,78,0.205173,"[4.103461293219657e-06, 8.04825541643482e-06, ...","[2.735640862146438e-07, 7.918768415626556e-07,...","[2.0517306466098283e-06, 5.049993031522324e-06..."


## Machine Learning Model
You can use the generated data to train a machine learning model, just like in the previous examples.

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Generate data
num_samples = 1000
data = generate_data(num_samples)

# Preprocess the data
# For simplicity, let's assume no preprocessing is needed.
X = data[['age', 'weight', 'drug_dose']]
y = data['some_simulation_result']  # Replace with actual result column name

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Use the model for prediction
new_data = [[45, 70, 0.5]]
predicted_result = rf_model.predict(new_data)
print(f"Predicted Result: {predicted_result}")


KeyError: 'some_simulation_result'