# In [7]:  (Jupyter notebook cell prompt left over from export)
import numpy as np
import matplotlib.pyplot as plt
import pandas
from sklearn.metrics import mutual_info_score

# discretizes a time series into bins
def discretize_time_series(series, bins):
    """Map every value of a series to the index of an equal-width bin.

    The interval from the smallest to the largest value of ``series`` is
    split into ``bins`` equal-width bins, and each value is replaced by the
    zero-based index of the bin it falls into.

    Args:
        series: Array-like of numeric values.
        bins: Number of equal-width bins; must be at least 1.

    Returns:
        Integer array with one bin index per input value, each in the range
        ``0 .. bins - 1``.

    Raises:
        ValueError: If ``bins`` is smaller than 1, or if ``series`` is empty
            (no minimum/maximum exists).
    """
    if bins < 1:
        raise ValueError("bins must be at least 1")

    values = np.asarray(series)
    edges = np.linspace(values.min(), values.max(), bins + 1)

    # np.digitize works on half-open intervals [edge_i, edge_i+1), so a value
    # equal to the last edge (the maximum) is reported one past the final
    # bin.  Fold it back so that exactly `bins` distinct labels can occur.
    indices = np.digitize(values, edges) - 1
    return np.minimum(indices, bins - 1)

# calculates shannon entropy
def shannon_entropy(series):
    """Return the Shannon entropy (base-2 logarithm) of a discrete series.

    The probability of each distinct value is estimated by its relative
    frequency in ``series``.

    Args:
        series: Array-like of discrete labels.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the distinct values.
    """
    # Only the occurrence counts matter; the distinct values are discarded.
    occurrences = np.unique(series, return_counts=True)[1]
    frequencies = occurrences / occurrences.sum()
    weighted_logs = frequencies * np.log2(frequencies)
    return -np.sum(weighted_logs)

# Load the SimCov statistics file (tab-separated) and name its columns.
# NOTE(review): skiprows=1 drops the first line, and read_csv then consumes
# the next line as a header before the names below replace it.  Confirm the
# file really starts with two non-data lines; otherwise one data row is lost.
stats_file = 'simcov-gen85.stats'
simcov_data = pandas.read_csv(stats_file, sep='\t', skiprows=1)
simcov_data.columns = ['time', 'incb', 'expr', 'apop', 'dead', 'tvas', 'ttis', 'chem', 'virs', 'chempts', '%infct']

# The two time series compared in the table.
expressing_cells = simcov_data['expr'].values
t_cells = simcov_data['ttis'].values

# Discretize both series with the same number of bins.
bins = 80  # 86400 time steps; Rice's rule gives 88.41, but 86400 is evenly divisible by 80 bins.
discretized_expressing_cells = discretize_time_series(expressing_cells, bins)
discretized_t_cells = discretize_time_series(t_cells, bins)

# Table 2 (SimCov data): Shannon entropy of each series and their mutual information.
H_expressing_cells = shannon_entropy(discretized_expressing_cells)
H_t_cells = shannon_entropy(discretized_t_cells)
# mutual_info_score uses the natural logarithm (nats) while shannon_entropy
# uses log2 (bits); divide by ln(2) so every table entry is reported in bits.
MI_expressing_t_cells = mutual_info_score(discretized_expressing_cells, discretized_t_cells) / np.log(2)

# Save the table values to a text file (the 'timeseries' directory must already exist).
with open('timeseries/simcovtableentries.txt', 'w') as file:
    file.write(f"Results for {stats_file}:\n")
    file.write(f"H(Expressing Cells):              {H_expressing_cells}\n")
    file.write(f"H(T-Cells):                       {H_t_cells}\n")
    file.write(f"MI(Expressing Cells and T-Cells): {MI_expressing_t_cells}\n")