In [1]:
import numpy as np
import pandas as pd
from mcspace.utils import pickle_load, pickle_save
import matplotlib.pyplot as plt
import seaborn as sns
from mcspace.data_utils import get_mouse_diet_perturbations_dataset
import scipy
from scipy.stats import nbinom
import statsmodels.api as sm
from pathlib import Path 

# Paths

In [2]:
# rootpath: presumably the repository root, two levels above this notebook — TODO confirm.
rootpath = Path("../../")
# basepath: directory under rootpath where the fitted parameters are pickled at the end of the notebook.
basepath = rootpath.joinpath("paper_cluster", "semi_synthetic_data")

# Load mouse dataset

In [3]:
# Load the mouse diet-perturbation dataset through the project helper. The result is unpacked as
# (reads, num_otus, times, subjects, dataset); later cells index reads as reads[time][subject].
# NOTE(review): the stored run of this cell failed with FileNotFoundError for
# data/mouse_diet_timeseries_data/taxonomy.csv under rootpath — confirm the data folder exists.
reads, num_otus, times, subjects, dataset = get_mouse_diet_perturbations_dataset(rootpath=rootpath)

FileNotFoundError: [Errno 2] No such file or directory: '..\\..\\data\\mouse_diet_timeseries_data\\taxonomy.csv'

In [None]:
# Show the available time points and subject identifiers, one per line.
print(times, subjects, sep="\n")

In [None]:
def pool_data(reads, times, subjects):
    """Concatenate ``reads[t][s]`` over the requested time points and subjects.

    Blocks are stacked along axis 0 in time-major order: every subject of the
    first time point, then every subject of the next time point, and so on.

    Parameters
    ----------
    reads : nested mapping
        Indexed as ``reads[t][s]``; each entry is an array-like whose trailing
        dimensions agree (presumably particles x OTUs -- TODO confirm).
    times : iterable
        Keys of the first level of ``reads`` to include.
    subjects : iterable
        Keys of the second level of ``reads`` to include.

    Returns
    -------
    numpy.ndarray or None
        The pooled array, or ``None`` when there is no (time, subject) pair.
        The result is always a fresh array, so modifying it never touches
        the arrays stored in ``reads``.
    """
    blocks = [reads[t][s] for t in times for s in subjects]
    if not blocks:
        return None
    # Concatenate once: growing the array inside the loop re-copies everything
    # accumulated so far on each iteration (quadratic in the number of blocks).
    return np.concatenate(blocks, axis=0)

In [None]:
# Pool the reads of every subject, restricted to the first time point.
counts = pool_data(reads=reads, times=[times[0]], subjects=subjects)

In [None]:
# Number of rows (shape[0]) in each subject's reads at the first time point, stored as floats.
nparts = np.array([reads[times[0]][s].shape[0] for s in subjects], dtype=float)

In [None]:
# Representative per-subject row count: the median of nparts, truncated to an int.
num_particles = int(np.median(nparts))

In [None]:
# Report the median particle count and the number of OTUs, one per line.
print(num_particles, num_otus, sep="\n")

## Fit negative binomial to read depths

In [None]:
# Row sums of the pooled counts (presumably the total read depth of each particle — TODO confirm).
rdepth = counts.sum(axis=1)
# Array of ones shaped like rdepth: an intercept-only design, so a single mean is fitted.
X = np.ones_like(rdepth)
# NOTE(review): the conversion below assumes statsmodels' default NB2 form, with
# params = [log-mean intercept, dispersion alpha] — confirm against the statsmodels docs.
res = sm.NegativeBinomial(rdepth,X).fit(start_params=[1,1])
# Fitted mean on the original scale.
mu = np.exp(res.params[0])
# Re-express the fit as the (n, p) pair passed to scipy.stats.nbinom in the plot cell;
# algebraically p = 1/(1 + alpha*mu) and n = mu*p/(1-p) = 1/alpha.
p = 1/(1+mu*res.params[1])
n = mu*p/(1-p)

In [None]:
# Show the converted negative-binomial parameters: p first, then n.
print(p, n, sep="\n")

### Plot fit

In [None]:
# Smallest and largest row sum; they bound the x-grid used when plotting the fit.
minr, maxr = np.min(rdepth), np.max(rdepth)
print(minr, maxr, sep="\n")

In [None]:
# Grid from minr to maxr with maxr-minr+1 points, i.e. unit steps when the depths are
# integer-valued, so the pmf is evaluated on its integer support.
x_plot = np.linspace(minr,maxr,int(maxr-minr)+1)
sns.set_theme()
fig, ax = plt.subplots()
# sns.distplot is deprecated and scheduled for removal; histplot with stat="density" is the
# replacement for distplot(kde=False, norm_hist=True). Default bin selection may differ slightly.
sns.histplot(rdepth, stat="density", label='Data', ax=ax)
# Overlay the fitted negative-binomial pmf on the normalized histogram.
ax.plot(x_plot, nbinom.pmf(x_plot, n, p), 'g-', lw=2, label='Fit')
ax.set(xlabel='Read depth', ylabel='Density')
ax.legend();

In [None]:
# Summary statistics of the row sums: the median and the interquartile range
# (75th percentile minus 25th percentile).
median = np.median(rdepth)
iqr = np.subtract(*np.percentile(rdepth, q=[75, 25]))

In [None]:
# Report the median and the interquartile range, one per line.
print(median, iqr, sep="\n")

## Save results

In [None]:
# Write the fitted (n, p) parameters, the read-depth summary statistics and the
# dataset dimensions to a pickle file under basepath.
pickle_save(
    basepath / "negbin_fit_params_Mouse_data.pkl",
    dict(
        negbin_n=n,
        negbin_p=p,
        median=median,
        iqr=iqr,
        num_particles=num_particles,
        num_otus=num_otus,
    ),
)