Aims:
- Fit an autoregressive (AR) model to each time series and extract features from it.  For now the feature is the frequency of oscillations; this could later be expanded to the quality of oscillations (height of the periodogram peak).
- Do it with the Causton strains, with the option of switching to the experiment with the new CEN.PK if I have time.
- Produce some plots for BYG201 (panel 6)

Specify file name and sampling period

In [60]:
# Common prefix of this experiment's CSV files; suffixes such as 'flavin.csv',
# 'births.csv', 'strains.csv' and 'mCherry.csv' are appended to it on import.
filename_prefix = './data/arin/Omero19979_'
# Handed to CellAttr_from_datasets as `sampling_pd`
# (presumably minutes, given the 'Time (min)' plot axis -- TODO confirm).
sampling_period = 5
# Forwarded as the `remain` argument of import_timeseries
# (NOTE(review): its exact meaning is defined in pipeline.dataimport -- confirm).
remain = 0.8

%matplotlib

Using matplotlib backend: TkAgg


Main shebang

In [61]:
#!/usr/bin/env python3
import os

import numpy as np
import scipy as sp
import pandas as pd
import itertools
import matplotlib.pyplot as plt
import sklearn.metrics
import igraph as ig

import pipeline.dataexport
import pipeline.dataimport
import pipeline.periodogram
import pipeline.score
import pipeline.tsman
import pipeline.vis

import featext.tsman
import featext.graph
#import featext.vis

#import catch22
#import leidenalg

def add_classicalAttr(cell, oversampling_factor = 1):
    """
    Compute the classical periodogram of a cell's processed flavin signal.

    Passes ``cell.time`` and ``cell.flavin.reading_processed`` to
    ``pipeline.periodogram.classical`` (forwarding ``oversampling_factor``)
    and stores the two returned values in ``cell.flavin.classical.freqs`` and
    ``cell.flavin.classical.power``.  ``cell.flavin.classical`` is not created
    here, so it must already exist on the cell.
    """
    freqs, power = pipeline.periodogram.classical(
        cell.time,
        cell.flavin.reading_processed,
        oversampling_factor=oversampling_factor,
    )
    classical = cell.flavin.classical
    classical.freqs = freqs
    classical.power = power

def add_bglsAttr(cell):
    """
    Attach a Bayesian General Lomb-Scargle periodogram to ``cell.flavin``.

    A fresh ``pipeline.PdgramAttr`` is stored as ``cell.flavin.bgls``, given
    its plot labels, and filled with the two values returned by
    ``pipeline.periodogram.bgls`` (``freqs`` and ``power``).  Every data point
    is assigned the same error: the square root of the maximum of the
    processed signal.
    """
    cell.flavin.bgls = pipeline.PdgramAttr()
    pdgram = cell.flavin.bgls
    pdgram.label = 'Bayesian General Lomb-Scargle Periodogram'
    pdgram.power_label = 'Probability'

    signal = cell.flavin.reading_processed
    # One identical error value per sample
    uniform_err = np.ones(len(signal)) * np.sqrt(np.max(signal))

    freqs, power = pipeline.periodogram.bgls(
        cell.time,
        cell.flavin.reading_processed,
        uniform_err,
        plow=30.0,
        phigh=360.0,
        ofac=5,
    )
    cell.flavin.bgls.freqs = freqs
    cell.flavin.bgls.power = power

def add_autoregAttr(cell, freq_npoints = 1000):
    """
    Computes autoregressive model-based periodogram and adds PdgramAttr
    attributes

    A new ``pipeline.PdgramAttr`` is stored as ``cell.flavin.autoreg``, given
    its plot labels, and filled with the two values returned by
    ``pipeline.periodogram.autoreg`` (``freqs`` and ``power``), which is
    called with ``cell.time`` and ``cell.flavin.reading_processed``.

    Parameters
    ----------
    cell :
        Object exposing ``time`` and ``flavin.reading_processed``.
    freq_npoints : int, optional
        Forwarded as the third argument of ``pipeline.periodogram.autoreg``
        (presumably the number of frequency points evaluated -- TODO
        confirm).  Defaults to 1000, the value that used to be hard-coded.
    """
    cell.flavin.autoreg = pipeline.PdgramAttr()
    # Label is used as a plot title; spelling fixed ("Autoregressive")
    cell.flavin.autoreg.label = \
            'Autoregressive Model-Based Periodogram (Jia & Grima, 2020)'
    cell.flavin.autoreg.power_label = 'Power'
    cell.flavin.autoreg.freqs, cell.flavin.autoreg.power = \
            pipeline.periodogram.autoreg(cell.time,
                                         cell.flavin.reading_processed,
                                         freq_npoints)

# FLAVIN: load the raw tables, build the dataset, then annotate every cell

# Raw tables read from this experiment's CSV files
Dset_flavin = pipeline.dataimport.import_timeseries(
    filename_prefix + 'flavin.csv',
    remain=remain,
)
# Placeholder: the same category (3) for every row so that downstream code has
# something to read; to be re-factored later
Dset_dcategory = [3] * len(Dset_flavin)
Dset_births = pipeline.dataimport.import_births(filename_prefix + 'births.csv')

# Build the per-cell objects and wrap them in a DatasetAttr
Dset_data = pipeline.dataimport.CellAttr_from_datasets(
    timeseries_df=Dset_flavin,
    categories_array=Dset_dcategory,
    births_df=Dset_births,
    sampling_pd=sampling_period,
)
Dset = pipeline.DatasetAttr(Dset_data)

# Annotate each cell: provenance, growth medium, strain and flavin channel.
# The strain is looked up by the cell's position in the strains table.
strainlookup = pd.read_csv(
    filename_prefix + 'strains.csv',
    index_col='position',
)
for ii, cell in enumerate(Dset.cells):
    cell.source = filename_prefix
    cell.medium.base = 'Delft'
    cell.medium.nutrients = {'glucose': 10}

    cell.strain = strainlookup.loc[cell.position].strain

    # Flavin channel: the raw reading is the cell's imported time series
    cell.flavin = pipeline.Fluo('flavin')
    cell.flavin.exposure = 60
    cell.flavin.reading = cell.y
    cell.flavin.category = Dset_dcategory[ii]


# mCherry: import data and process objects
# Only the CSV read is guarded: an experiment without an mCherry channel has no
# such file, in which case a message is printed and the cells get no `mCherry`
# attribute.  Errors raised by the later processing are no longer swallowed.
try:
    Dset_mCherry_unsliced = pipeline.dataimport.import_timeseries(
        filename_prefix+'mCherry.csv', remain = remain)
except FileNotFoundError as error:
    print(error)
    print(f'No mCherry time series associated with this experiment: {filename_prefix}')
else:
    # restrict to cells with flavin readings
    idx_both = list(set(Dset_flavin.cellID) & set(Dset_mCherry_unsliced.cellID))
    Dset_mCherry = \
            Dset_mCherry_unsliced.loc[Dset_mCherry_unsliced.cellID.isin(idx_both)]

    # Arranges information into DatasetAttr objects
    # dummy -- will be better when I re-structure things... am just re-using a
    # function for quick-and-dirty purposes, and it's obviously redundant
    # NOTE(review): categories_array is sized from the flavin table, not the
    # mCherry one -- confirm CellAttr_from_datasets tolerates that.
    mCherry_data = pipeline.dataimport.CellAttr_from_datasets(
            timeseries_df = Dset_mCherry,
            categories_array = Dset_dcategory,
            births_df = Dset_births,
            sampling_pd = sampling_period)
    mCherry = pipeline.DatasetAttr(mCherry_data)
    mCherry_MATLABids = [cell.MATLABid for cell in mCherry.cells]

    # MATLABid -> mCherry cell, built once so that each flavin cell is matched
    # with a dict lookup instead of a list scan.  setdefault keeps the first
    # cell for a repeated id, matching what list.index returned.
    mCherry_by_MATLABid = {}
    for mCherry_cell in mCherry.cells:
        mCherry_by_MATLABid.setdefault(mCherry_cell.MATLABid, mCherry_cell)

    # Add labels
    for cell in Dset.cells:
        cell.mCherry = pipeline.Fluo('mCherry')
        if cell.strain == 'htb2_mCherry_CRISPR':
            cell.mCherry.exposure = 100
        else:
            cell.mCherry.exposure = 0

        # loads in reading, cross-referencing by MATLABid; cells without an
        # mCherry counterpart are left without a `reading`
        matched = mCherry_by_MATLABid.get(cell.MATLABid)
        if matched is not None:
            cell.mCherry.reading = matched.y

[Errno 2] No such file or directory: './data/arin/Omero19979_mCherry.csv'
No mCherry time series associated with this experiment: ./data/arin/Omero19979_


Define working dataset (list of cells)

In [80]:
# Work on every cell of the dataset; the commented-out line below is an
# alternative that keeps only the cells of a single strain.
Wlist = Dset.cells
#Wlist = [cell for cell in Dset.cells if cell.strain == 'swe1_Del']
# Size of the working list (shown as the cell's output)
len(Wlist)

589

Chop up time series (exclude the end in which there is starvation)

In [81]:
# Window kept from every trace, expressed as array indices (time points), with
# the end index excluded as in any Python slice
interval_start = 0
interval_end = 168
kept = slice(interval_start, interval_end)

# Trim the time axis and the flavin reading of each cell to the same window
for cell in Wlist:
    cell.time = cell.time[kept]
    cell.flavin.reading = cell.flavin.reading[kept]

Remove cells that have NaNs in their time series.  The AR model fit cannot handle NaNs.

In [82]:
# Keep only the cells whose (chopped) flavin reading contains no NaN at all
nan_free = []
for candidate in Wlist:
    if np.isnan(candidate.flavin.reading).any():
        continue
    nan_free.append(candidate)
Wlist = nan_free
# Number of cells that survive the filter (shown as the cell's output)
len(Wlist)

299

Add spectra

In [83]:
for cell in Wlist:
    # No pre-processing step yet: the "processed" signal is the very same
    # object as the chopped raw reading (plain assignment, not a copy)
    flavin = cell.flavin
    flavin.reading_processed = flavin.reading
    #add_classicalAttr(cell, oversampling_factor = 1)
    # Only the autoregressive periodogram is computed for now
    add_autoregAttr(cell)
    #print(cell.cellid)

In [84]:
# Spot-check: display the AR periodogram power array of the cell at index 10
Wlist[10].flavin.autoreg.power

array([1.        , 1.00000225, 1.000009  , 1.00002027, 1.00003606,
       1.00005641, 1.00008135, 1.00011091, 1.00014514, 1.0001841 ,
       1.00022784, 1.00027644, 1.00032996, 1.00038848, 1.0004521 ,
       1.0005209 , 1.00059499, 1.00067447, 1.00075947, 1.00085009,
       1.00094647, 1.00104875, 1.00115705, 1.00127154, 1.00139236,
       1.00151967, 1.00165364, 1.00179444, 1.00194225, 1.00209725,
       1.00225963, 1.00242959, 1.00260733, 1.00279306, 1.00298699,
       1.00318933, 1.00340032, 1.00362017, 1.00384913, 1.00408744,
       1.00433534, 1.00459307, 1.0048609 , 1.00513908, 1.00542788,
       1.00572756, 1.00603841, 1.00636069, 1.00669468, 1.00704068,
       1.00739898, 1.00776986, 1.00815363, 1.00855059, 1.00896105,
       1.0093853 , 1.00982367, 1.01027647, 1.01074402, 1.01122664,
       1.01172466, 1.01223841, 1.01276821, 1.0133144 , 1.01387732,
       1.0144573 , 1.01505469, 1.01566982, 1.01630305, 1.01695472,
       1.01762518, 1.01831478, 1.01902388, 1.01975282, 1.02050

In [85]:
# Plot the AR periodogram (power against frequency) of the cell at index 60
pdgram_60 = Wlist[60].flavin.autoreg
plt.plot(pdgram_60.freqs, pdgram_60.power)

[<matplotlib.lines.Line2D at 0x7f0e2aca0b50>]

In [95]:
# Frequency at which each cell's AR periodogram power is largest
mylist = [cell.flavin.autoreg.freqs[np.argmax(cell.flavin.autoreg.power)] for cell in Wlist]
# Convert the peak frequencies to periods (presumably minutes, given the
# periodogram's min^-1 frequency axis -- confirm).  A peak at zero frequency
# yields an infinite period; that division by zero is intentional, so the
# RuntimeWarning is silenced for this one operation only.
with np.errstate(divide='ignore'):
    foo = 1/np.array(mylist)
print(foo)

[          inf           inf           inf           inf           inf
           inf           inf           inf           inf           inf
  225.                   inf           inf           inf           inf
           inf           inf           inf           inf           inf
  287.06896552           inf           inf  175.26315789           inf
  256.15384615  587.64705882           inf           inf           inf
           inf           inf           inf           inf           inf
           inf           inf           inf  367.27941176  406.09756098
           inf           inf           inf           inf           inf
           inf           inf  242.47572816           inf           inf
           inf           inf           inf           inf           inf
           inf           inf           inf           inf           inf
           inf           inf           inf           inf           inf
           inf           inf           inf           inf           inf
      

  


In [100]:
# Plot the time series of the cell at index 298 of the working list
Wlist[298].plot_ts()

(<Figure size 640x480 with 1 Axes>,
 <AxesSubplot:title={'center':'Autofluorescence of cell 588 over time'}, xlabel='Time (min)', ylabel='Autofluorescence (AU)'>)

In [99]:
# Plot the AR periodogram of the same cell (index 298).
# NOTE(review): pd=False presumably selects a frequency axis rather than a
# period axis -- confirm against plot_ps.
Wlist[298].flavin.plot_ps(pdgram='autoreg', pd=False)

(<Figure size 640x480 with 1 Axes>,
 <AxesSubplot:title={'center':'Autogressive Model-Based Periodogram (Jia & Grima, 2020)'}, xlabel='Frequency ($min^{-1}$)', ylabel='Power'>)

PROBLEM: there's only one swe1Δ cell that the AR identifies as oscillating.  Changes definitely need to be made to the algorithm.  Perhaps this is where the model selection comes in, but there's _no way_ I'll be able to explore this in time for the conference.