Aims:
- Fit an autoregressive (AR) model to each time series and extract features from it.  Trying frequency of oscillations for now, with potential to expand to quality (height of the periodogram peak).
- Do it with Causton strains.  With potential to switch to the experiment with the new CEN.PK if I have time.
- Produce some plots for BYG201 (panel 6)

Specify file name and sampling period

In [212]:
# Prefix shared by this experiment's input CSVs; suffixes such as
# 'flavin.csv', 'births.csv' and 'strains.csv' are appended to it further down.
#filename_prefix = './data/arin/Omero19979_'
filename_prefix = './data/arin/Omero20016_'
# Handed to CellAttr_from_datasets as sampling_pd
# (presumably minutes between time points -- TODO confirm units).
sampling_period = 5
# Handed to import_timeseries as `remain`.
# NOTE(review): its exact meaning is defined in pipeline.dataimport -- confirm.
remain = 0.8

%matplotlib

Using matplotlib backend: TkAgg


Main shebang

In [213]:
#!/usr/bin/env python3
import os

import numpy as np
import scipy as sp
import pandas as pd
import itertools
import matplotlib.pyplot as plt
import sklearn.metrics
import igraph as ig

import pipeline.dataexport
import pipeline.dataimport
import pipeline.periodogram
import pipeline.score
import pipeline.tsman
import pipeline.vis

import featext.tsman
import featext.graph
#import featext.vis

#import catch22
#import leidenalg

def add_classicalAttr(cell, oversampling_factor=1):
    """Compute the classical periodogram of a cell's processed flavin signal.

    The frequencies and power returned by pipeline.periodogram.classical are
    stored on the existing cell.flavin.classical object as `freqs` and
    `power`.  `oversampling_factor` is forwarded unchanged.
    """
    flavin = cell.flavin
    freqs, power = pipeline.periodogram.classical(
        cell.time,
        flavin.reading_processed,
        oversampling_factor=oversampling_factor,
    )
    flavin.classical.freqs = freqs
    flavin.classical.power = power

def add_bglsAttr(cell):
    """Attach a Bayesian General Lomb-Scargle periodogram to the cell.

    A fresh pipeline.PdgramAttr is stored as cell.flavin.bgls, labelled, and
    filled with the frequencies and power that pipeline.periodogram.bgls
    returns for the processed flavin signal.
    """
    cell.flavin.bgls = pipeline.PdgramAttr()
    pdgram = cell.flavin.bgls
    pdgram.label = 'Bayesian General Lomb-Scargle Periodogram'
    pdgram.power_label = 'Probability'

    signal = cell.flavin.reading_processed
    # One error value shared by every sample: sqrt of the largest reading
    error_level = np.sqrt(np.max(signal))
    err = np.ones(len(signal)) * error_level

    freqs, power = pipeline.periodogram.bgls(
        cell.time, signal, err, plow=30.0, phigh=360.0, ofac=5)
    pdgram.freqs = freqs
    pdgram.power = power

def add_autoregAttr(cell, freq_npoints = 1000):
    """
    Computes autoregressive model-based periodogram and adds PdgramAttr
    attributes

    Parameters
    ----------
    cell :
        Cell object providing `time` and `flavin.reading_processed`.  A new
        pipeline.PdgramAttr is stored on it as `cell.flavin.autoreg`.
    freq_npoints : int, optional
        Forwarded as the third positional argument of
        pipeline.periodogram.autoreg (presumably the number of frequency
        points at which the spectrum is evaluated -- confirm there).
        Defaults to 1000, the value that used to be hard-coded here.

    Returns
    -------
    None.  The results are written to `cell.flavin.autoreg.freqs` and
    `cell.flavin.autoreg.power`.
    """
    cell.flavin.autoreg = pipeline.PdgramAttr()
    # Label previously read 'Autogressive'; corrected spelling
    cell.flavin.autoreg.label = \
            'Autoregressive Model-Based Periodogram (Jia & Grima, 2020)'
    cell.flavin.autoreg.power_label = 'Power'
    cell.flavin.autoreg.freqs, cell.flavin.autoreg.power = \
            pipeline.periodogram.autoreg(cell.time,
                                         cell.flavin.reading_processed,
                                         freq_npoints)

# FLAVIN: load the raw tables, then wrap them in pipeline objects

# Fluorescence time series come from '<prefix>flavin.csv'
Dset_flavin = pipeline.dataimport.import_timeseries(
    filename_prefix + 'flavin.csv',
    remain=remain,
)
# Placeholder categories (all 3) so downstream code has something to read;
# to be re-factored later
Dset_dcategory = [3] * len(Dset_flavin)
# Birth information comes from '<prefix>births.csv'
Dset_births = pipeline.dataimport.import_births(filename_prefix + 'births.csv')

# Bundle the tables into per-cell objects and wrap them in a DatasetAttr
Dset_data = pipeline.dataimport.CellAttr_from_datasets(
    timeseries_df=Dset_flavin,
    categories_array=Dset_dcategory,
    births_df=Dset_births,
    sampling_pd=sampling_period,
)
Dset = pipeline.DatasetAttr(Dset_data)

# Annotate every cell with provenance, medium, strain and flavin channel
strainlookup = pd.read_csv(filename_prefix + 'strains.csv',
                           index_col='position')
for cell_index, cell in enumerate(Dset.cells):
    cell.source = filename_prefix
    cell.medium.base = 'Delft'
    cell.medium.nutrients = {'glucose': 10}

    # Strain is looked up by the cell's position
    strain_record = strainlookup.loc[cell.position]
    cell.strain = strain_record.strain

    cell.flavin = pipeline.Fluo('flavin')
    flavin_channel = cell.flavin
    flavin_channel.exposure = 60
    flavin_channel.reading = cell.y
    flavin_channel.category = Dset_dcategory[cell_index]


# mCherry: import data and process objects
# If '<prefix>mCherry.csv' does not exist, the FileNotFoundError is reported
# and the cells are left without an mCherry channel.
try:
    Dset_mCherry_unsliced = pipeline.dataimport.import_timeseries(
        filename_prefix+'mCherry.csv', remain = remain)
    # restrict to cells with flavin readings
    idx_both = list(set(Dset_flavin.cellID) & set(Dset_mCherry_unsliced.cellID))
    Dset_mCherry = \
            Dset_mCherry_unsliced.loc[Dset_mCherry_unsliced.cellID.isin(idx_both)]

    # Arranges information into DatasetAttr objects
    # dummy -- will be better when I re-structure things... am just re-using a
    # function for quick-and-dirty purposes, and it's obviously redundant
    mCherry_data = pipeline.dataimport.CellAttr_from_datasets(
            timeseries_df = Dset_mCherry,
            categories_array = Dset_dcategory,
            births_df = Dset_births,
            sampling_pd = sampling_period)
    mCherry = pipeline.DatasetAttr(mCherry_data)
    # Kept because later cells of the notebook may still refer to this list
    mCherry_MATLABids = [cell.MATLABid for cell in mCherry.cells]

    # MATLABid -> first mCherry cell carrying that id.  setdefault keeps the
    # first occurrence, matching what list.index() used to return, while
    # turning the per-cell lookup from a list scan into a dict access.
    # (assumes MATLABid values are hashable, e.g. integers -- confirm)
    mCherry_by_MATLABid = {}
    for mCherry_cell in mCherry.cells:
        mCherry_by_MATLABid.setdefault(mCherry_cell.MATLABid, mCherry_cell)

    # Add labels
    for cell in Dset.cells:
        cell.mCherry = pipeline.Fluo('mCherry')
        if cell.strain == 'htb2_mCherry_CRISPR':
            cell.mCherry.exposure = 100
        else:
            cell.mCherry.exposure = 0

        # loads in reading, cross-referencing by MATLABid; cells without a
        # matching mCherry trace keep no `reading` assignment
        matching_cell = mCherry_by_MATLABid.get(cell.MATLABid)
        if matching_cell is not None:
            cell.mCherry.reading = matching_cell.y
except FileNotFoundError as error:
    print(error)
    print(f'No mCherry time series associated with this experiment: {filename_prefix}')

[Errno 2] No such file or directory: './data/arin/Omero20016_mCherry.csv'
No mCherry time series associated with this experiment: ./data/arin/Omero20016_


Define working dataset (list of cells)

In [214]:
# Working list of cells for the rest of the analysis: every imported cell,
# or (commented alternative) only the cells of a single strain.
Wlist = Dset.cells
#Wlist = [cell for cell in Dset.cells if cell.strain == 'swe1_Del']
# Echo how many cells are in the working list
len(Wlist)

1330

Chop up time series (exclude the end in which there is starvation)

In [215]:
# Index window [interval_start, interval_end) kept for every cell
interval_start = 0
interval_end = 168
kept_points = slice(interval_start, interval_end)

for cell in Wlist:
    # Trim the time axis and the flavin reading with the same window so the
    # two stay aligned
    cell.time = cell.time[kept_points]
    cell.flavin.reading = cell.flavin.reading[kept_points]

Remove cells that have NaNs.  AR doesn't like them.

In [216]:
# Keep only the cells whose flavin reading contains no NaN at all
nan_free_cells = []
for cell in Wlist:
    if np.isnan(cell.flavin.reading).any():
        continue
    nan_free_cells.append(cell)
Wlist = nan_free_cells
len(Wlist)

668

In [217]:
from collections import Counter

# Tally how many of the remaining cells belong to each strain
count_strain = Counter(cell.strain for cell in Wlist)
print(count_strain)

Counter({'zwf1_Del': 446, 'by4741': 222})


Add spectra

In [218]:
for cell in Wlist:
    # No pre-processing step here: the "processed" reading is the very same
    # object as the (already trimmed) raw reading
    flavin_channel = cell.flavin
    flavin_channel.reading_processed = flavin_channel.reading
    #add_classicalAttr(cell, oversampling_factor = 1)
    # Only the autoregressive periodogram is computed for now
    add_autoregAttr(cell)
    #print(cell.cellid)

In [87]:
# Inspect the autoregressive periodogram power of one example cell (index 10)
Wlist[10].flavin.autoreg.power

array([1.        , 0.99960487, 0.99842137, 0.99645508, 0.99371524,
       0.99021466, 0.98596953, 0.98099925, 0.97532622, 0.9689756 ,
       0.96197501, 0.95435428, 0.94614513, 0.93738086, 0.92809602,
       0.91832615, 0.90810745, 0.89747647, 0.88646988, 0.87512415,
       0.86347538, 0.85155903, 0.83940974, 0.82706118, 0.81454586,
       0.80189504, 0.78913861, 0.77630502, 0.76342121, 0.75051256,
       0.73760292, 0.7247145 , 0.71186799, 0.69908249, 0.68637558,
       0.67376338, 0.66126053, 0.6488803 , 0.63663463, 0.62453418,
       0.61258841, 0.60080564, 0.5891931 , 0.57775703, 0.56650271,
       0.55543456, 0.54455618, 0.5338704 , 0.52337937, 0.51308459,
       0.50298701, 0.493087  , 0.48338449, 0.47387894, 0.46456943,
       0.4554547 , 0.44653315, 0.43780291, 0.42926187, 0.42090769,
       0.41273785, 0.40474966, 0.39694031, 0.38930683, 0.3818462 ,
       0.37455529, 0.36743092, 0.36046984, 0.3536688 , 0.34702449,
       0.34053361, 0.33419286, 0.32799893, 0.32194855, 0.31603

In [88]:
# Quick look at the autoregressive periodogram of the cell at index 60
spectrum_60 = Wlist[60].flavin.autoreg
plt.plot(spectrum_60.freqs, spectrum_60.power)

[<matplotlib.lines.Line2D at 0x7f36caf79c10>]

Compute period of 'smoothed periodogram', if appropriate

In [219]:
# Ask every cell's autoregressive periodogram to store its period as `pd`.
# Judging by the warning echoed under this cell, add_pd() takes
# 1 / (frequency at maximum power); the next cell keeps only the cells whose
# resulting `pd` is finite.
for cell in Wlist:
    cell.flavin.autoreg.add_pd()

  self.pd = (1/self.freqs[self.power == max(self.power)])[0]


In [220]:
def _oscillation_summary(position, cell):
    """Return (list position, strain, period, peak power) for one cell."""
    spectrum = cell.flavin.autoreg
    return (position, cell.strain, spectrum.pd, max(spectrum.power))


# Cells whose autoregressive periodogram yields a finite period
oscillating_cells = [
    _oscillation_summary(position, cell)
    for position, cell in enumerate(Wlist)
    if np.isfinite(cell.flavin.autoreg.pd)
]
for element in oscillating_cells:
    print(element)

(60, 'by4741', 324.35064935064935, 1.033093917126172)
(84, 'by4741', 76.2595419847328, 1.2917321989502435)
(88, 'by4741', 396.4285714285714, 2.7606926919762866)
(113, 'by4741', 846.6101694915253, 1.2438885301372997)
(118, 'by4741', 423.30508474576266, 3.3845169916223217)
(129, 'by4741', 1611.2903225806451, 1.0254685975843596)
(168, 'by4741', 205.55555555555554, 1.633464329977339)
(185, 'by4741', 832.4999999999999, 1.1269088217744476)
(210, 'by4741', 117.80660377358488, 1.8954716255748314)
(248, 'zwf1_Del', 177.12765957446808, 1.342261097226908)
(253, 'zwf1_Del', 132.14285714285714, 7.8700392523428615)
(254, 'zwf1_Del', 1427.142857142857, 1.000145969380893)
(263, 'zwf1_Del', 306.441717791411, 1.0394663786216307)
(265, 'zwf1_Del', 247.27722772277224, 1.0616378716155064)
(268, 'zwf1_Del', 129.06976744186045, 3.4440913990685487)
(270, 'zwf1_Del', 225.0, 4.965025580877776)
(285, 'zwf1_Del', 354.25531914893617, 1.060761284654342)
(304, 'zwf1_Del', 212.5531914893617, 3.088823947068518)
(322, 

In [225]:
from scipy.signal import find_peaks

# Position in Wlist of the cell to inspect
list_position = 502
inspected_cell = Wlist[list_position]
inspected_spectrum = inspected_cell.flavin.autoreg

# Time series, then its autoregressive periodogram
inspected_cell.plot_ts()
inspected_cell.flavin.plot_ps(pdgram='autoreg', pd=False)

# Local maxima of the periodogram, reported as frequencies and as periods
peaks, _ = find_peaks(inspected_spectrum.power)
peak_freqs = inspected_spectrum.freqs[peaks]
print(peak_freqs)
print(1/np.array(peak_freqs))

[0.0029029]
[344.48275862]


PROBLEM: there's only one swe1Δ cell that the AR identifies as oscillating.  Changes definitely need to be made to the algorithm.  Perhaps this is where the model selection comes in, but there's _no way_ I'll be able to explore this in time for the conference.

# For poster

Causton - tsa1 tsa2

In [165]:
# Birth times recorded for the cell at index 264, the cell used for the
# poster figures
Wlist[264].births

array([  41.75      ,   82.85      ,  152.83333333,  202.85      ,
        247.88333333,  337.88333333,  347.86666667,  422.88333333,
        492.86666667,  502.85      ,  577.88333333,  592.9       ,
        652.88333333,  707.9       ,  737.91666667,  812.93333333,
        857.88333333,  892.93333333, 1017.93333333])

In [177]:
from pipeline.ar_grima2020 import AR_Fit, AR_Power, optimise_ar_order

# Inputs
# Hand-entered birth times to be marked on the time-series plot
births = np.array([41.75, 82.85, 152.83, 202.85, 247.88, 347.87, 422.88, 502.85, 577.88, 652.88, 707.9, 737.92, 812.93])
poster_cell = Wlist[264]
timeaxis = poster_cell.time
raw_reading = poster_cell.flavin.reading
# Zero-centre the flavin reading
timeseries = raw_reading - np.mean(raw_reading)

# Model TS
# Second argument is int(3*sqrt(n)) (presumably the largest order tried --
# confirm in pipeline.ar_grima2020)
order_limit = int(3*np.sqrt(len(timeseries)))
optimal_ar_order = optimise_ar_order(timeseries, order_limit)
print(optimal_ar_order)

model = AR_Fit(timeseries, optimal_ar_order)
# Coefficients applied to the preceding samples (first coefficient skipped)
lag_coeffs = model.ar_coeffs[1:]
timeseries_modelled = np.empty(model.length)
for index in range(model.length):
    if index >= optimal_ar_order:
        # Each modelled point is a linear combination of the
        # `optimal_ar_order` observed points before it, most recent first
        history = timeseries[index-optimal_ar_order:index]
        timeseries_modelled[index] = np.dot(lag_coeffs, history[::-1])
    else:
        # Not enough history yet: copy the observed value
        timeseries_modelled[index] = timeseries[index]

18


Text(0, 0.5, 'Fluorescence, zero-centred (AU)')

In [201]:
# Plot time series
# Everything is drawn through `ax` (not the pyplot state machine) so the
# labels land on this figure even if another window becomes current under the
# interactive backend.
fig, ax = plt.subplots()
fig.set_size_inches((10,4))
ax.plot(timeaxis, timeseries, '#b785d5', label = 'Biological time series')
ax.plot(timeaxis, timeseries_modelled, '#430467', label = 'Autoregressive model')

# Full-height dashed line at every birth; only the first one carries a label
# so the legend shows a single 'Birth event' entry
birth_line_style = dict(ymin = 0, ymax = 1, color = '#6f0aaa', linestyle = '--')
for birth_count, birth in enumerate(births):
    if birth_count == 0:
        ax.axvline(birth, label = 'Birth event', **birth_line_style)
    else:
        ax.axvline(birth, **birth_line_style)

ax.set_xlim([0,840])
ax.set_xticks(np.linspace(0,800,9))
ax.legend()
# The earlier title 'Autoregressive model overlaid on biological time series'
# was immediately overwritten by this one, so only the strain name is set.
ax.set_title('tsa1Δ tsa2Δ')
ax.set_xlabel('Time (min)')
ax.set_ylabel('Fluorescence, zero-centred (AU)')

Text(0, 0.5, 'Fluorescence, zero-centred (AU)')

In [209]:
# Plot periodogram
# Imported here so the cell does not depend on an earlier cell having run
from scipy.signal import find_peaks

freqs = Wlist[264].flavin.autoreg.freqs
power = Wlist[264].flavin.autoreg.power
# Local maxima of the periodogram and the frequencies they occur at
peak_indices, _ = find_peaks(power)
peak_locs = freqs[peak_indices]

fig, ax = plt.subplots()
ax.plot(freqs, power, '#430467')
# Dotted marker from zero up to the curve at each peak.  vlines() takes its
# heights in data coordinates; axvline()'s ymin/ymax are fractions of the axes
# height (0-1), so passing a power value there gave markers of the wrong
# height.
if len(peak_indices) > 0:
    ax.vlines(peak_locs, 0, power[peak_indices],
              colors = '#6f0aaa', linestyles = 'dotted')
ax.set_xlim([0,0.02])
ax.set_xticks(np.linspace(0,0.02,5))
#ax.set_ylim([0,14])
ax.set_xlabel('Frequency ($min^{-1}$)')
ax.set_ylabel('Power (dimensionless)')
ax.set_title('Autoregressive Model-Based Periodogram')
plt.show()