# General

Specify the file name prefix, the sampling period and the `remain` setting used when importing time series

In [79]:
# Common prefix of this experiment's CSV files; suffixes such as
# 'flavin.csv' are appended to it when the data are imported.
filename_prefix = './data/arin/Omero20016_'
# Interval between consecutive time points
# (presumably in minutes, matching the 'Time (min)' axis labels -- TODO confirm)
sampling_period = 5
# Forwarded as `remain` to pipeline.dataimport.import_timeseries
# NOTE(review): looks like the fraction of data to keep -- confirm
remain = 0.8

%matplotlib

Using matplotlib backend: Qt5Agg


<module 'pipeline.vis' from '/home/arin/phd/phd-time-series-pipeline/pipeline/vis.py'>

Main shebang (this probably isn't the purpose of a jupyter notebook)

In [84]:
#!/usr/bin/env python3
import os

import numpy as np
import scipy as sp
import pandas as pd
import itertools
import matplotlib.pyplot as plt
import sklearn.metrics
import igraph as ig

import pipeline.dataexport
import pipeline.dataimport
import pipeline.periodogram
import pipeline.score
import pipeline.tsman
import pipeline.vis

import featext.tsman
import featext.graph
import featext.vis

import catch22
import leidenalg

def add_classicalAttr(cell, oversampling_factor = 1):
    """
    Stores the classical periodogram of the cell's processed flavin signal
    in cell.flavin.classical (freqs and power)
    """
    freqs, power = pipeline.periodogram.classical(
        cell.time,
        cell.flavin.reading_processed,
        oversampling_factor = oversampling_factor)
    cell.flavin.classical.freqs = freqs
    cell.flavin.classical.power = power

def add_bglsAttr(cell):
    """
    Attaches a fresh PdgramAttr to cell.flavin.bgls, labels it and fills in
    the BGLS frequencies and power of the processed flavin signal
    """
    pdgram = pipeline.PdgramAttr()
    cell.flavin.bgls = pdgram
    pdgram.label = 'Bayesian General Lomb-Scargle Periodogram'
    pdgram.power_label = 'Probability'

    signal = cell.flavin.reading_processed
    # same error for every point: square root of the largest reading
    err = np.ones(len(signal)) * np.sqrt(np.max(signal))

    pdgram.freqs, pdgram.power = pipeline.periodogram.bgls(
        cell.time, signal, err,
        plow = 30.0, phigh = 360.0, ofac = 5)

def add_autoregAttr(cell, freq_npoints = 1000):
    """
    Computes autoregressive model-based periodogram and adds PdgramAttr
    attributes

    Parameters
    ----------
    cell :
        object exposing `time` and `flavin.reading_processed`; a new
        PdgramAttr is stored in `cell.flavin.autoreg`
    freq_npoints : int, optional
        forwarded as the third argument of pipeline.periodogram.autoreg
        (presumably the number of frequencies evaluated -- TODO confirm).
        Defaults to 1000, the value that used to be hard-coded.
    """
    cell.flavin.autoreg = pipeline.PdgramAttr()
    # label previously read 'Autogressive ...' (typo)
    cell.flavin.autoreg.label = \
            'Autoregressive Model-Based Periodogram (Jia & Grima, 2020)'
    cell.flavin.autoreg.power_label = 'Power'
    cell.flavin.autoreg.freqs, cell.flavin.autoreg.power = \
            pipeline.periodogram.autoreg(cell.time,
                                         cell.flavin.reading_processed,
                                         freq_npoints)

# FLAVIN: import data and process objects

# Load the flavin fluorescence time series from its CSV
Dset_flavin = pipeline.dataimport.import_timeseries(
    filename_prefix + 'flavin.csv',
    remain=remain,
)
# Placeholder category (3) per entry of Dset_flavin, only there so the
# downstream code accepts the input; to be re-factored later
Dset_dcategory = [3] * len(Dset_flavin)
Dset_births = pipeline.dataimport.import_births(filename_prefix + 'births.csv')

# Bundle time series, categories and births into a DatasetAttr
Dset_data = pipeline.dataimport.CellAttr_from_datasets(
    timeseries_df=Dset_flavin,
    categories_array=Dset_dcategory,
    births_df=Dset_births,
    sampling_pd=sampling_period,
)
Dset = pipeline.DatasetAttr(Dset_data)

# Label every cell: provenance, medium, strain (looked up by position)
# and the flavin channel itself
strainlookup = pd.read_csv(filename_prefix + 'strains.csv',
                           index_col='position')
for ii, cell in enumerate(Dset.cells):
    cell.source = filename_prefix
    cell.medium.base = 'Delft'
    cell.medium.nutrients = {'glucose': 10}

    cell.strain = strainlookup.loc[cell.position].strain

    flavin = pipeline.Fluo('flavin')
    cell.flavin = flavin
    flavin.exposure = 60
    flavin.reading = cell.y
    flavin.category = Dset_dcategory[ii]


# mCherry: import data and process objects
#
# Only the CSV import is expected to fail with FileNotFoundError (experiment
# without an mCherry channel), so only that call sits inside the `try`; the
# processing lives in the `else` branch so that unrelated errors are not
# reported as a missing file.
try:
    Dset_mCherry_unsliced = pipeline.dataimport.import_timeseries(
        filename_prefix+'mCherry.csv', remain = remain)
except FileNotFoundError as error:
    print(error)
    print(f'No mCherry time series associated with this experiment: {filename_prefix}')
else:
    # restrict to cells with flavin readings
    idx_both = list(set(Dset_flavin.cellID) & set(Dset_mCherry_unsliced.cellID))
    Dset_mCherry = \
            Dset_mCherry_unsliced.loc[Dset_mCherry_unsliced.cellID.isin(idx_both)]

    # Arranges information into DatasetAttr objects
    # dummy -- will be better when I re-structure things... am just re-using a
    # function for quick-and-dirty purposes, and it's obviously redundant
    mCherry_data = pipeline.dataimport.CellAttr_from_datasets(
            timeseries_df = Dset_mCherry,
            categories_array = Dset_dcategory,
            births_df = Dset_births,
            sampling_pd = sampling_period)
    mCherry = pipeline.DatasetAttr(mCherry_data)
    # kept as a notebook-level name in case other cells inspect it
    mCherry_MATLABids = [cell.MATLABid for cell in mCherry.cells]

    # MATLABid -> mCherry cell, built once instead of a list search per cell.
    # setdefault keeps the FIRST cell for a repeated id, like list.index did.
    mCherry_by_MATLABid = {}
    for mCherry_cell in mCherry.cells:
        mCherry_by_MATLABid.setdefault(mCherry_cell.MATLABid, mCherry_cell)

    # Add labels
    for cell in Dset.cells:
        cell.mCherry = pipeline.Fluo('mCherry')
        if cell.strain == 'htb2_mCherry_CRISPR':
            cell.mCherry.exposure = 100
        else:
            cell.mCherry.exposure = 0

        # loads in reading, cross-referencing by MATLABid; cells without an
        # mCherry counterpart get no `reading` attribute set here
        matched_cell = mCherry_by_MATLABid.get(cell.MATLABid)
        if matched_cell is not None:
            cell.mCherry.reading = matched_cell.y

Define working dataset (list of cells)

In [19]:
# Working list: by default every cell of the dataset
Wlist = Dset.cells
# Alternative: restrict the working list to a single strain
#Wlist = [cell for cell in Dset.cells if cell.strain == 'FY4']
# Shown as the cell output: number of cells in the working list
len(Wlist)

1330

Optional: filters

In [None]:
# Run pipeline.tsman.stdfilter on every raw flavin trace (sampling frequency
# is the reciprocal of the sampling period) ...
for cell in Wlist:
    filtered = pipeline.tsman.stdfilter(cell.flavin.reading,
                                        Fs = 1/sampling_period)
    cell.flavin.reading_processed = filtered
# ... then hand the whole working list to pipeline.tsman.population_detrend
pipeline.tsman.population_detrend(Wlist, 'flavin.reading_processed')

Optional: chop up time series

In [127]:
# Index window [interval_start, interval_end) kept for every cell
interval_start = 0
interval_end = 168
kept_window = slice(interval_start, interval_end)

# Overwrites the time axis and the raw flavin reading in place
for cell in Wlist:
    cell.time = cell.time[kept_window]
    cell.flavin.reading = cell.flavin.reading[kept_window]

Optional: normalise for heatmap

In [40]:
# Centre each raw trace on its NaN-ignoring mean and divide by its
# NaN-ignoring range (max - min); result goes to reading_processed
for cell in Wlist:
    raw_reading = cell.flavin.reading
    ts_mean = np.nanmean(raw_reading)
    ts_range = np.nanmax(raw_reading) - np.nanmin(raw_reading)
    cell.flavin.reading_processed = (raw_reading - ts_mean)/ts_range

Optional: add spectra

In [None]:
for cell in Wlist:
    # The periodogram helpers read reading_processed; here it is overwritten
    # with the raw reading, replacing any earlier filtering/normalisation
    cell.flavin.reading_processed = cell.flavin.reading
    add_classicalAttr(cell, oversampling_factor = 1)
    # BGLS periodogram is currently switched off
    #add_bglsAttr(cell)
    add_autoregAttr(cell)

General plotting

In [20]:
# Index (into Wlist) of the cell to inspect, and the attribute to plot
cell_index = 1001
y_attr = 'flavin.reading'

inspected_cell = Wlist[cell_index]
inspected_cell.plot_ts(y_attr=y_attr)
# Print strain, MATLABid and position, one per line
for attr_name in ('strain', 'MATLABid', 'position'):
    print(getattr(inspected_cell, attr_name))

zwf1_Del
1002
14


In [21]:
# With CSHL2021/BYG2021 colour palette
plotted_cell = Wlist[cell_index]
timeseries = plotted_cell.flavin.reading
timeaxis = plotted_cell.time
strain_name = 'zwf1Δ'

fig, ax = plt.subplots()
fig.set_size_inches((10,4))

# flavin trace
ax.plot(timeaxis, timeseries, '#3714b0')
# one dashed vertical line per entry of births, spanning the full height
if plotted_cell.births.any():
    for birth in plotted_cell.births:
        ax.axvline(birth, ymin = 0, ymax = 1,
                   color = '#cb0077', linestyle = '--')

# ax is the current axes right after plt.subplots(), so labelling it
# directly is the same as going through pyplot
ax.set_title(strain_name)
ax.set_xlabel('Time (min)')
ax.set_ylabel('Fluorescence (AU)')

Text(0, 0.5, 'Fluorescence (AU)')

Kymograph

In [51]:
# Dotted path of the per-cell attribute to draw
cell_attr = 'flavin.reading_processed'

# Draw the kymograph of the working list, ordered by 'distfromcentre'
pipeline.vis.kymograph(Wlist, cell_attr=cell_attr,
                      order_by='distfromcentre')

# Plots for presentation

Synchrony of YMCs

In [83]:
# Switch to experiment Omero19972. Dset still holds the previously loaded
# experiment until the import cell ('Main shebang') is executed again.
filename_prefix = './data/arin/Omero19972_'
remain = 1
# THEN RUN MAIN SHEBANG

In [85]:
# Restrict the working list to cells whose strain is 'FY4'
Wlist = [cell for cell in Dset.cells if cell.strain == 'FY4']
# Number of time points of the first cell, then number of selected cells
print(len(Wlist[0].time))
print(len(Wlist))

264
111


In [7]:
# Glucose: 0 - 83
# Starvation: 84 - 180
# Recovery: 181 - 264

def kymograph_chopped(Wlist, interval_start, interval_end):
    """
    Truncates every cell to the index window [interval_start:interval_end],
    range-normalises the flavin reading and draws a kymograph.

    cell.time and cell.flavin.reading are overwritten in place, so a later
    call operates on the already-truncated arrays.
    """
    window = slice(interval_start, interval_end)
    # Pass 1: truncate time axis and raw reading
    for cell in Wlist:
        cell.time = cell.time[window]
        cell.flavin.reading = cell.flavin.reading[window]
    # Pass 2: subtract the mean, divide by (max - min), ignoring NaNs
    for cell in Wlist:
        reading = cell.flavin.reading
        centre = np.nanmean(reading)
        spread = np.nanmax(reading) - np.nanmin(reading)
        cell.flavin.reading_processed = (reading - centre)/spread
    # Pass 3: draw
    pipeline.vis.kymograph(Wlist,
                           cell_attr='flavin.reading_processed',
                           order_by='distfromcentre')

# Samples 181-264 of each cell
kymograph_chopped(Wlist, 181, 264)

In [51]:
# Spellman et al (1998)

def kymograph_chopped(Wlist, interval_start, interval_end):
    """
    Truncates every cell to the index window [interval_start:interval_end],
    converts the flavin reading to log2(reading / mean) and draws a
    kymograph.

    cell.time and cell.flavin.reading are overwritten in place, so a later
    call operates on the already-truncated arrays.
    """
    window = slice(interval_start, interval_end)
    # Pass 1: truncate time axis and raw reading
    for cell in Wlist:
        cell.time = cell.time[window]
        cell.flavin.reading = cell.flavin.reading[window]
    # Pass 2: log2 ratio to the NaN-ignoring mean of the trace
    for cell in Wlist:
        reading = cell.flavin.reading
        baseline = np.nanmean(reading)
        cell.flavin.reading_processed = np.log2(reading / baseline)
    # Pass 3: draw
    pipeline.vis.kymograph(Wlist,
                           cell_attr='flavin.reading_processed',
                           order_by='distfromcentre')

# Samples 84-180 of each cell. kymograph_chopped truncates the cells in
# place, so this assumes Wlist still holds untruncated time series.
kymograph_chopped(Wlist, 84, 180)

  cell.flavin.reading_processed = np.log2(cell.flavin.reading / ts_mean)


In [54]:
# Scratch cell: `cell` is not assigned here, so it is whatever object the
# most recently executed loop left bound to that name.
cell.flavin.reading
# Same log2(reading / mean) normalisation as in the kymograph cell
ts_mean = np.nanmean(cell.flavin.reading)
cell.flavin.reading_processed = np.log2(cell.flavin.reading / ts_mean)

In [56]:
# Quick look at the raw flavin reading of `cell`, plotted against sample index
fig, ax = plt.subplots()
ax.plot(cell.flavin.reading)
plt.show()

In [57]:
# Quick look at the processed flavin reading of `cell`, plotted against
# sample index
fig, ax = plt.subplots()
ax.plot(cell.flavin.reading_processed)
plt.show()

In [135]:
# For BYG 2021
# Overlays the traces of the `num_timeseries` best-ranked cells and the
# population mean, with the glucose-starvation window shaded.
from pipeline.ar_grima2020 import AR_Fit, AR_Power, optimise_ar_order

# Specify number of time series to overlay.
# Chosen based on distance from centre -- the closer to the centre,
# chances are it's higher-quality.
num_timeseries = 15
# Choose whether to do the same normalisation done for the kymograph
normalise = True
# Choose whether to use AR model to smooth time series
arma_fit = True

# working with a subset of the main dataset,
# so re-writing cellids so that they start from 0
# so I can use these ids for plotting
for new_cellid, cell in enumerate(Wlist):
    cell.cellid = new_cellid

plot_rank = pipeline.vis.order_cells(Wlist, get='cellid', by='distfromcentre')

# Decide ONCE which attribute is plotted, so that individual traces, the
# mean trace and the y-axis label always refer to the same quantity.
# (Previously the individual traces were picked via reading_processed.any(),
# which could use stale processed data when normalise is False and fell back
# to raw values for an all-zero normalised trace.)
if normalise:
    for cell in Wlist:
        ts_mean = np.nanmean(cell.flavin.reading)
        cell.flavin.reading_processed = np.log2(cell.flavin.reading / ts_mean)
    source_attr = 'reading_processed'
    ylabel = 'Normalised fluorescence (AU)'
else:
    source_attr = 'reading'
    ylabel = 'Fluorescence (AU)'

# Point-by-point mean over all cells in the working list, ignoring NaNs
mean_timeseries = np.nanmean(
    np.array([getattr(cell.flavin, source_attr) for cell in Wlist]), axis=0)

for cellid in plot_rank[0:num_timeseries]:
    timeseries = getattr(Wlist[cellid].flavin, source_attr)
    if arma_fit:
        # Largest AR order tried is 3*sqrt(N), N being the series length
        optimal_ar_order = optimise_ar_order(timeseries, int(3*np.sqrt(len(timeseries))))
        print(optimal_ar_order)
        model = AR_Fit(timeseries, optimal_ar_order)
        timeseries_modelled = np.empty(model.length)
        for index in range(model.length):
            if index < optimal_ar_order:
                # not enough history for a prediction yet: copy the data
                timeseries_modelled[index] = timeseries[index]
            else:
                # prediction from the preceding `optimal_ar_order` points,
                # most recent first, weighted by the AR coefficients
                # (the first entry of ar_coeffs is skipped)
                preceding_points = timeseries[index-optimal_ar_order:index]
                timeseries_modelled[index] = np.dot(model.ar_coeffs[1:],
                                                    preceding_points[::-1])
        Wlist[cellid].flavin.reading_plot = timeseries_modelled
    else:
        Wlist[cellid].flavin.reading_plot = timeseries

# Plotting
fig, ax = plt.subplots()
fig.set_size_inches((10,4))
# regime
ax.axvspan(420, 900, facecolor='#eae5fc', alpha=1, label='Glucose starvation')
# individual time series
for cellid in plot_rank[0:num_timeseries]:
    ax.plot(Wlist[cellid].time, Wlist[cellid].flavin.reading_plot, linewidth=1, color='#9d8cd7')
# mean time series (drawn against the time axis of the first cell)
ax.plot(Wlist[0].time, mean_timeseries, color='#1f096b', linewidth=3, label='Mean time series')
plt.title('Response of individual cells\' flavin oscillations to glucose starvation')
plt.xlabel('Time (min)')
plt.ylabel(ylabel)
ax.legend()
plt.show()

1
5
1
4
1
2
1
1
9
5
10
1
5
6
1


In [76]:
# Inspect the `y` array of the first cell in the working list
Wlist[0].y

array([28.83970856, 28.4446461 , 28.95571956, 29.45387454, 27.        ,
       26.32437276, 26.1923775 , 25.3962963 , 24.89705882, 23.63653484,
       24.375     , 24.4501845 , 24.73484848, 23.21575985, 23.81783681,
       23.39886578, 22.54898336, 23.89345794, 23.4232902 , 24.62201835,
       23.98333333, 24.44485294, 24.7192029 , 25.68784029, 25.85635359,
       25.65120594, 25.78754579, 26.06776557, 25.17352415, 26.59464286,
       25.27598566, 25.92226148, 26.57469244, 26.04480287, 25.39756944,
       26.16550523, 26.87348354, 26.63194444, 27.11703959, 26.97619048,
       27.13993174, 26.65060241, 26.97836938, 26.560477  , 27.40695652,
       27.1958042 , 26.97321429, 27.50181818, 26.89292196, 25.98018018,
       25.6416819 , 24.38829787, 25.23561151, 24.91858407, 25.24772313,
       24.8294849 , 25.75224417, 24.61749571, 25.04095563, 25.39261745,
       24.8202995 , 24.63745704, 25.71452703, 25.38087248, 24.90562914,
       25.25      , 26.03482587, 25.89144737, 25.81862745, 25.10

Causton strains

In [124]:
# Switch to experiment Omero19979; `remain` keeps whatever value it had.
# Dset still holds the previously loaded experiment until the import cell
# ('Main shebang') is executed again.
filename_prefix = './data/arin/Omero19979_'
# THEN RUN MAIN SHEBANG

In [120]:
# Chops time series - i.e. before glucose limitation
# WARNING: DO THIS ONCE -- the slices overwrite cell.time and
# cell.flavin.reading for every cell of the dataset
interval_start = 0
interval_end = 168
pre_limitation = slice(interval_start, interval_end)
for cell in Dset.cells:
    cell.time = cell.time[pre_limitation]
    cell.flavin.reading = cell.flavin.reading[pre_limitation]

# One kymograph per strain
strain_list = ['swe1_Del', 'tsa1_Del_tsa2_Del', 'rim11_Del']
for strain in strain_list:
    # Cells belonging to this strain
    Wlist = [cell for cell in Dset.cells if cell.strain == strain]
    # Mean-centre and divide by the range (NaNs ignored)
    for cell in Wlist:
        raw_reading = cell.flavin.reading
        ts_mean = np.nanmean(raw_reading)
        ts_range = np.nanmax(raw_reading) - np.nanmin(raw_reading)
        cell.flavin.reading_processed = (raw_reading - ts_mean)/ts_range
    # Draw
    pipeline.vis.kymograph(Wlist,
                           cell_attr='flavin.reading_processed',
                           order_by='distfromcentre')

ZWF1

In [134]:
# Switch back to experiment Omero20016; `remain` keeps whatever value it had.
# Dset still holds the previously loaded experiment until the import cell
# ('Main shebang') is executed again.
filename_prefix = './data/arin/Omero20016_'
# THEN RUN MAIN SHEBANG

In [123]:
# Chops time series - i.e. before glucose limitation
# WARNING: DO THIS ONCE -- every cell of the dataset has its time axis and
# raw flavin reading replaced by the truncated version
interval_start = 0
interval_end = 168
for cell in Dset.cells:
    flavin_channel = cell.flavin
    cell.time = cell.time[interval_start:interval_end]
    flavin_channel.reading = flavin_channel.reading[interval_start:interval_end]

strain_list = ['zwf1_Del', 'by4741']
for strain in strain_list:
    # Working list for the current strain
    Wlist = [cell for cell in Dset.cells if cell.strain == strain]
    # Rescale each trace: subtract its mean, divide by its range
    for cell in Wlist:
        flavin_channel = cell.flavin
        ts_mean = np.nanmean(flavin_channel.reading)
        ts_range = (np.nanmax(flavin_channel.reading)
                    - np.nanmin(flavin_channel.reading))
        flavin_channel.reading_processed = \
                (flavin_channel.reading - ts_mean)/ts_range
    # Kymograph of the rescaled traces
    pipeline.vis.kymograph(
        Wlist,
        cell_attr='flavin.reading_processed',
        order_by='distfromcentre')