In [None]:
import os
import sys
# Put the parent of the current working directory on the import search path
# (only once), so that packages located there can be imported below.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
    
# IPython magics: (re)load the autoreload extension in mode 2, and render
# matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Imports

In [None]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import seaborn as sns

# Load data

In [None]:
# Directory of the raw CSV files, and the base name shared by this dataset's
# files; the loading cell concatenates the two to build the file paths.
data_dir = "../data/raw/"
group1_name = "is20016_zwf1egf"

In [None]:
# Both CSV files share the "<data_dir><group1_name>" prefix and differ only
# in their suffix.
filepath1 = f"{data_dir}{group1_name}"
timeseries1_filepath = f"{filepath1}_timeseries.csv"
labels1_filepath = f"{filepath1}_labels.csv"

# Read the time series first, then the labels; in both files the first three
# columns together form the row MultiIndex.
timeseries_df, labels_df = (
    pd.read_csv(csv_path, index_col=[0, 1, 2])
    for csv_path in (timeseries1_filepath, labels1_filepath)
)

In [None]:
# Display the loaded time series table.
timeseries_df

In [None]:
# Keep only the rows without any missing value; the original table is left
# untouched under its own name.
timeseries_dropna = timeseries_df.dropna()

In [None]:
# Turn the labels table into booleans: True where the stored value equals 1,
# False everywhere else.
labels_df = labels_df == 1

# Compute periodogram

## Using aliby (inflexible)

In [None]:
from postprocessor.core.processes.autoreg import autoreg

**TODO: Do on whole dataset** (time-consuming)

In [None]:
# Run the `autoreg` process imported from `postprocessor` on every NaN-free
# time series. The commented-out line is the same call restricted to the
# first 100 rows.
#freqs_df, power_df, order_df = autoreg.as_function(timeseries_dropna.iloc[0:100])
freqs_df, power_df, order_df = autoreg.as_function(timeseries_dropna)

# Save the three returned tables as CSV files under ../data/processed/.
freqs_df.to_csv("../data/processed/freqs_df.csv")
power_df.to_csv("../data/processed/power_df.csv")
order_df.to_csv("../data/processed/order_df.csv")

In [None]:
# Display the third table returned by autoreg.as_function.
order_df

## Using decomposed functions

### Fit time series

In [None]:
from src.autoreg.autoreg import fit_autoreg, optimise_ar_order, autoreg_periodogram

In [None]:
# Worked example: take the row at position 2 of the NaN-free table as a
# one-dimensional NumPy array.
timeseries = timeseries_dropna.iloc[2].to_numpy()

In [None]:
# Fit the example series with a fixed, hand-picked autoregressive order.
# `params` is indexed by "ar_coeffs" in the reconstruction cell further down.
order = 25
params = fit_autoreg(timeseries, order)

In [None]:
# Display the fitted parameters.
params

In [None]:
# Reconstruct the series from the fitted AR coefficients.
# The first `order` points lack a full window of predecessors, so they are
# copied unchanged from the data in one slice assignment.
model = np.zeros(len(timeseries))
model[:order] = timeseries[:order]

# Every later point is the dot product of the coefficients with the `order`
# observed points immediately before it.
for idx in range(order, len(timeseries)):
    preceding_points = timeseries[idx-order:idx]
    # The window is reversed so that the most recent observation comes first,
    # and the leading entry of "ar_coeffs" is skipped.
    # NOTE(review): assumes params["ar_coeffs"] holds order + 1 entries with
    # the lag-k coefficient at position k -- confirm against fit_autoreg.
    linear_combination = np.dot(params["ar_coeffs"][1::], preceding_points[::-1])
    model[idx] = linear_combination

In [None]:
# Overlay the AR reconstruction on the original series so the fit can be
# judged by eye; the legend entry records the order used.
fig, ax = plt.subplots(figsize=(10,3))
ax.plot(timeseries, label='Original')
ax.plot(model, label=f'Model (order {order})')
ax.legend(loc="lower left")
ax.set_xlabel("Time point")
ax.set_ylabel("Flavin autofluorescence,\n normalised (AU)")

### Optimise order

In [None]:
# Upper bound for the order search: 3 * sqrt(number of time points),
# truncated to an integer.
max_order = int(3 * np.sqrt(len(timeseries)))
# NOTE(review): presumably returns the best order not exceeding max_order --
# confirm the selection criterion in src.autoreg.autoreg.
opt_order = optimise_ar_order(timeseries, max_order)

In [None]:
# Print the search bound followed by the order that was selected.
print(max_order)
print(opt_order)

### Periodogram

In [None]:
# Periodogram of the example series, using the optimised AR order.
# NOTE(review): sampling_period=5 is presumably the time between samples and
# freq_npoints the number of frequencies evaluated -- confirm the units and
# meaning against autoreg_periodogram.
freqs, power = autoreg_periodogram(timeseries, sampling_period=5, freq_npoints=100, ar_order=opt_order)

In [None]:
# Plot power against frequency for the example series.
plt.plot(freqs, power)

In [None]:
# Print the largest power value in the periodogram.
print(np.max(power))

# Get classifications

In [None]:
from scipy.signal import argrelextrema

In [None]:
def get_ar_type(power_array):
    """Classify a periodogram by its local extrema.

    Parameters
    ----------
    power_array : array-like
        One-dimensional sequence of power values. It is converted with
        ``np.asarray``, so plain lists are accepted as well as NumPy arrays.

    Returns
    -------
    int
        * 1 -- there is no local maximum.
        * 4 -- there is at least one local maximum but no local minimum.
        * 3 -- local maxima and minima both exist and at least one local
          maximum is greater than 1.
        * 2 -- local maxima and minima both exist and every local maximum
          is less than 1.
        * 0 -- none of the above: no local maximum exceeds 1, yet not all
          of them are below 1 (for example one equals 1 exactly).

    Notes
    -----
    Extrema are located with ``scipy.signal.argrelextrema`` using the strict
    comparators ``np.greater`` / ``np.less``, so flat plateaus do not count
    as extrema.
    """
    power_array = np.asarray(power_array)
    local_max_list = argrelextrema(power_array, np.greater)[0]
    local_min_list = argrelextrema(power_array, np.less)[0]

    # The checks are ordered: the first matching rule decides the class.
    if len(local_max_list) == 0:
        return 1
    if len(local_min_list) == 0:
        return 4

    peak_powers = power_array[local_max_list]
    if (peak_powers > 1).any():
        return 3
    if (peak_powers < 1).all():
        return 2
    return 0

In [None]:
idx = 0

# Pull the periodogram stored at row `idx` out of the two tables as NumPy
# arrays, and record the range of its power values.
freqs_array = freqs_df.iloc[idx].to_numpy()
power_array = power_df.iloc[idx].to_numpy()
min_power, max_power = power_array.min(), power_array.max()

# Positions of the strict local minima and maxima of the power values.
local_min_list = argrelextrema(power_array, np.less)[0]
local_max_list = argrelextrema(power_array, np.greater)[0]

print(get_ar_type(power_array))

# Periodogram with vertical markers spanning the full power range:
# green at local maxima, red at local minima.
plt.plot(freqs_array, power_array)
plt.vlines(freqs_array[local_max_list], min_power, max_power, colors='g')
plt.vlines(freqs_array[local_min_list], min_power, max_power, colors='r')

In [None]:
# Classify every row of power_df. raw=True hands each row to get_ar_type as a
# NumPy array rather than a Series.
types = power_df.apply(get_ar_type, axis=1, raw=True)

In [None]:
# Display the type assigned to each row.
types

In [None]:
# Count how many rows fall into each type.
types.value_counts()

In [None]:
# Binary prediction: True for every row except those of type 1, i.e. those
# whose periodogram has no local maximum.
classifications = types != 1

In [None]:
# Display the boolean prediction for each row.
classifications

In [None]:
# Build two plain Python lists in the same row order: the predictions, and
# the first column of labels_df looked up by the predictions' index.
predicted_labels = classifications.to_list()
true_labels = labels_df.loc[classifications.index].to_numpy().T[0].tolist()

In [None]:
from sklearn.metrics import confusion_matrix

# Pin the label order to [False, True] so the matrix is always 2x2, laid out
# as [[TN, FP], [FN, TP]]. Without `labels`, data in which only one class
# occurs yields a smaller matrix and the four-way unpacking below raises
# ValueError. When both classes occur the result is the same as before.
conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=[False, True])
true_negative, false_positive, false_negative, true_positive = conf_matrix.ravel()

In [None]:
# Display the confusion matrix (rows: true labels, columns: predictions).
conf_matrix

In [None]:
# Display the matrix flattened row by row, in the order it was unpacked above.
conf_matrix.ravel()