# Time series

* https://docs.scipy.org/doc/scipy/reference/generated/scipy.fft.fft.html
* https://docs.scipy.org/doc/scipy/tutorial/fft.html

* y1 - pure white noise
* y2 - random walk noise
* y3 - a linear drift (with noise)
* y4 - oscillating function, with noise
* y5 - secret combination of the above

y4 oscillates according to

$$
  y_4(t) = A_1 \cos(2\pi f_1 t) + A_2 \sin(2\pi f_2 t)
$$


In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the CSV, skipping its single header row.
filename = "timeseries.csv"
data = np.genfromtxt(filename, skip_header=1, delimiter=",")

# Split the table column by column: column 0 is the time axis,
# columns 1-5 are the five series.
t, y1, y2, y3, y4, y5 = (data[:, col] for col in range(6))

In [None]:
# Overview: every series against time.
# Labels follow the column description at the top of the notebook:
# y2 is the random walk and y3 is the linear drift.
plt.plot(t, y1, label="White")
plt.plot(t, y2, label="Random Walk")
plt.plot(t, y3, label="Drift")
plt.plot(t, y4, label="Oscillate")
plt.plot(t, y5, label="Combo")
plt.xlabel("time (s)")
plt.legend()
plt.show()

### Histogram

* Can sometimes be useful for seeing distribution of data
* For white noise, the histogram should be a Gaussian shape
* In fact, any of the univariate techniques from previous weeks can be useful!

In [None]:
# Two side-by-side panels that share both axes.
fig, (ax1, ax2) = plt.subplots(ncols=2, sharex=True, sharey=True)

labels = ["white", "randWalk", "drift", "osc", "mix"]
for i, (name, y) in enumerate(zip(labels, [y1, y2, y3, y4, y5])):
    # White noise and the oscillation share the left panel; the rest go right.
    tax = ax1 if name in ("white", "osc") else ax2
    # Solid outline (carries the legend label) ...
    tax.hist(y, bins=20, density=True, histtype="step", color=f"C{i}", lw=2, label=name)
    # ... plus a translucent fill in the same colour.
    tax.hist(y, bins=20, density=True, histtype="stepfilled", color=f"C{i}", alpha=0.25)
    # Optional: tax.set_yscale("log") for a log-scaled density axis.
    tax.legend()
plt.show()

------------------------------------
## Fourier Transform

Discrete Fourier transform (DFT):

$$
FT[y]_k = \sum_{n=0}^{N-1} y_n \exp\left(-2\pi i \frac{n k}{N}\right)
$$

* Implemented using _fast Fourier transform_ algorithm (FFT)
* Frequency: $k$ cycles per $N$ samples
* `scipy.fft.fft`: evaluates the DFT
* `scipy.fft.fftfreq(N, d=dt)`: evaluates the frequencies
  * Covers both signs, $[-f_{\rm max}, f_{\rm max}]$: the non-negative frequencies come first, followed by the negative ones
  * For real data: symmetric/anti-symmetric


* FT is generally complex valued, even for real valued data
* For real valued data, ${\rm re}(FT)$ gives the cosine amplitudes
* For real valued data, ${\rm im}(FT)$ gives the sine amplitudes
  * both: assuming zero phase: i.e., $A\cos (\omega t) + B\sin (\omega t)$

Often: only need the _amplitudes_

$$
A_k = \frac{2\,|FT[y]_k|}{N}
$$


In [None]:
# Plot the data again, before taking the FFT
ax = plt.gca()
ax.plot(t, y4, label="Example data")
ax.set_ylabel("y(t)")
ax.set_xlabel("t (s)")
ax.set_title("Data")
ax.legend()
plt.show()

In [None]:
from scipy.fft import fft, fftshift, fftfreq

# Sample spacing, taken from the first two time stamps
# (assumes t is uniformly sampled -- TODO confirm).
dt = t[1] - t[0]

# DFT of the oscillating series and the matching frequency axis,
# both left in the order the library returns them (not centred on 0).
ft = fft(y4)
f = fftfreq(len(t), d=dt)

# Real and imaginary parts on the same axes.
ax = plt.gca()
ax.plot(f, ft.real, "x-", label="real")
ax.plot(f, ft.imag, ".-", label="imag")
ax.set_ylabel("FT(y)")
ax.set_xlabel("f (Hz)")
ax.set_title("FT of data")
ax.legend()
plt.show()

### Use 'fftshift' to properly center on 0

In [None]:
# Re-order both the spectrum and its frequency axis so that f = 0 sits in the middle.
f = fftshift(fftfreq(t.size, d=dt))
ft = fftshift(fft(y4))

ax = plt.gca()
ax.plot(f, ft.real, "x-", label="real")
ax.plot(f, ft.imag, ".-", label="imag")
ax.set_xlim(0)  # Only need one half: start the x axis at f = 0
ax.set_ylabel("FT(y)")
ax.set_xlabel("f (Hz)")
ax.legend()
plt.show()

### Amplitude spectrum from DFT:

$$
  A(f) = \frac{2}{N} |FT(y)|
$$

In [None]:
# Centred frequency axis and centred DFT of the oscillating series.
freq = fftshift(fftfreq(y4.size, d=dt))
ft = fftshift(fft(y4))

# Amplitude spectrum: A = 2 |FT| / N
amp = 2.0 * np.abs(ft) / len(t)

ax = plt.gca()
ax.plot(freq, amp, "-", label="amplitude")
ax.set_xlim(0)  # show the non-negative frequencies only
ax.set_ylabel("Amplitude")
ax.set_xlabel("f (Hz)")
ax.legend()
plt.show()

### Zoom in, with inset

In [None]:
from matplotlib import patches

# Amplitude spectrum of the oscillating series (A = 2 |FT| / N), centred on f = 0.
ft = fftshift(fft(y4))
freq = fftshift(fftfreq(y4.size, d=dt))
amp = 2.0 * np.abs(ft) / len(t)

# Main plot: the full non-negative half of the spectrum.
fig, ax1 = plt.subplots()
ax1.plot(freq, amp, "-", label="amplitude")
ax1.set_xlim(0)
ax1.set_ylabel("Amplitude")
ax1.set_xlabel("f (Hz)")
ax1.set_ylim(-0.05, 1.1)
ax1.legend()

# Zoom region in data coordinates: lower-left corner (x0, y0), size dx by dy.
x0 = 0.5
dx = 2.5
y0 = 0
dy = 1.05

# Outline the zoom region on the main plot.
zoom_box = patches.Rectangle(
    (x0, y0), dx, dy, linestyle="-", facecolor="None", edgecolor="red"
)
ax1.add_patch(zoom_box)

# Inset axes, given as [left, bottom, width, height] in figure fractions.
ax2 = fig.add_axes([0.3, 0.4, 0.5, 0.34])
plt.setp(ax2.spines.values(), linestyle="-", color="red")

# Inset content: the same spectrum, restricted to the zoom region, with
# reference lines drawn at fixed amplitudes (1.0, 0.5) and frequencies (4/3, 7/3).
ax2.plot(freq, amp, "-o")
ax2.axhline(y=1.0, color="magenta", linestyle="dotted", label="$f_1$ actual")
ax2.axvline(x=4.0 / 3, color="magenta", linestyle="dotted")
ax2.axhline(y=0.5, color="g", linestyle="dashdot", label="$f_2$ actual")
ax2.axvline(x=7.0 / 3, color="g", linestyle="dashdot")
ax2.set_xlim(x0, x0 + dx)
ax2.set_ylim(y0, y0 + dy)
ax2.legend()

# Connect the two bottom corners of the zoom box (data coordinates on ax1)
# to the two bottom corners of the inset (axes-fraction coordinates on ax2).
con1 = patches.ConnectionPatch(
    xyA=(x0, y0),
    coordsA=ax1.transData,
    xyB=(0.0, 0.0),
    coordsB=ax2.transAxes,
    color="r",
    linestyle="dotted",
    linewidth=0.5,
)
con3 = patches.ConnectionPatch(
    xyA=(x0 + dx, y0),
    coordsA=ax1.transData,
    xyB=(1.0, 0.0),
    coordsB=ax2.transAxes,
    color="r",
    linestyle="dotted",
    linewidth=0.5,
)
fig.add_artist(con1)
fig.add_artist(con3)

fig.suptitle("Zoom in, with inset", fontsize=15)
plt.show()

### Compare for different data

In [None]:
# Shared, centred frequency axis for all five spectra.
f = fftshift(fftfreq(t.size, d=dt))

# Amplitude spectrum (2 |FT| / N) of each series, in column order.
a1, a2, a3, a4, a5 = (
    2.0 * np.abs(fftshift(fft(y))) / len(t) for y in (y1, y2, y3, y4, y5)
)

ax = plt.gca()
ax.plot(f, a1, label="White")
ax.plot(f, a2, label="Random Walk")
ax.plot(f, a3, label="Drift")
ax.plot(f, a4, label="Oscillate")
ax.plot(f, a5, label="Combination")
ax.set_xlim(0, 8)
ax.set_ylabel("Amplitude")
ax.set_xlabel("f (Hz)")
ax.legend()
plt.show()

-----------------
## Autocorrelation

$$
  a(\tau) = \frac{1}{\sigma^2} \langle x(t) \, x (t+\tau) \rangle
$$

Or, in terms of arrays:

$$
  a(\tau) = \frac{1}{\sigma^2} \frac{1}{N}\sum  x_i \, x_{i+\tau}
$$
where $x_i$ is measured relative to the mean of the data, and $N$ is the number of data points included in the sum (will be less than the full data size for $\tau\neq0$)

In [None]:
# We can define this ourselves:


def t_acf(data, lag):
    """Return the normalised autocorrelation of ``data`` at one integer lag.

    The data are mean-subtracted, the products of points ``lag`` samples apart
    are averaged over the ``len(data) - lag`` available pairs, and the result
    is divided by the variance of the data.

    Parameters
    ----------
    data : array_like
        One-dimensional sequence of samples.
    lag : int
        Offset in samples; must satisfy ``0 <= lag < len(data)``.

    Returns
    -------
    float
        Autocorrelation at ``lag`` (1.0 at ``lag == 0``).

    Raises
    ------
    ValueError
        If ``lag`` is outside the valid range, i.e. no pairs are left to average.
    """
    values = np.asarray(data, dtype=float)
    n = values.size
    if not 0 <= lag < n:
        raise ValueError(f"lag must satisfy 0 <= lag < {n}, got {lag}")
    centred = values - values.mean()
    # One vectorised dot product replaces the per-element Python loop.
    total = np.dot(centred[: n - lag], centred[lag:])
    return total / values.var() / (n - lag)


def acf(data):
    """Return the autocorrelation of ``data`` at every lag from 0 to ``len(data) - 1``.

    Returns a list with one value per lag, each computed by ``t_acf``.
    """
    # Size the lag range from the argument itself rather than a notebook global,
    # so the function works for input of any length.
    return [t_acf(data, lag) for lag in range(len(data))]

In [None]:
# Alternative: build the autocorrelation from numpy's correlate function.
# Its output needs a little post-processing (see below).
# Note the normalisation: every lag is divided by the full sample size N,
# whereas the manual version divides by N - lag, so the two differ at large lags.
def acf2(data):
    """Return the autocorrelation of ``data`` for lags 0 .. N-1 via ``np.correlate``.

    The data are mean-subtracted, correlated with themselves in "full" mode,
    and scaled by the variance and by the number of samples.
    """
    centred = data - np.mean(data)
    scaled = np.correlate(centred, centred, "full") / np.var(centred) / centred.size
    # The "full" output holds the negative lags first, then lag 0, then the
    # positive lags. Keep everything up to and including lag 0 and reverse it,
    # so that index k corresponds to lag k.
    n_keep = int(len(scaled) / 2) + 1
    return scaled[:n_keep][::-1]

In [None]:
# Autocorrelation of the white-noise series from both implementations.
a0 = acf(y1)
a2 = acf2(y1)

# Overlay them against the lag index for a visual comparison.
ax = plt.gca()
ax.plot(a0, label="manual")
ax.plot(a2, "--", label="via numpy")
ax.legend()
plt.show()

### Understand autocorrelation:

In [None]:
from ipywidgets import interact

# Lag axis: sample index times the sample spacing, in the units of t.
dt = t[1] - t[0]
lags = np.arange(len(t)) * dt

# Series selectable by name. Keys follow the column description at the top of
# the notebook: y2 is the random walk and y3 is the linear drift.
data_dic = {"White": y1, "Random Walk": y2, "Drift": y3, "Oscillate": y4}


def series(data_set, lag):
    """Plot a series with a lag-shifted copy of itself, and its autocorrelation.

    The top panel shows the selected series together with the same series
    shifted by ``lag``; the bottom panel shows the autocorrelation from
    ``acf2`` with the chosen lag and its value marked.

    Parameters
    ----------
    data_set : str
        Key into the module-level ``data_dic`` selecting the series.
    lag : float
        Lag in the units of ``t``; converted to a whole number of samples
        using the module-level spacing ``dt``.
    """
    data = data_dic[data_set]
    acf_values = acf2(data)

    # Round instead of truncating: lag / dt is a float quotient, so an exact
    # multiple of dt can land just below the integer (e.g. 28.999...) and
    # int() alone would then pick the previous sample.
    ilag = int(round(lag / dt))

    fig, axs = plt.subplots(nrows=2, sharex=True, figsize=(8, 4))

    # Top panel: the series, and the series advanced by ilag samples.
    axs[0].set_xlim(0, 2.0)
    axs[0].plot(t, data)
    axs[0].plot(t[: len(t) - ilag], data[ilag:], linestyle="dotted", alpha=0.85)
    axs[0].set_xlabel("t (s)")

    # Bottom panel: autocorrelation, with cross-hairs at the selected lag.
    axs[1].plot(lags, acf_values)
    axs[1].axvline(x=lag, color="k")
    axs[1].axhline(y=acf_values[ilag], linestyle="dotted", color="k")
    axs[1].set_xlabel("lag (s)")
    axs[1].text(0.75, 0.85, f"acf({lag:.2f}) = {acf_values[ilag]:.2f}")


# Interactive controls for `series`: a choice of data set, and a lag
# ranging from 0 to 1.5 in steps of 0.01.
itr = interact(
    series,
    data_set=["Random Walk", "White", "Drift", "Oscillate"],
    lag=(0, 1.5, 0.01),
)

In [None]:
# Put lag into correct units: sample index times the sample spacing.
lag = np.arange(len(t)) * dt

# Labels follow the column description at the top of the notebook:
# y2 is the random walk and y3 is the linear drift.
plt.title("Autocorrelation")
plt.plot(lag, acf2(y1), label="white")
plt.plot(lag, acf2(y2), label="random walk")
plt.plot(lag, acf2(y3), label="drift")
plt.plot(lag, acf2(y4), label="oscilate")
plt.xlabel("lag (s)")
plt.legend()
plt.show()