# Notebook Setup

In [None]:
import pandas as pd
import seaborn as sb

from IPython.display import display

In [None]:
# Path of the CSV file holding the measurement log (loaded with pd.read_csv below).
MEASURES_FILENAME = "./measures.csv"
# Record-type value whose rows are counted to split the log into series.
CLOCK_ADJUST_RECTYPE = "CLOCK_ADJUSTED"
# Record-type value selected for the per-series regression at the end.
MEASURE_RECTYPE = "REGULAR"

# (lower, upper) rate bounds; each is multiplied by the elapsed days to draw
# the shaded band on the superposed-cycles plot.
# NOTE(review): presumably the COSC chronometer tolerance in seconds per day — confirm.
COSC_LIMITS_SEC_DAY = (-4, 6)

# Style name handed to seaborn's set_theme.
GRAPH_STYLE = "whitegrid"

In [None]:
# Apply the seaborn theme: "notebook" plotting context plus the configured style.
sb.set_theme(context="notebook", style=GRAPH_STYLE)
# NOTE(review): called without arguments right after set_theme; looks redundant — confirm.
sb.set_style()

# Keep DataFrame previews compact: at most six rows before pandas truncates.
pd.set_option("display.max_rows", 6)

# Read Data

In [None]:
# Load the CSV, flip the row order so the records end up in chronological
# order (the file is presumably stored newest-first), and renumber the
# index from zero.
measures = (
    pd.read_csv(MEASURES_FILENAME)
    .iloc[::-1]
    .reset_index(drop=True)
)

In [None]:
# Swap the verbose CSV headers for short identifiers that are usable as
# attribute names in the rest of the notebook.
measures = measures.rename(
    {
        "Type of record": "rectype",
        "Offset (seconds)": "offset",
    },
    axis="columns",
)

In [None]:
# Store the record type as a pandas categorical column.
measures = measures.assign(rectype=measures["rectype"].astype("category"))

## Extract machine-readable timestamps (UTC)

In [None]:
# Interpret the raw epoch column as milliseconds and keep the result as a
# datetime column (no timezone is attached by this call).
measures = measures.assign(
    timestamp=pd.to_datetime(measures["Timestamp (Epoch Time)"], unit="ms"),
)

## Create Unique Chain Numbers

In [None]:
# Running count of clock-adjustment rows, delayed by one row: the adjustment
# row itself keeps the previous series number and the next row opens a new one.
measures["seriesID"] = (
    (measures["rectype"] == CLOCK_ADJUST_RECTYPE)
    .cumsum()
    .shift(periods=1, fill_value=0)
)

In [None]:
# Preview the first row of every series to check where the splits fall.
measures.groupby(by="seriesID").head(n=1)

In [None]:
# Earliest timestamp recorded in each series, indexed by seriesID.
seriesStarts = measures.groupby("seriesID")["timestamp"].min()

# Time elapsed since the first record of the row's own series ...
measures["adjustmentAge"] = measures["timestamp"] - measures["seriesID"].map(seriesStarts)
# ... and the same span as fractional days (seconds -> hours -> days).
measures["adjustmentDays"] = measures["adjustmentAge"].dt.total_seconds() / 3600 / 24

# Trailing expression so the notebook renders the enriched table.
measures




# Plot Offsets

In [None]:
# Offset over wall-clock time as a line plot, coloured by series, on a wide figure.
f = sb.relplot(
    data=measures,
    kind="line",
    x="timestamp",
    y="offset",
    hue="seriesID",
    aspect=2,
)
# Let matplotlib rotate and align the date tick labels.
f.figure.autofmt_xdate()

# Plot Superposed Adjustment Cycles

In [None]:
sb.set_style(style="ticks")
# Offset against days since each series began, with one linear fit per series.
f = sb.lmplot(
    data=measures,
    x="adjustmentDays",
    y="offset",
    hue="seriesID",
)

maxDays = measures["adjustmentDays"].max()
# NOTE(review): minOffset is not used anywhere in the visible code.
minOffset = measures["offset"].min()

# Shaded wedge between the two limit rates: both edges start at the origin
# and reach (rate * maxDays) at the right-hand end.
limitsX = [0, maxDays]
limitYMin = [0, maxDays * COSC_LIMITS_SEC_DAY[0]]
limitYMax = [0, maxDays * COSC_LIMITS_SEC_DAY[1]]

f.ax.fill_between(
    limitsX,
    limitYMin,
    limitYMax,
    color="green",
    alpha=0.2,
    zorder=0,
)

# Faint guide lines through the origin, one per integer slope from -30 to 30.
for i in range(-30, 31):
    f.ax.axline(
        (0, 0),
        (0.01, i / 100),
        color="lightgrey",
        linewidth=0.5,
        linestyle="--",
    )

# Bold dashed baseline at zero offset.
f.ax.axhline(0, color="black", linewidth=2, linestyle="--")

# Get Drift Rates

In [None]:
from scipy.stats import linregress

def chainStats(df):
    """Fit a straight line to one series' offsets and summarise the fit.

    Regresses ``offset`` on ``adjustmentDays`` with ``linregress`` and prints
    the series identifier and the full regression result as progress output.

    Parameters:
        df: DataFrame with the rows of a single series. It must provide
            ``adjustmentDays`` and ``offset`` columns; ``seriesID`` is only
            used for the progress message and may be absent.

    Returns:
        pandas Series with the entries ``slope``, ``intercept`` and ``R``
        (the regression's ``rvalue``).
    """
    # groupby.apply can hand over the group without its grouping column;
    # reading df.seriesID would then raise. Fall back to the group key that
    # apply attaches to the group as ``name`` (None when called directly).
    if "seriesID" in df.columns:
        series_id = df.seriesID.unique()[0]
    else:
        series_id = getattr(df, "name", None)
    print(f"Doing chain stats on {series_id}")

    res = linregress(df.adjustmentDays, df.offset)
    print(res)
    return pd.Series({"slope":res.slope, "intercept":res.intercept, "R":res.rvalue})

# Keep only the regular measurement rows, then run the regression per series.
(
    measures
    .query("rectype == @MEASURE_RECTYPE")
    .groupby("seriesID")
    .apply(chainStats)
)