In [1]:
# One Dimensional Time Series Data - Regularity Measure
# Math 448, UBC 2019

f = open("timestamps_n20_60mins_std5_acc100.txt", "r")
timestamps = []
for timestamp in f:
    timestamps.append(timestamp.strip())

In [2]:
timestamps

['01/01/2019 12:00 AM',
 '01/01/2019 01:00 AM',
 '01/01/2019 01:59 AM',
 '01/01/2019 02:59 AM',
 '01/01/2019 04:05 AM',
 '01/01/2019 05:09 AM',
 '01/01/2019 06:06 AM',
 '01/01/2019 07:01 AM',
 '01/01/2019 08:03 AM',
 '01/01/2019 09:12 AM',
 '01/01/2019 10:15 AM',
 '01/01/2019 11:12 AM',
 '01/01/2019 12:20 PM',
 '01/01/2019 01:13 PM',
 '01/01/2019 02:18 PM',
 '01/01/2019 03:26 PM',
 '01/01/2019 04:24 PM',
 '01/01/2019 05:25 PM',
 '01/01/2019 06:17 PM',
 '01/01/2019 07:14 PM']

In [3]:
import time
import datetime

# convert from standard time format to timestamp
for i in range(len(timestamps)):
    timestamps[i] = time.mktime(datetime.datetime.strptime(timestamps[i], "%m/%d/%Y %I:%M %p").timetuple())

In [4]:
timestamps

[1546329600.0,
 1546333200.0,
 1546336740.0,
 1546340340.0,
 1546344300.0,
 1546348140.0,
 1546351560.0,
 1546354860.0,
 1546358580.0,
 1546362720.0,
 1546366500.0,
 1546369920.0,
 1546374000.0,
 1546377180.0,
 1546381080.0,
 1546385160.0,
 1546388640.0,
 1546392300.0,
 1546395420.0,
 1546398840.0]

In [5]:
# shift data so first event is at 0
timestamps[:] = [x - timestamps[0] for x in timestamps]

In [6]:
# Thus timestamps gives the point in time of occurence for each event from the first event
timestamps

[0.0,
 3600.0,
 7140.0,
 10740.0,
 14700.0,
 18540.0,
 21960.0,
 25260.0,
 28980.0,
 33120.0,
 36900.0,
 40320.0,
 44400.0,
 47580.0,
 51480.0,
 55560.0,
 59040.0,
 62700.0,
 65820.0,
 69240.0]

In [7]:
# To detect regularity in timestamps given a certain temporal periodicity, we can consider the more general problem
# of detecting regularity of timestamps of a certain event type, and thereby consider this an instance where we
# discretize time by the second, and label the occurence of an event as '1', and label every other point in time
# as '0'

# e.g. if timestamps = [0, 1, 5, 7] then event_sequence = [1, 1, 0, 0, 1, 0, 1]

import matplotlib.pyplot as plt
import numpy as np

event_sequence = np.zeros(int(timestamps[len(timestamps)-1])+1)

j = 0 # iterator over timestamps
for i in range(len(event_sequence)):
    if i == timestamps[j]:
        event_sequence[i] = 1
        j += 1

In [8]:
period = 3600 # Model where period is provided 

In [9]:
# One possible loss function, sum over deviations of timestamps from 0, period, 2*period, ..., n-1*period, respectively

error = 0
n = int(np.sum(event_sequence))

# computes residual difference between expected location assuming perfect regularity and actuality
# residual = |expected-actuality| if |expected-actuality| < period/2, o.w. residual = period/2
def residual(expected, period, event_sequence):
    for i in range(int(period/2)):
        if ((expected + i < len(event_sequence)) and event_sequence[expected + i] == 1):
            return i
        elif ((expected - i > 0) and event_sequence[expected - i] == 1):
            return i
    return period/2

for i in range(n):
    error += residual(period*i, period, event_sequence)
error

13320

In [10]:
# Loss function we will employ: sum over |(timestamp_i+1 - timestamp_i) - period|

error = 0
prev = 0

for i in range(len(event_sequence)):
    if event_sequence[i] == 1:
        error += abs(i-prev-period)
        prev = i
error

8280

In [11]:
# Model where we detect period
# First compute differences between timestamps
differences = np.zeros(n - 1)

count = 1
j = 0 # index of differences
for i in range(len(event_sequence)):
    if i == 0:
        i += 1
    elif event_sequence[i] == 1:
        differences[j] = count
        j += 1
        count = 1
    else:
        count += 1

In [12]:
differences

array([ 3600.,  3540.,  3600.,  3960.,  3840.,  3420.,  3300.,  3720.,
        4140.,  3780.,  3420.,  4080.,  3180.,  3900.,  4080.,  3480.,
        3660.,  3120.,  3420.])

In [13]:
# period = argmin f(p) = sum_{1 to n-1}[|differences[i] - p|]
# optimization problem can be formulated as a linear program

from scipy.optimize import linprog

c = np.ones(n)
c[n-1] = 0

b = np.zeros(2*(n-1))
for i in range(n-1):
    b[2*i] = -differences[i]
    b[2*i+1] = differences[i]

A = np.zeros((2*(n-1), n))
for i in range(n-1):
    A[2*i, i] = -1
    A[2*i, n-1] = -1
    A[2*i+1, i] = -1
    A[2*i+1, n-1] = 1
    
res = linprog(c, A_ub=A, b_ub=b)

In [14]:
minimizing_period = res.x[n-1]
minimizing_period

3600.0

In [15]:
# Regularity measure: gaussian
np.exp(-np.power(error, 2.) / (2*n * np.power(period, 2.)))

0.87612193721397758