In [1]:
import os
import glob
import datetime
import pandas as pd
import json
import dateutil
pd.set_option('display.max_rows', 500)

In [2]:
# Every path directly under data/apr2021 except the run.sh entry that
# sits in the same directory.
files = [
    path
    for path in glob.glob("data/apr2021/*")
    if path != 'data/apr2021/run.sh'
]

In [3]:
# Keep the piece that follows the first '.' of each path; for the files
# read later in this notebook that piece is the numeric snapshot suffix.
ts = list(map(lambda path: path.split('.')[1], files))

In [4]:
# Discard any 'sh' entry, i.e. the suffix a *.sh path would have produced.
ts = list(filter(lambda suffix: suffix != 'sh', ts))

In [5]:
len(ts)

880

In [6]:
# Collapse repeated suffixes (several files share one) into unique values.
uts = [*set(ts)]

In [7]:
# The suffixes are still strings at this point; convert them to integers.
uts = list(map(int, uts))

In [8]:
# Order the timestamps ascending, in place (the same list object is kept).
uts[:] = sorted(uts)

In [9]:
# Human-readable form of every timestamp. fromtimestamp() is called without
# a tz argument, so the strings are in the local time zone of this machine.
dates = [
    datetime.datetime.fromtimestamp(epoch_seconds).strftime('%Y-%m-%d %H:%M:%S')
    for epoch_seconds in uts
]

In [10]:
dates

['2021-04-06 19:58:35',
 '2021-04-06 20:01:38',
 '2021-04-06 20:04:41',
 '2021-04-06 20:07:44',
 '2021-04-06 20:10:47',
 '2021-04-06 20:13:51',
 '2021-04-06 20:16:54',
 '2021-04-06 20:19:57',
 '2021-04-06 20:23:01',
 '2021-04-06 20:26:04',
 '2021-04-06 20:29:09',
 '2021-04-06 20:32:13',
 '2021-04-06 20:35:16',
 '2021-04-06 20:38:21',
 '2021-04-06 20:41:25',
 '2021-04-06 20:44:28',
 '2021-04-06 20:47:32',
 '2021-04-06 20:50:37',
 '2021-04-06 20:53:41',
 '2021-04-06 20:56:44',
 '2021-04-06 20:59:48',
 '2021-04-06 21:02:52',
 '2021-04-06 21:05:56',
 '2021-04-06 21:09:00',
 '2021-04-06 21:12:03',
 '2021-04-06 21:15:07',
 '2021-04-06 21:18:10',
 '2021-04-06 21:21:13',
 '2021-04-06 21:24:16',
 '2021-04-06 21:27:19',
 '2021-04-06 21:30:24',
 '2021-04-06 21:33:28',
 '2021-04-06 21:36:31',
 '2021-04-06 21:39:35',
 '2021-04-06 21:42:38',
 '2021-04-06 21:45:43',
 '2021-04-06 21:48:46',
 '2021-04-06 21:51:50',
 '2021-04-06 21:54:54',
 '2021-04-06 21:57:57',
 '2021-04-06 22:01:00',
 '2021-04-06 22:

In [11]:
def readRace(file):
    """Return the ``LastPublished`` value nested under ``Election`` in a JSON file.

    Parameters
    ----------
    file : str
        Path of the JSON document to parse.

    Returns
    -------
    The object stored at ``['Election']['LastPublished']`` of the parsed
    document.
    """
    with open(file) as handle:
        document = json.load(handle)
    election = document['Election']
    return election['LastPublished']

def readElectionResults(file):
    """Return the top-level ``LastPublished`` value of a JSON file.

    Parameters
    ----------
    file : str
        Path of the JSON document to parse.

    Returns
    -------
    The object stored under the ``'LastPublished'`` key of the parsed
    document.
    """
    with open(file) as handle:
        document = json.load(handle)
    return document['LastPublished']
    
def readLastPublished(file):
    """Return the first line of ``file`` without its first and last character.

    The two dropped characters are presumably the double quotes wrapping a
    bare JSON string such as ``"2021-04-06T16:05:30"`` -- confirm against
    the data files.

    The line terminator is removed before slicing, so a file that ends
    with a newline yields the same value as one that does not. Previously
    the newline was sliced off and the closing wrapper character was kept.

    Parameters
    ----------
    file : str
        Path of the text file to read.

    Returns
    -------
    str
        The first line, minus line terminator and minus one character at
        each end.

    Raises
    ------
    IndexError
        If the file is empty (same exception type as before).
    """
    with open(file) as f:
        # Only the first line is needed; avoid loading the whole file.
        first_line = f.readline()
    if not first_line:
        raise IndexError('no first line to read in {}'.format(file))
    return first_line.rstrip('\r\n')[1:-1]
    
    

In [12]:
def perStep(timestep):
    """Gather the four ``LastPublished`` readings captured for one timestep.

    Reads the ``cb12_p``, ``dpi_p``, ``lastPublished`` and
    ``electionResults`` files under ``data/apr2021`` whose suffix is
    ``timestep``.

    Parameters
    ----------
    timestep
        Suffix identifying the snapshot; it is formatted into each file name.

    Returns
    -------
    dict
        Keys ``'dpi'``, ``'cb'``, ``'electionResults'`` and
        ``'lastPublished'`` hold the value read from the matching file;
        ``'ts'`` echoes ``timestep``.
    """
    cb_published = readRace('data/apr2021/cb12_p.{}'.format(timestep))
    dpi_published = readRace('data/apr2021/dpi_p.{}'.format(timestep))
    feed_published = readLastPublished('data/apr2021/lastPublished.{}'.format(timestep))
    results_published = readElectionResults('data/apr2021/electionResults.{}'.format(timestep))
    return {
        'dpi': dpi_published,
        'cb': cb_published,
        'electionResults': results_published,
        'lastPublished': feed_published,
        "ts": timestep,
    }

In [13]:
# Map each snapshot's local datetime to the readings collected for it.
events = {
    datetime.datetime.fromtimestamp(timestep): perStep(timestep)
    for timestep in uts
}

In [14]:
# One row per snapshot: the keys of `events` (snapshot datetimes) become the
# index and the keys of each perStep() dict become the columns.
df = pd.DataFrame.from_dict(events, orient='index')

In [15]:
df.head(100)

Unnamed: 0,dpi,cb,electionResults,lastPublished,ts
2021-04-06 19:58:35,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,1617757115
2021-04-06 20:01:38,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,1617757298
2021-04-06 20:04:41,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,1617757481
2021-04-06 20:07:44,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,1617757664
2021-04-06 20:10:47,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,1617757847
2021-04-06 20:13:51,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,1617758031
2021-04-06 20:16:54,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,1617758214
2021-04-06 20:19:57,2021-04-06T20:19:28,2021-04-06T20:19:28,2021-04-06T20:19:28,2021-04-06T20:19:59,1617758397
2021-04-06 20:23:01,2021-04-06T20:19:59,2021-04-06T20:19:59,2021-04-06T20:19:59,2021-04-06T20:19:59,1617758581
2021-04-06 20:26:04,2021-04-06T20:19:59,2021-04-06T20:19:59,2021-04-06T20:19:59,2021-04-06T20:19:59,1617758764


In [16]:
len(df['lastPublished'].unique())


22

In [17]:
len(df['cb'].unique())

24

In [18]:
len(df['dpi'].unique())

24

In [19]:
len(df['electionResults'].unique())

22

In [20]:
# Lookup table pairing each raw timestamp with its formatted date string.
ts_df = pd.DataFrame(dict(timestamp=uts, date=dates))

In [21]:
ts_df

Unnamed: 0,timestamp,date
0,1617757115,2021-04-06 19:58:35
1,1617757298,2021-04-06 20:01:38
2,1617757481,2021-04-06 20:04:41
3,1617757664,2021-04-06 20:07:44
4,1617757847,2021-04-06 20:10:47
5,1617758031,2021-04-06 20:13:51
6,1617758214,2021-04-06 20:16:54
7,1617758397,2021-04-06 20:19:57
8,1617758581,2021-04-06 20:23:01
9,1617758764,2021-04-06 20:26:04


In [22]:
df['lastPublished'].unique()

array(['2021-04-06T16:05:30', '2021-04-06T20:10:25',
       '2021-04-06T20:19:59', '2021-04-06T20:27:24',
       '2021-04-06T20:29:38', '2021-04-06T20:32:26',
       '2021-04-06T20:37:24', '2021-04-06T21:00:56',
       '2021-04-06T21:11:59', '2021-04-06T21:12:26',
       '2021-04-06T21:21:09', '2021-04-06T21:24:13',
       '2021-04-06T21:24:54', '2021-04-06T21:28:51',
       '2021-04-06T21:43:16', '2021-04-06T22:03:48',
       '2021-04-06T22:16:09', '2021-04-06T22:17:05',
       '2021-04-06T22:30:56', '2021-04-06T22:35:18',
       '2021-04-06T23:03:52', '2021-04-06T23:44:31'], dtype=object)

In [23]:
df['lastPublished']

2021-04-06 19:58:35    2021-04-06T16:05:30
2021-04-06 20:01:38    2021-04-06T16:05:30
2021-04-06 20:04:41    2021-04-06T16:05:30
2021-04-06 20:07:44    2021-04-06T16:05:30
2021-04-06 20:10:47    2021-04-06T20:10:25
2021-04-06 20:13:51    2021-04-06T20:10:25
2021-04-06 20:16:54    2021-04-06T20:10:25
2021-04-06 20:19:57    2021-04-06T20:19:59
2021-04-06 20:23:01    2021-04-06T20:19:59
2021-04-06 20:26:04    2021-04-06T20:19:59
2021-04-06 20:29:09    2021-04-06T20:27:24
2021-04-06 20:32:13    2021-04-06T20:29:38
2021-04-06 20:35:16    2021-04-06T20:32:26
2021-04-06 20:38:21    2021-04-06T20:37:24
2021-04-06 20:41:25    2021-04-06T20:37:24
2021-04-06 20:44:28    2021-04-06T20:37:24
2021-04-06 20:47:32    2021-04-06T20:37:24
2021-04-06 20:50:37    2021-04-06T20:37:24
2021-04-06 20:53:41    2021-04-06T20:37:24
2021-04-06 20:56:44    2021-04-06T20:37:24
2021-04-06 20:59:48    2021-04-06T20:37:24
2021-04-06 21:02:52    2021-04-06T21:00:56
2021-04-06 21:05:56    2021-04-06T21:00:56
2021-04-06 

In [24]:
# Flag rows whose lastPublished differs from the previous row's value.
# The first row has no predecessor, so the shifted series is padded with the
# first value itself, which makes that row compare equal (False).
# fill_value must be a scalar: .iloc[0] supplies one, whereas the earlier
# .head(1) passed a one-row Series and only worked through length-1
# broadcasting.
df["isStatusChanged"] = df["lastPublished"].shift(1, fill_value=df["lastPublished"].iloc[0]) != df["lastPublished"]

In [25]:
df

Unnamed: 0,dpi,cb,electionResults,lastPublished,ts,isStatusChanged
2021-04-06 19:58:35,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,1617757115,False
2021-04-06 20:01:38,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,1617757298,False
2021-04-06 20:04:41,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,1617757481,False
2021-04-06 20:07:44,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,2021-04-06T16:05:30,1617757664,False
2021-04-06 20:10:47,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,1617757847,True
2021-04-06 20:13:51,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,1617758031,False
2021-04-06 20:16:54,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,2021-04-06T20:10:25,1617758214,False
2021-04-06 20:19:57,2021-04-06T20:19:28,2021-04-06T20:19:28,2021-04-06T20:19:28,2021-04-06T20:19:59,1617758397,True
2021-04-06 20:23:01,2021-04-06T20:19:59,2021-04-06T20:19:59,2021-04-06T20:19:59,2021-04-06T20:19:59,1617758581,False
2021-04-06 20:26:04,2021-04-06T20:19:59,2021-04-06T20:19:59,2021-04-06T20:19:59,2021-04-06T20:19:59,1617758764,False


In [26]:
# Timestamps of the snapshots at which lastPublished changed.
df.loc[df['isStatusChanged'], 'ts']

2021-04-06 20:10:47    1617757847
2021-04-06 20:19:57    1617758397
2021-04-06 20:29:09    1617758949
2021-04-06 20:32:13    1617759133
2021-04-06 20:35:16    1617759316
2021-04-06 20:38:21    1617759501
2021-04-06 21:02:52    1617760972
2021-04-06 21:12:03    1617761523
2021-04-06 21:15:07    1617761707
2021-04-06 21:21:13    1617762073
2021-04-06 21:24:16    1617762256
2021-04-06 21:27:19    1617762439
2021-04-06 21:30:24    1617762624
2021-04-06 21:45:43    1617763543
2021-04-06 22:04:04    1617764644
2021-04-06 22:16:18    1617765378
2021-04-06 22:19:22    1617765562
2021-04-06 22:31:34    1617766294
2021-04-06 22:37:41    1617766661
2021-04-06 23:05:08    1617768308
2021-04-06 23:44:59    1617770699
Name: ts, dtype: int64