In [1]:
import numpy as np
import matplotlib.pyplot as plt

import datetime

from pathlib import Path

from utils import ref_date, read_data

In [2]:
# Directory holding the raw log and every derived file written by this notebook.
data_dir = Path('../data')
raw_csv = data_dir / 'The Big Beep Theory - BEEP.csv'

# Both flags are forwarded verbatim to utils.read_data.
# NOTE(review): presumably they make day keys and time values absolute
# offsets rather than formatted strings — confirm in utils.read_data.
data = read_data(raw_csv, absolute_dates=True, absolute_times=True)

Below we reformat the data so that each column corresponds to one observer and each entry is the absolute time, in seconds, of one of that observer's observations.

In [3]:
# Regroup the per-day records by observer: each observer maps to the list of
# timestamps (day offset in seconds + time within the day) in encounter order.
observer_times = {}
for day, entries in data.items():
    # Start of this day expressed in seconds: days * 24 h * 60 min * 60 s.
    day_offset = day * 24 * 60 * 60
    for time_of_day, who in entries:
        timestamp = day_offset + time_of_day
        # setdefault creates the observer's list the first time they appear.
        observer_times.setdefault(who, []).append(timestamp)

In [4]:
import pickle

# Persist the observer -> timestamps dictionary as a binary pickle next to
# the raw data.
pickle_path = data_dir / 'beep_data.pickle'
with pickle_path.open('wb') as handle:
    pickle.dump(observer_times, handle)

In [5]:
import pandas as pd

# Observers have different numbers of observations, so every list is padded
# with NaN up to the length of the longest one; that gives equal-length
# columns for the DataFrame constructor.
max_length = max(len(d) for d in observer_times.values())

# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
# and raises AttributeError there.
data_save = {
    observer: times + [np.nan] * (max_length - len(times))
    for observer, times in observer_times.items()
}

# One column per observer, one row per observation slot (NaN = no observation).
df = pd.DataFrame(data_save)

In [6]:
# Show the padded table; a bare last-line expression uses the notebook's rich
# DataFrame display, which elides the middle rows of a long frame.
df

Unnamed: 0,unknown,TB,DR,EF,CC,ER,LT,AA,DG,AG,NW
0,36960.0,2552880.0,4533600.0,4618920.0,4969620.0,5153640.0,6270060.0,6444900.0,11877540.0,13006800.0,15163800.0
1,40200.0,,4962660.0,4630080.0,4970160.0,7993500.0,7305780.0,9113220.0,12227760.0,27952800.0,
2,54900.0,,4963980.0,4630260.0,5067900.0,7994100.0,7901700.0,9114120.0,12745800.0,27953040.0,
3,57360.0,,4971780.0,4617960.0,5143320.0,7996020.0,7902900.0,10340880.0,12830220.0,,
4,209400.0,,4972140.0,4965480.0,6170640.0,7996200.0,7907700.0,12485520.0,12830460.0,,
...,...,...,...,...,...,...,...,...,...,...,...
140,,,28557300.0,,,,,,,,
141,,,28565520.0,,,,,,,,
142,,,28653600.0,,,,,,,,
143,,,28654680.0,,,,,,,,


In [7]:
# Export the padded table as CSV; index=False leaves the row index out of the
# file.
csv_path = data_dir / 'beep_data.csv'
df.to_csv(csv_path, index=False)