Preprocess data and save it into pickle files for easier usage

In [5]:
# imports
import json
from datetime import datetime
import pickle

In [2]:
# load data
filepath = 'raw_data.json'
# JSON text is UTF-8 by specification; pass the encoding explicitly so that
# decoding does not depend on the platform's default locale encoding.
with open(filepath, encoding="utf-8") as f:
    data = json.load(f)

# test access: show the top-level keys of the loaded dictionary
print(data.keys())

dict_keys(['05. Dec (Thursday)', '18. Dec (Wednesday)'])


In [3]:
# Cut data (Code from Elias)
def cut_data(data, cut_start, cut_end):
    """Return the entries of ``data`` inside the closed range [cut_start, cut_end].

    The input order is preserved and both boundaries are inclusive.
    """
    kept = []
    for value in data:
        if cut_start <= value <= cut_end:
            kept.append(value)
    return kept

# Boundaries of the kept time range, as epoch timestamps in milliseconds
cut_start = 1734519960000  # 12:06 (UTC+1)
cut_end = 1734520799000  # 12:19:59 (UTC+1)

# Trim every spot of the second day to that range, in place.
# The "Metadata" entry is not a timestamp list and is left untouched.
second_day = data["18. Dec (Wednesday)"]
for spot in second_day:
    if spot != "Metadata":
        second_day[spot] = cut_data(second_day[spot], cut_start, cut_end)

In [4]:
# group into groups of 30 seconds
def group_timestamps(timestamps, group_window=30, starter=None, end=None):
    """Bucket millisecond timestamps into consecutive fixed-width windows.

    The timestamps are floored to whole seconds, sorted, and distributed over
    windows of ``group_window`` seconds. Window ``i`` covers the half-open
    range ``[start + i * group_window, start + (i + 1) * group_window)``, so
    the index of a group always identifies its time window. Windows that
    contain no timestamp are kept as empty lists.

    Timestamps earlier than the start are placed in the first window, and
    timestamps later than ``end`` extend the result past ``end``.

    Args:
        timestamps: Iterable of numeric timestamps in milliseconds.
        group_window: Width of one window in seconds; must be positive.
        starter: Start of the first window in milliseconds. Defaults to the
            earliest timestamp.
        end: Millisecond timestamp up to which trailing empty windows are
            appended. Defaults to the latest timestamp.

    Returns:
        A list of lists of timestamps in seconds, one inner list per window,
        in chronological order. An empty list is returned when there are no
        timestamps and no ``starter`` to anchor the first window.

    Raises:
        ValueError: If ``group_window`` is not positive.
    """
    if group_window <= 0:
        # A non-positive width would never advance the window start below.
        raise ValueError("group_window must be positive")

    # Work in whole seconds, in chronological order.
    seconds = sorted(t // 1000 for t in timestamps)

    if starter is not None:
        group_starter = starter // 1000
    elif seconds:
        group_starter = seconds[0]
    else:
        # Nothing to group and nothing to anchor the first window to.
        return []

    if end is not None:
        group_end = end // 1000
    else:
        group_end = seconds[-1] if seconds else group_starter

    groups = []
    group = []
    for t in seconds:
        # Close the current window (and any fully empty ones after it) until
        # the window containing t is reached.
        while t - group_starter >= group_window:
            groups.append(group)
            group = []
            group_starter += group_window
        group.append(t)

    # The pending group must be stored BEFORE the padding; otherwise it would
    # end up behind the trailing empty windows, at the wrong index.
    groups.append(group)

    # Pad with empty windows until the window containing group_end.
    while group_starter + group_window <= group_end:
        groups.append([])
        group_starter += group_window

    return groups

In [9]:
# Group every spot of the second day into 30-second windows (the "Metadata"
# entry is skipped) and pickle the resulting {spot: groups} mapping.
second_day_grouped = {
    spot: group_timestamps(stamps, group_window=30, starter=cut_start, end=cut_end)
    for spot, stamps in data["18. Dec (Wednesday)"].items()
    if spot != "Metadata"
}

filename = "data/second_day_g30.pkl"
with open(filename, "wb") as f:
    pickle.dump(second_day_grouped, f)

In [11]:
# Save as [timeframes][spots] -> value instead of:
#         [spots][timeframes] -> value
filename = "data/second_day_g30_transposed.pkl"

# A transposition is only meaningful when every spot has the same number of
# time frames; fail loudly instead of silently dropping or missing frames.
frame_counts = {len(groups) for groups in second_day_grouped.values()}
if len(frame_counts) > 1:
    raise ValueError(
        f"Spots have differing numbers of time frames: {sorted(frame_counts)}"
    )

# Build one {spot: group} dictionary per time frame. This no longer depends
# on a particular spot name being present to determine the frame count.
data_transposed = [
    dict(zip(second_day_grouped, frame))
    for frame in zip(*second_day_grouped.values())
]

# Open the output file only once the data is complete, so that a failure
# above cannot leave a truncated pickle file behind.
with open(filename, "wb") as f:
    pickle.dump(data_transposed, f)

In [13]:
# Persist the spot names (the keys of the grouped data) as a plain list
filename = "data/second_day_keys.pkl"
with open(filename, "wb") as f:
    pickle.dump([*second_day_grouped], f)