In [34]:
import pandas as pd
import os

In [35]:
import csv

def parse_txt_to_csv(txt_file, csv_file):
    """Convert a pipe-delimited text table into a CSV file.

    Each non-blank line of ``txt_file`` is split on ``|``. The first and last
    pieces (the text outside the outermost pipes) are discarded and every
    remaining cell is whitespace-stripped. A fixed ``ID, Timestamp, Value``
    header is written first, followed by one CSV row per parsed line.

    Lines that yield no cells at all -- for example ``+----+----+`` border
    lines, which contain no ``|`` -- are skipped instead of being written
    as empty CSV rows.

    Args:
        txt_file: Path of the pipe-delimited text file to read.
        csv_file: Path of the CSV file to create (overwritten if it exists).
    """
    with open(txt_file, 'r') as in_file:
        with open(csv_file, 'w', newline='') as out_file:
            writer = csv.writer(out_file)
            writer.writerow(('ID', 'Timestamp', 'Value'))  # fixed header row
            for raw_line in in_file:
                line = raw_line.strip()
                if not line:
                    continue
                # Drop the pieces outside the outermost pipes, then tidy cells.
                cells = [cell.strip() for cell in line.split("|")[1:-1]]
                if cells:  # nothing between pipes -> not a data row
                    writer.writerow(cells)

# Convert the raw temperature text export into a CSV in the same folder.
parse_txt_to_csv(
    txt_file='../data/original/building/temperature.txt',
    csv_file='../data/original/building/temperature.csv',
)

In [36]:
# Load the temperature CSV, using its 'ID' column as the index.
data = pd.read_csv(
    '../data/original/building/temperature.csv',
    index_col='ID',
)

In [19]:
def jsons_to_csv(directory, csv_file):
    """Combine every ``.json`` file in a directory into a single CSV file.

    Each entry of ``directory`` whose name ends in ``.json`` is loaded with
    ``pd.read_json``; the resulting frames are concatenated with a fresh
    integer index and written to ``csv_file`` without the index column.

    Files are processed in sorted filename order so that the row order of
    the output does not depend on the arbitrary order ``os.listdir`` returns.

    Args:
        directory: Directory to scan (not recursive).
        csv_file: Path of the CSV file to write.

    Raises:
        ValueError: If ``directory`` contains no ``.json`` files.
    """
    json_names = sorted(
        name for name in os.listdir(directory) if name.endswith('.json')
    )
    if not json_names:
        # pd.concat([]) would otherwise fail with "No objects to concatenate".
        raise ValueError(f"No .json files found in {directory!r}")

    frames = [pd.read_json(os.path.join(directory, name)) for name in json_names]
    pd.concat(frames, ignore_index=True).to_csv(csv_file, index=False)

# Flatten all occupation JSON files into one CSV beside the source folder.
jsons_to_csv(
    directory='../data/original/building/occupation/',
    csv_file='../data/original/building/occupation.csv',
)

In [38]:
# The place-id lookup file is semicolon-separated.
mapping = pd.read_csv(
    '../data/original/building/place_ids.csv',
    sep=';',
)

In [39]:
# Load the combined occupation CSV.
occupancy = pd.read_csv(
    '../data/original/building/occupation.csv',
)

In [40]:
# Left join: keep every occupancy row, attaching mapping columns where
# occupancy.asset matches mapping.id.
merge = occupancy.merge(
    mapping,
    how='left',
    left_on='asset',
    right_on='id',
)

In [41]:
def add_place_id_column(df):
    """Add a ``place_id`` column derived from the prefix of ``df['name']``.

    Prefix mapping: ``'0.B'`` -> 1, ``'1.D'`` -> 3, ``'L1.'`` -> 4. Any other
    name gets an empty string. Names that are not strings (e.g. NaN or None
    left behind by an unmatched left join) are treated as unmatched instead
    of raising ``AttributeError``.

    Args:
        df: DataFrame with a ``name`` column. It is modified in place.

    Returns:
        The same ``df`` object, now carrying the ``place_id`` column.
    """
    prefix_to_place_id = (('0.B', 1), ('1.D', 3), ('L1.', 4))

    def _place_id_for(name):
        # Only real strings can carry a prefix; missing values fall through.
        if isinstance(name, str):
            for prefix, place_id in prefix_to_place_id:
                if name.startswith(prefix):
                    return place_id
        return ''

    df['place_id'] = [_place_id_for(name) for name in df['name']]
    return df

# Tag each merged row with its place id (the frame is updated in place and returned).
merge = add_place_id_column(df=merge)

In [42]:
def add_atrium_column(df):
    """Add a boolean ``atrium`` column based on the prefix of ``df['name']``.

    A row is flagged ``True`` when its name starts with ``'0.B'``, ``'1.D'``,
    ``'L1.'`` or ``'1.C'``, and ``False`` otherwise. Names that are not
    strings (e.g. NaN or None left behind by an unmatched left join) are
    flagged ``False`` instead of raising ``AttributeError``.

    Args:
        df: DataFrame with a ``name`` column. It is modified in place.

    Returns:
        The same ``df`` object, now carrying the ``atrium`` column.
    """
    atrium_prefixes = ('0.B', '1.D', 'L1.', '1.C')
    # str.startswith accepts a tuple and matches any of its prefixes.
    df['atrium'] = [
        isinstance(name, str) and name.startswith(atrium_prefixes)
        for name in df['name']
    ]
    return df

# Flag the rows whose name carries one of the atrium prefixes.
merge = add_atrium_column(df=merge)

In [43]:
# Parse the timestamps, convert them to Europe/Amsterdam, then drop the
# timezone so the column holds naive local datetimes.
merge['utc_time'] = (
    pd.to_datetime(merge['utc_time'])
    .dt.tz_convert('Europe/Amsterdam')
    .dt.tz_localize(None)
)

In [44]:
# The values were converted to naive Europe/Amsterdam time, so the
# 'utc_time' label no longer describes them; rename the column.
# Rebinding the result (rather than inplace=True) keeps the step chainable.
merge = merge.rename(columns={'utc_time': 'timestamp'})

In [28]:
# Persist the processed occupation table without the positional index.
merge.to_csv(
    '../data/processed/building/occupation.csv',
    index=False,
)