In [53]:
import math
import numpy as np
import pandas as pd

In [54]:
# Retrieve Neel's "clean" files
# Sensor exports
appa1 = pd.read_csv('neel_exports/appa_hr_crop_withCreated.csv')
appa2 = pd.read_csv('neel_exports/appa_hr_2_crop_withCreated.csv')

# Ground-truth tables. The appa1 file carries an extra 'Unnamed: 0' column
# (presumably a row index saved by an earlier export -- TODO confirm), which
# is discarded as soon as the file is read.
appa1_gt = (
    pd.read_csv('neel_exports/appa1_gt_clean_withCO.csv')
    .drop('Unnamed: 0', axis=1)
)
appa2_gt = pd.read_csv('data/appa2_gt.csv')

In [55]:
# Parse the Time column of every frame into pandas datetimes.
# Each frame converts its OWN Time column: copying the sensor timestamps into
# the ground-truth frames would discard the ground-truth times and line rows up
# by index position instead.
# NOTE(review): assumes both ground-truth files contain a 'Time' column -- confirm.
appa1.Time = pd.to_datetime(appa1.Time)
appa2.Time = pd.to_datetime(appa2.Time)

appa1_gt.Time = pd.to_datetime(appa1_gt.Time)
appa2_gt.Time = pd.to_datetime(appa2_gt.Time)

In [56]:
def add_times(df):
    """Add cyclical calendar features and the year to ``df``, in place.

    Every periodic quantity ``v`` with period ``P`` is encoded as the pair
    ``sin(2*pi*v/P)``, ``cos(2*pi*v/P)`` so that values one period apart map
    to the same point on the unit circle. The angle is computed first and the
    trigonometric function applied to it; scaling ``sin(v)`` by ``2*pi/P``
    afterwards would not be periodic in ``P``.

    Columns added, in this order:
        sin_hour, cos_hour         -- hour of the day (0-23), period 24
        sin_weekday, cos_weekday   -- day of the week (0-6), period 7
        sin_month, cos_month       -- month of the year (1-12), period 12
        sin_ordate, cos_ordate     -- day of the year (1-366), period 366
        year                       -- calendar year

    Args:
        df: DataFrame with a datetime-typed ``Time`` column (the ``.dt``
            accessor is used on it).

    Returns:
        None. ``df`` is modified in place.
    """
    def _add_cyclical(name, values, period):
        # Map the value onto an angle in [0, 2*pi) before taking sin/cos.
        angle = 2 * math.pi * values / period
        df["sin_" + name] = np.sin(angle)
        df["cos_" + name] = np.cos(angle)

    times = df["Time"].dt

    _add_cyclical("hour", times.hour, 24)
    _add_cyclical("weekday", times.weekday, 7)
    _add_cyclical("month", times.month, 12)
    # Day of the year, not Timestamp.toordinal(): the latter counts days since
    # 0001-01-01 and therefore drifts out of phase from one year to the next.
    _add_cyclical("ordate", times.dayofyear, 366)

    # Add the year feature
    df["year"] = times.year

# Add the time features to both sensor frames (each is modified in place)
for frame in (appa1, appa2):
    add_times(frame)

In [72]:
# Match column names to the convention
def rename_columns(df):
    """Rewrite the column names of ``df`` in place to the naming convention.

    Each (old, new) pair below is applied in turn as a substring replacement
    on every column name: 'TimeSinceCreated' becomes 'Age', and the heater
    columns get their channel number moved to the front
    (e.g. 'heatR_1' -> '1_heatR').

    Args:
        df: DataFrame whose ``columns`` are strings.

    Returns:
        None. ``df.columns`` is reassigned.
    """
    substitutions = (
        ('TimeSinceCreated', 'Age'),
        ('heatR_1', '1_heatR'),
        ('heatV_1', '1_heatV'),
        ('heatR_2', '2_heatR'),
        ('heatV_2', '2_heatV'),
    )
    for old_part, new_part in substitutions:
        df.columns = df.columns.str.replace(old_part, new_part)

# Apply the naming convention to both sensor frames (each is modified in place)
for frame in (appa1, appa2):
    rename_columns(frame)

In [82]:
# Copy every ground-truth column of appa2_gt, except 'Time', into appa2.
# Column assignment lines rows up by index label, not by timestamp, so both
# frames are expected to share the same row index -- TODO confirm.
for col in appa2_gt.columns:
    if col != 'Time':
        appa2[col] = appa2_gt[col]

In [85]:
# Move ground truth labels to the end
def move_labels(df, labels):
    """Move the ``labels`` columns to the end of ``df``, in place.

    After the call the non-label columns keep their relative order and are
    followed by the label columns in the order given in ``labels``. The frame
    passed in is modified directly, so callers do not need the return value.

    Args:
        df: DataFrame to reorder.
        labels: iterable of column names to move to the end.

    Returns:
        None. ``df`` is modified in place.

    Raises:
        KeyError: if any label is not a column of ``df``. The check runs
            before any column is moved, so ``df`` is left untouched.
    """
    labels = list(labels)
    missing = [label for label in labels if label not in df.columns]
    if missing:
        raise KeyError(f"columns not found in DataFrame: {missing}")

    for label in labels:
        # pop() removes the column from the frame; assigning it back appends
        # it as the last column. Working by name avoids stale positions.
        df[label] = df.pop(label)

# Ground-truth label columns of each frame, in the order they should appear
appa1_labels = ['PM10', 'NO2', 'SO2', 'O3', 'CO']
appa2_labels = ['PM10', 'CO', 'NO2']

for frame, frame_labels in ((appa1, appa1_labels), (appa2, appa2_labels)):
    move_labels(frame, frame_labels)

In [87]:
# Export both frames as CSV. to_csv is called with its defaults, so the row
# index is written as the first (unnamed) column of each file.
for frame, out_path in (
    (appa1, 'noam_exports/appa1.csv'),
    (appa2, 'noam_exports/appa2.csv'),
):
    frame.to_csv(out_path)