# Aranet Time-series Feature Extraction
The purpose of this notebook is to use feature extraction methods for picking our strongest regressors for predicting sequential time-series data for co2 levels.



In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as datetime

# Load Data

In [2]:
# Load the data from the CSV file
aranet4 = pd.read_csv('../datasets/aranet4.csv')
aranetExp = pd.read_csv('../datasets/aranetExp.csv')

# Convert the date column to datetime
aranetExp['date'] = pd.to_datetime(aranetExp['date'])
aranetExp = aranetExp.rename(columns={'date': 'Datetime'})
aranetExp = aranetExp.drop(columns=['id','time'])
aranetExp = aranetExp.set_index('Datetime')

# Convert the date column to datetime 
aranet4['Datetime'] = pd.to_datetime(aranet4['Datetime'])
aranet4 = aranet4.set_index('Datetime')

In [24]:
# Load the .npz file
loaded_data = np.load('../datasets/windows.npz', allow_pickle=True)

# Convert the loaded data back to a dictionary of lists of NumPy arrays
windows = {label: list(arrays) for label, arrays in loaded_data.items()}

for label, windows_list in windows.items():
    print(f"Label: {label}, Number of windows: {len(windows_list)}")

# Extract the column names
column_names = loaded_data['column_names']

# Convert the loaded data back to a dictionary of lists of DataFrames, using the column names
windows_df = {label: [pd.DataFrame(array, columns=column_names) for array in arrays_list] 
              for label, arrays_list in loaded_data.items() if label != 'column_names'}


Label: column_names, Number of windows: 9
Label: Chen, Number of windows: 12
Label: Song, Number of windows: 13


In [25]:
windows['Chen'][0][:]

array([[595.0, 65.8, 40.0, 1017.0, 'open', 'open', 'on', 10.0, 'Chen'],
       [597.0, 66.4, 39.0, 1016.9, 'open', 'open', 'on', 10.0, 'Chen'],
       [584.0, 66.8, 39.0, 1016.9, 'open', 'open', 'off', 16.0, 'Chen'],
       [611.0, 67.2, 39.0, 1017.0, 'open', 'open', 'off', 16.0, 'Chen'],
       [577.0, 67.5, 39.0, 1017.0, 'open', 'open', 'off', 16.0, 'Chen'],
       [587.0, 67.8, 39.0, 1016.9, 'open', 'open', 'off', 16.0, 'Chen'],
       [596.0, 67.9, 38.0, 1017.0, 'open', 'open', 'off', 16.0, 'Chen'],
       [577.0, 68.2, 38.0, 1017.0, 'open', 'open', 'off', 16.0, 'Chen'],
       [586.0, 68.4, 38.0, 1017.0, 'open', 'open', 'off', 18.0, 'Chen'],
       [588.0, 68.5, 38.0, 1017.1, 'open', 'open', 'off', 18.0, 'Chen'],
       [587.0, 68.5, 38.0, 1017.1, 'open', 'open', 'off', 18.0, 'Chen'],
       [599.0, 68.6, 37.0, 1017.2, 'open', 'open', 'off', 18.0, 'Chen'],
       [616.0, 68.7, 37.0, 1017.2, 'open', 'open', 'off', 18.0, 'Chen'],
       [596.0, 68.9, 37.0, 1017.2, 'open', 'open', 'o

In [26]:
windows_df['Chen'][0]

Unnamed: 0,Carbon dioxide(ppm),Temperature(°F),Relative humidity(%),Atmospheric pressure(hPa),door1,door2,hvac,subject_count,lecturer
0,595.0,65.8,40.0,1017.0,open,open,on,10.0,Chen
1,597.0,66.4,39.0,1016.9,open,open,on,10.0,Chen
2,584.0,66.8,39.0,1016.9,open,open,off,16.0,Chen
3,611.0,67.2,39.0,1017.0,open,open,off,16.0,Chen
4,577.0,67.5,39.0,1017.0,open,open,off,16.0,Chen
...,...,...,...,...,...,...,...,...,...
71,543.0,71.9,37.0,1017.5,open,open,on,18.0,Chen
72,545.0,72.1,36.0,1017.5,open,open,on,18.0,Chen
73,540.0,72.4,37.0,1017.5,open,open,on,18.0,Chen
74,515.0,72.9,37.0,1017.5,open,open,on,18.0,Chen


# Numpy Tensor

In [6]:
# Initialize a list to store the 3D arrays and labels
X_list = []
y_list = []

for label, windows_list in windows.items():
    for window in windows_list:
        # Convert each DataFrame to a NumPy array and append to the list
        X_list.append(window)
        # Append the corresponding label to the label list
        y_list.append(label)

# Convert the list of 3D arrays to a single 3D array (tensor)
X = np.array(X_list)

# Convert the label list to a NumPy array
y = np.array(y_list)

# Print the shapes of the resulting arrays
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")


Shape of X: (25, 76, 9)
Shape of y: (25,)


In [7]:
y

array(['Chen', 'Chen', 'Chen', 'Chen', 'Chen', 'Chen', 'Chen', 'Chen',
       'Chen', 'Chen', 'Chen', 'Chen', 'Song', 'Song', 'Song', 'Song',
       'Song', 'Song', 'Song', 'Song', 'Song', 'Song', 'Song', 'Song',
       'Song'], dtype='<U4')

In [10]:
y[0],X[0]

('Chen',
 array([[595.0, 65.8, 40.0, 1017.0, 'open', 'open', 'on', 10.0, 'Chen'],
        [597.0, 66.4, 39.0, 1016.9, 'open', 'open', 'on', 10.0, 'Chen'],
        [584.0, 66.8, 39.0, 1016.9, 'open', 'open', 'off', 16.0, 'Chen'],
        [611.0, 67.2, 39.0, 1017.0, 'open', 'open', 'off', 16.0, 'Chen'],
        [577.0, 67.5, 39.0, 1017.0, 'open', 'open', 'off', 16.0, 'Chen'],
        [587.0, 67.8, 39.0, 1016.9, 'open', 'open', 'off', 16.0, 'Chen'],
        [596.0, 67.9, 38.0, 1017.0, 'open', 'open', 'off', 16.0, 'Chen'],
        [577.0, 68.2, 38.0, 1017.0, 'open', 'open', 'off', 16.0, 'Chen'],
        [586.0, 68.4, 38.0, 1017.0, 'open', 'open', 'off', 18.0, 'Chen'],
        [588.0, 68.5, 38.0, 1017.1, 'open', 'open', 'off', 18.0, 'Chen'],
        [587.0, 68.5, 38.0, 1017.1, 'open', 'open', 'off', 18.0, 'Chen'],
        [599.0, 68.6, 37.0, 1017.2, 'open', 'open', 'off', 18.0, 'Chen'],
        [616.0, 68.7, 37.0, 1017.2, 'open', 'open', 'off', 18.0, 'Chen'],
        [596.0, 68.9, 37.0, 101