# Aranet Time-series Feature Extraction
This notebook applies feature-extraction methods to identify the strongest regressors for predicting CO2 levels from sequential time-series data.



In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as datetime

# Load Data

In [40]:
# Load the .npz archive of windows.
# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, which can
# execute arbitrary code -- only load archives that come from a trusted source.
loaded_data = np.load('../datasets/windows.npz', allow_pickle=True)

# Split each label's array along its first axis into a list of per-window
# NumPy arrays, skipping the 'column_names' entry (it is metadata, not a label)
windows = {label: list(arrays) for label, arrays in loaded_data.items() if label != 'column_names'}


for label, windows_list in windows.items():
    print(f"Label: {label}, Number of windows: {len(windows_list)}")

# Extract the column names
column_names = loaded_data['column_names']

# Build the DataFrame version of the same windows from the arrays already held
# in `windows`. The archive loads its members lazily on every access, so going
# back to `loaded_data.items()` would read each array from disk a second time.
# copy=True keeps each DataFrame independent of the array it was built from.
windows_df = {label: [pd.DataFrame(array, columns=column_names, copy=True) for array in arrays_list]
              for label, arrays_list in windows.items()}


Label: Chen, Number of windows: 12
Label: Song, Number of windows: 13


In [41]:
# Report the shape of every window (a NumPy array) in the windows dictionary,
# grouped under the label it belongs to
for label in windows:
    windows_list = windows[label]
    print(f"Label: {label}")
    for i, window in enumerate(windows_list):
        print(f"Window {i}: {np.shape(window)}")


Label: Chen
Window 0: (76, 9)
Window 1: (76, 9)
Window 2: (76, 9)
Window 3: (76, 9)
Window 4: (76, 9)
Window 5: (76, 9)
Window 6: (76, 9)
Window 7: (76, 9)
Window 8: (76, 9)
Window 9: (76, 9)
Window 10: (76, 9)
Window 11: (76, 9)
Label: Song
Window 0: (76, 9)
Window 1: (76, 9)
Window 2: (76, 9)
Window 3: (76, 9)
Window 4: (76, 9)
Window 5: (76, 9)
Window 6: (76, 9)
Window 7: (76, 9)
Window 8: (76, 9)
Window 9: (76, 9)
Window 10: (76, 9)
Window 11: (76, 9)
Window 12: (76, 9)


In [42]:
# Peek at the raw NumPy array of the first 'Chen' window (columns are unlabeled here)
windows['Chen'][0]

array([[5.9500e+02, 6.5800e+01, 4.0000e+01, 1.0170e+03, 1.0000e+00,
        1.0000e+00, 1.0000e+00, 1.0000e+01, 0.0000e+00],
       [5.9700e+02, 6.6400e+01, 3.9000e+01, 1.0169e+03, 1.0000e+00,
        1.0000e+00, 1.0000e+00, 1.0000e+01, 0.0000e+00],
       [5.8400e+02, 6.6800e+01, 3.9000e+01, 1.0169e+03, 1.0000e+00,
        1.0000e+00, 0.0000e+00, 1.6000e+01, 0.0000e+00],
       [6.1100e+02, 6.7200e+01, 3.9000e+01, 1.0170e+03, 1.0000e+00,
        1.0000e+00, 0.0000e+00, 1.6000e+01, 0.0000e+00],
       [5.7700e+02, 6.7500e+01, 3.9000e+01, 1.0170e+03, 1.0000e+00,
        1.0000e+00, 0.0000e+00, 1.6000e+01, 0.0000e+00],
       [5.8700e+02, 6.7800e+01, 3.9000e+01, 1.0169e+03, 1.0000e+00,
        1.0000e+00, 0.0000e+00, 1.6000e+01, 0.0000e+00],
       [5.9600e+02, 6.7900e+01, 3.8000e+01, 1.0170e+03, 1.0000e+00,
        1.0000e+00, 0.0000e+00, 1.6000e+01, 0.0000e+00],
       [5.7700e+02, 6.8200e+01, 3.8000e+01, 1.0170e+03, 1.0000e+00,
        1.0000e+00, 0.0000e+00, 1.6000e+01, 0.0000e+00],


In [43]:
# The first 'Chen' window as a DataFrame, labeled with the column names stored in the archive
windows_df['Chen'][0]

Unnamed: 0,Carbon dioxide(ppm),Temperature(°F),Relative humidity(%),Atmospheric pressure(hPa),door1,door2,hvac,subject_count,lecturer
0,595.0,65.8,40.0,1017.0,1.0,1.0,1.0,10.0,0.0
1,597.0,66.4,39.0,1016.9,1.0,1.0,1.0,10.0,0.0
2,584.0,66.8,39.0,1016.9,1.0,1.0,0.0,16.0,0.0
3,611.0,67.2,39.0,1017.0,1.0,1.0,0.0,16.0,0.0
4,577.0,67.5,39.0,1017.0,1.0,1.0,0.0,16.0,0.0
...,...,...,...,...,...,...,...,...,...
71,543.0,71.9,37.0,1017.5,1.0,1.0,1.0,18.0,0.0
72,545.0,72.1,36.0,1017.5,1.0,1.0,1.0,18.0,0.0
73,540.0,72.4,37.0,1017.5,1.0,1.0,1.0,18.0,0.0
74,515.0,72.9,37.0,1017.5,1.0,1.0,1.0,18.0,0.0


# NumPy Tensor

In [44]:
# Collect every window (a 2D NumPy array) together with the label it belongs to
X_list = []
y_list = []

for label, windows_list in windows.items():
    for window in windows_list:
        # The windows are already NumPy arrays, so they are appended as-is
        X_list.append(window)
        # Append the corresponding label at the same position in the label list
        y_list.append(label)

# Stack the 2D windows along a new leading axis into a single 3D array (tensor).
# np.stack raises a ValueError unless every window has the same shape, whereas
# np.array(...) on mismatched windows may, depending on the NumPy version,
# build an object array instead of a numeric tensor.
X = np.stack(X_list)

# Convert the label list to a NumPy array (one label per window, same order as X)
y = np.array(y_list)

# Print the shapes of the resulting arrays
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")


Shape of X: (25, 76, 9)
Shape of y: (25,)


In [45]:
# Drop the last column (the final entry of column_names) from the 3D array X.
# The slice is applied only while X still carries every original column, so
# re-running this cell cannot silently strip a further column from X.
if X.shape[-1] == len(column_names):
    X = X[:, :, :-1]

# Verify the new shape of X
print(f"New shape of X: {X.shape}")


New shape of X: (25, 76, 8)


In [46]:
# Collapse the leading axes so every row of every window becomes one row of a
# 2D array, keeping the last axis of X as the columns
X_reshaped = np.reshape(X, (-1, X.shape[-1]))
print(f"Reshaped X to: {X_reshaped.shape}")

# Confirm the flattening is reversible by restoring the original 3D shape
X_original_shape = np.reshape(X_reshaped, X.shape)
print(f"Reshaped back to original shape: {X_original_shape.shape}")


Reshaped X to: (1900, 8)
Reshaped back to original shape: (25, 76, 8)


In [47]:
# Labels, one per window, in the same order as the first axis of X
y

array(['Chen', 'Chen', 'Chen', 'Chen', 'Chen', 'Chen', 'Chen', 'Chen',
       'Chen', 'Chen', 'Chen', 'Chen', 'Song', 'Song', 'Song', 'Song',
       'Song', 'Song', 'Song', 'Song', 'Song', 'Song', 'Song', 'Song',
       'Song'], dtype='<U4')

In [48]:
# Show the first sample as a (label, window array) pair
(y[0], X[0])

('Chen',
 array([[5.9500e+02, 6.5800e+01, 4.0000e+01, 1.0170e+03, 1.0000e+00,
         1.0000e+00, 1.0000e+00, 1.0000e+01],
        [5.9700e+02, 6.6400e+01, 3.9000e+01, 1.0169e+03, 1.0000e+00,
         1.0000e+00, 1.0000e+00, 1.0000e+01],
        [5.8400e+02, 6.6800e+01, 3.9000e+01, 1.0169e+03, 1.0000e+00,
         1.0000e+00, 0.0000e+00, 1.6000e+01],
        [6.1100e+02, 6.7200e+01, 3.9000e+01, 1.0170e+03, 1.0000e+00,
         1.0000e+00, 0.0000e+00, 1.6000e+01],
        [5.7700e+02, 6.7500e+01, 3.9000e+01, 1.0170e+03, 1.0000e+00,
         1.0000e+00, 0.0000e+00, 1.6000e+01],
        [5.8700e+02, 6.7800e+01, 3.9000e+01, 1.0169e+03, 1.0000e+00,
         1.0000e+00, 0.0000e+00, 1.6000e+01],
        [5.9600e+02, 6.7900e+01, 3.8000e+01, 1.0170e+03, 1.0000e+00,
         1.0000e+00, 0.0000e+00, 1.6000e+01],
        [5.7700e+02, 6.8200e+01, 3.8000e+01, 1.0170e+03, 1.0000e+00,
         1.0000e+00, 0.0000e+00, 1.6000e+01],
        [5.8600e+02, 6.8400e+01, 3.8000e+01, 1.0170e+03, 1.0000e+00,
  