In [201]:
import pandas as pd
import numpy as np

# 1. Load the full_sensor_data CSV into a DataFrame

In [202]:
# Read the raw sensor export. The "UTC time" column becomes the index, and
# parse_dates=True asks pandas to parse that index as datetimes.
history = pd.read_csv(
    "data/full_sensor_data.csv",
    index_col="UTC time",
    parse_dates=True,
)

## 1.1. Data grouped by 1 day

In [203]:
# Bucket the rows into 1-day bins on the index and average every column
# within each bin.
history_aggegated_1D = (
    history
    .groupby(by=pd.Grouper(freq='1D'))
    .mean()
)

### Creating MultiIndex

In [204]:
# Turn the flat "<first>_<second>" column labels into a two-level MultiIndex.
# Only the first two '_'-separated pieces are kept; a label without '_' raises
# IndexError. Level 0 is later used as the sensor id; level 1 is presumably the
# measurement name (verify against the CSV header).
# This assigns to .columns in place, so re-running the cell on an already
# converted frame fails (tuple labels have no .split).
history_aggegated_1D.columns = pd.MultiIndex.from_tuples(
    [
        (pieces[0], pieces[1])
        for pieces in (label.split('_') for label in history_aggegated_1D.columns)
    ]
)

### Merging data from pairs of sensors with similar locations

In [205]:
# Pairs of sensor ids (level-0 column keys) whose readings are merged into one
# logical sensor labelled "<first>_<second>".
sensor_pairs = [('3','140'),('227','857'),('178','713'),('142','895'),('187','808'),('211','622')]

# Collect the merged sub-frames in a list and concatenate once, instead of
# growing a DataFrame with pd.concat on every iteration (which re-copies all
# previously merged columns each time and starts from an empty frame whose
# index is not a DatetimeIndex).
merged_frames = []
for pair in sensor_pairs:
    first_id, second_id = pair
    # Keep the first sensor's value where present; fall back to the second
    # sensor's value where the first is NaN.
    combined = history_aggegated_1D[first_id].combine_first(history_aggegated_1D[second_id])
    # Re-attach a level-0 key so the merged columns fit the two-level layout.
    combined.columns = pd.MultiIndex.from_product([[first_id + '_' + second_id], combined.columns])
    merged_frames.append(combined)
combinedes = pd.concat(merged_frames, axis=1)

history_aggegated_1D = pd.concat([history_aggegated_1D, combinedes], axis=1)

# Drop the original per-sensor columns now that the merged ones exist. The ids
# are derived from sensor_pairs so the two lists cannot drift apart.
# Note: this rebinds history_aggegated_1D, so the cell cannot be re-run on its
# own output (the paired ids are gone after the drop).
paired_sensor_ids = [sensor_id for pair in sensor_pairs for sensor_id in pair]
history_aggegated_1D = history_aggegated_1D.drop(columns=paired_sensor_ids, level=0)

### 1.1.1. Imputing using mean

In [206]:
# Replace every NaN with the mean of the column it sits in.
filled_mean_history_aggegated_1D = history_aggegated_1D.pipe(
    lambda frame: frame.fillna(value=frame.mean())
)

### 1.1.2. Imputing using the median

In [207]:
# Replace every NaN with the median of the column it sits in.
filled_median_history_aggegated_1D = history_aggegated_1D.pipe(
    lambda frame: frame.fillna(value=frame.median())
)

### 1.1.3 Imputing using the rolling mean, falling back to the column mean for NaNs the rolling mean cannot fill

In [208]:
# Two-stage fill:
#   1. each NaN takes the mean of the non-NaN values in the 24-row window
#      ending at that row (min_periods=1 lets a single valid value suffice);
#   2. NaNs that survive (windows with no valid value at all) take the mean of
#      the partly filled column.
# NOTE(review): the window is 24 rows, i.e. 24 daily bins here - confirm this
# is intended and not carried over from hourly data.
filled_rolling_mean_history_aggegated_1D = (
    history_aggegated_1D
    .pipe(lambda frame: frame.fillna(frame.rolling(window=24, min_periods=1).mean()))
    .pipe(lambda partly_filled: partly_filled.fillna(partly_filled.mean()))
)

### 1.1.4 Imputing using the rolling median, falling back to the column median for NaNs the rolling median cannot fill

In [209]:
# Two-stage fill:
#   1. each NaN takes the median of the non-NaN values in the 24-row window
#      ending at that row (min_periods=1 lets a single valid value suffice);
#   2. NaNs that survive (windows with no valid value at all) take the median
#      of the partly filled column.
# NOTE(review): the window is 24 rows, i.e. 24 daily bins here - confirm this
# is intended and not carried over from hourly data.
filled_rolling_median_history_aggegated_1D = (
    history_aggegated_1D
    .pipe(lambda frame: frame.fillna(frame.rolling(window=24, min_periods=1).median()))
    .pipe(lambda partly_filled: partly_filled.fillna(partly_filled.median()))
)

### 1.1.5 Imputing using the interpolation with linear method

In [210]:
# Fill gaps by linear interpolation down each column; 'linear' treats the rows
# as equally spaced and ignores the index values.
filled_interpolation_linear_history_aggegated_1D = (
    history_aggegated_1D
    .interpolate(method='linear')
)

### 1.1.6 Imputing using the interpolation with time method

In [211]:
# Fill gaps by interpolation weighted by the time distance between rows;
# 'time' requires the frame to carry a datetime index.
filled_interpolation_time_history_aggegated_1D = (
    history_aggegated_1D
    .interpolate(method='time')
)

### 1.1.7 Imputing using the interpolation with quadratic method

In [212]:
# Fill gaps with a quadratic (second-order spline) interpolation; pandas
# delegates this method to SciPy, so SciPy must be installed.
filled_interpolation_quadratic_history_aggegated_1D = (
    history_aggegated_1D
    .interpolate(method='quadratic')
)

## 1.2. Data grouped by 2 days

In [213]:
# Bucket the rows into 2-day bins on the index and average every column
# within each bin.
history_aggegated_2D = (
    history
    .groupby(by=pd.Grouper(freq='2D'))
    .mean()
)

### Creating MultiIndex

In [214]:
# Turn the flat "<first>_<second>" column labels into a two-level MultiIndex.
# Only the first two '_'-separated pieces are kept; a label without '_' raises
# IndexError. Level 0 is later used as the sensor id; level 1 is presumably the
# measurement name (verify against the CSV header).
# This assigns to .columns in place, so re-running the cell on an already
# converted frame fails (tuple labels have no .split).
history_aggegated_2D.columns = pd.MultiIndex.from_tuples(
    [
        (pieces[0], pieces[1])
        for pieces in (label.split('_') for label in history_aggegated_2D.columns)
    ]
)

### Merging data from pairs of sensors with similar locations

In [215]:
# Pairs of sensor ids (level-0 column keys) whose readings are merged into one
# logical sensor labelled "<first>_<second>".
sensor_pairs = [('3','140'),('227','857'),('178','713'),('142','895'),('187','808'),('211','622')]

# Collect the merged sub-frames in a list and concatenate once, instead of
# growing a DataFrame with pd.concat on every iteration (which re-copies all
# previously merged columns each time and starts from an empty frame whose
# index is not a DatetimeIndex).
merged_frames = []
for pair in sensor_pairs:
    first_id, second_id = pair
    # Keep the first sensor's value where present; fall back to the second
    # sensor's value where the first is NaN.
    combined = history_aggegated_2D[first_id].combine_first(history_aggegated_2D[second_id])
    # Re-attach a level-0 key so the merged columns fit the two-level layout.
    combined.columns = pd.MultiIndex.from_product([[first_id + '_' + second_id], combined.columns])
    merged_frames.append(combined)
combinedes = pd.concat(merged_frames, axis=1)

history_aggegated_2D = pd.concat([history_aggegated_2D, combinedes], axis=1)

# Drop the original per-sensor columns now that the merged ones exist. The ids
# are derived from sensor_pairs so the two lists cannot drift apart.
# Note: this rebinds history_aggegated_2D, so the cell cannot be re-run on its
# own output (the paired ids are gone after the drop).
paired_sensor_ids = [sensor_id for pair in sensor_pairs for sensor_id in pair]
history_aggegated_2D = history_aggegated_2D.drop(columns=paired_sensor_ids, level=0)

### 1.2.1. Imputing using mean

In [216]:
# Replace every NaN with the mean of the column it sits in. The fill values are
# computed from the 2-day frame itself; taking them from the 1-day frame would
# impute statistics of a different aggregation.
filled_mean_history_aggegated_2D = history_aggegated_2D.fillna(history_aggegated_2D.mean())

### 1.2.2. Imputing using the median

In [217]:
# Replace every NaN with the median of the column it sits in.
filled_median_history_aggegated_2D = history_aggegated_2D.pipe(
    lambda frame: frame.fillna(value=frame.median())
)

### 1.2.3 Imputing using the rolling mean, falling back to the column mean for NaNs the rolling mean cannot fill

In [218]:
# Two-stage fill:
#   1. each NaN takes the mean of the non-NaN values in the 24-row window
#      ending at that row (min_periods=1 lets a single valid value suffice);
#   2. NaNs that survive (windows with no valid value at all) take the mean of
#      the partly filled column.
# NOTE(review): the window is 24 rows, i.e. 24 two-day bins (48 days) here -
# confirm this is intended and not carried over from hourly data.
filled_rolling_mean_history_aggegated_2D = (
    history_aggegated_2D
    .pipe(lambda frame: frame.fillna(frame.rolling(window=24, min_periods=1).mean()))
    .pipe(lambda partly_filled: partly_filled.fillna(partly_filled.mean()))
)

### 1.2.4 Imputing using the rolling median, falling back to the column median for NaNs the rolling median cannot fill

In [219]:
# Two-stage fill:
#   1. each NaN takes the median of the non-NaN values in the 24-row window
#      ending at that row (min_periods=1 lets a single valid value suffice);
#   2. NaNs that survive (windows with no valid value at all) take the median
#      of the partly filled column.
# NOTE(review): the window is 24 rows, i.e. 24 two-day bins (48 days) here -
# confirm this is intended and not carried over from hourly data.
filled_rolling_median_history_aggegated_2D = (
    history_aggegated_2D
    .pipe(lambda frame: frame.fillna(frame.rolling(window=24, min_periods=1).median()))
    .pipe(lambda partly_filled: partly_filled.fillna(partly_filled.median()))
)

### 1.2.5 Imputing using the interpolation with linear method

In [220]:
# Fill gaps by linear interpolation down each column; 'linear' treats the rows
# as equally spaced and ignores the index values.
filled_interpolation_linear_history_aggegated_2D = (
    history_aggegated_2D
    .interpolate(method='linear')
)

### 1.2.6 Imputing using the interpolation with time method

In [221]:
# Fill gaps by interpolation weighted by the time distance between rows;
# 'time' requires the frame to carry a datetime index.
filled_interpolation_time_history_aggegated_2D = (
    history_aggegated_2D
    .interpolate(method='time')
)

### 1.2.7 Imputing using the interpolation with quadratic method

In [222]:
# Fill gaps with a quadratic (second-order spline) interpolation; pandas
# delegates this method to SciPy, so SciPy must be installed.
filled_interpolation_quadratic_history_aggegated_2D = (
    history_aggegated_2D
    .interpolate(method='quadratic')
)