In [1]:
import pandas as pd
import numpy as np

In [2]:
from tqdm import tqdm

def interp_data(input_csv):
    """Read a wide CSV and return imputed values plus masks as a 3-D array.

    The CSV is read with its first column as the row index and its first three
    rows as a three-level column header. For every distinct level-0 label
    (a "feature"), the ``(feature, 'mask')`` and ``(feature, 'mean')``
    sub-frames are selected and two channels are produced:

    1. Imputed means: the ``mean`` values are kept only where ``mask`` is
       non-zero, gaps between kept values are linearly interpolated along each
       row, leading/trailing gaps are filled with the nearest kept value, and
       rows with no kept value at all fall back to the raw ``mean`` values.
    2. The ``mask`` values, unchanged.

    The number of positions along the last axis is taken from the data rather
    than being fixed to 24, so inputs with any (consistent) number of level-2
    columns per feature are accepted.

    Parameters
    ----------
    input_csv : str or path-like
        Anything accepted by ``pandas.read_csv`` as its first argument.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_rows, 2 * n_features, n_steps)``. Channel ``2 * k``
        holds the imputed means of the k-th feature (in order of first
        appearance in the header) and channel ``2 * k + 1`` holds its mask.

    Raises
    ------
    KeyError
        If a feature lacks a ``'mask'`` or ``'mean'`` sub-block.
    ValueError
        If the file has no feature columns, or features have differing numbers
        of level-2 columns.
    """
    data = pd.read_csv(input_csv, index_col=[0], header=[0, 1, 2])
    channels = []

    for col in tqdm(data.columns.get_level_values(0).unique()):
        mask = data.loc[:, (col, 'mask')]
        mean = data.loc[:, (col, 'mean')]

        # Turning mask zeros into NaN before multiplying keeps a mean value only
        # where the mask is non-zero; everything else becomes a gap to fill.
        observed = mask.replace({0: np.nan}).multiply(mean)

        imputed = (
            observed
            .interpolate(axis=1, limit_area='inside')  # gaps between kept values
            .ffill(axis=1)                             # trailing gaps
            .bfill(axis=1)                             # leading gaps
            .fillna(mean)                              # rows with nothing kept
        )

        channels.append(imputed.to_numpy())
        channels.append(mask.to_numpy())

    # Stacking on a new axis 1 yields (rows, channels, steps) directly, without
    # building throw-away column labels or assuming a fixed number of steps.
    return np.stack(channels, axis=1)

In [3]:
# Convert each split's CSV into its interpolated array and persist it as .npy,
# one split at a time (load -> save) in train, valid, test order.
ordered_arrays = []
for csv_path, npy_path in (
    ('X_train.csv', 'full_train_data_ordered.npy'),
    ('X_valid.csv', 'full_valid_data_ordered.npy'),
    ('X_test.csv', 'full_test_data_ordered.npy'),
):
    split_array = interp_data(csv_path)
    np.save(npy_path, split_array)
    ordered_arrays.append(split_array)

# Keep the per-split names available to any later cells.
full_train_data_ordered, full_valid_data_ordered, full_test_data_ordered = ordered_arrays

100%|█████████████████████████████████████████████████████████████████████████████████| 104/104 [00:45<00:00,  2.27it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 104/104 [00:05<00:00, 20.14it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 104/104 [00:10<00:00,  9.61it/s]
