In [None]:
import os
import gc
import glob
import sklearn
import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
from matplotlib import pyplot as plt

from joblib import parallel_backend
from joblib import Parallel, delayed

from tensorflow.keras.layers import Conv1D

In [None]:
# Root folder of the preprocessed SisFall data (Google Drive mount).
PATH = './drive/MyDrive/SisFall_Preprocessed'

target_csv = os.path.join(PATH, 'target_values.csv')
users_csv = os.path.join(PATH, 'user_details.csv')

# Each row of target_values.csv becomes one key -> value entry; dict() over the
# row array only works when the file has exactly two columns.
# extract_aggregations looks entries up by recording file name.
target = dict(pd.read_csv(target_csv).values)

# Per-user metadata table, loaded as-is.
users = pd.read_csv(users_csv)

In [None]:
def collect(data):
  """Append ``data`` to the notebook-level ``global_feats`` list.

  ``global_feats`` is not defined in this cell; it is assigned in a later
  cell, so that cell must have run before this function is called.

  NOTE(review): nothing in the visible notebook calls ``collect`` -- the
  feature rows are taken from the ``Parallel`` return value instead. Confirm
  it is unused before removing it.
  """
  global_feats.append(data)

def extract_aggregations(file_path, targets=None):
  """Build one feature row for a single recording file.

  The file is read as a header-less, space-separated table whose columns are
  named ``C0`` ... ``C8``. For every column the sum, mean and standard
  deviation are computed.

  Args:
    file_path: Path to the recording file.
    targets: Optional mapping from file base name to label. When omitted, the
      notebook-level ``target`` dict is used, so existing single-argument
      callers keep working.

  Returns:
    list: ``[file_name, sum_C0, mean_C0, std_C0, ..., sum_C8, mean_C8,
    std_C8, label]`` -- 29 entries, in the order expected by the
    ``column_names`` header built in the driver cell.

  Raises:
    KeyError: If the file's base name has no entry in the label mapping.
  """
  # os.path.basename honours the platform separator; splitting on '/' breaks
  # on Windows, where glob joins matched components with backslashes.
  file_name = os.path.basename(file_path)

  df = pd.read_csv(file_path,
                   header=None, sep=' ',
                   names=[f'C{i}' for i in range(9)])

  # Only fall back to the global when no mapping was passed, so the function
  # can be exercised without the notebook state.
  labels = target if targets is None else targets

  feats = [file_name]
  for c in df.columns:
    series = df[c]
    feats.extend((np.sum(series), np.mean(series), np.std(series)))
  feats.append(labels[file_name])

  return feats

In [None]:
# Header for the feature table: the file name, then sum/mean/std for each of
# the nine signal columns (the order extract_aggregations emits), then the
# label.
columns = [f'C{i}' for i in range(9)]
column_names = (
    ['user_test']
    + [f'{stat}_{c}' for c in columns for stat in ('sum', 'mean', 'std')]
    + ['target']
)

# Keep only paths that really end in '.txt' (a bare substring test also matches
# 'txt' anywhere in a directory or file name). glob returns matches in
# arbitrary order, so sort to make the row order of the saved table
# reproducible across runs.
files = sorted(x for x in glob.glob(f'{PATH}/*/*') if x.endswith('.txt'))

# One feature row per file, computed on 20 worker threads; the progress bar
# tracks task submission.
with parallel_backend('threading', n_jobs=20):
  global_feats = Parallel()(delayed(extract_aggregations)(f) for f in tqdm(files))

  0%|          | 0/4447 [00:00<?, ?it/s]

  0%|          | 0/4447 [00:00<?, ?it/s]

In [None]:
# One row per recording file; column_names supplies the header in the same
# order the feature rows were built.
global_df = pd.DataFrame(data=global_feats, columns=column_names)

# Preview the first five rows.
global_df.head(5)

Unnamed: 0,user_test,sum_C0,mean_C0,std_C0,sum_C1,mean_C1,std_C1,sum_C2,mean_C2,std_C2,sum_C3,mean_C3,std_C3,sum_C4,mean_C4,std_C4,sum_C5,mean_C5,std_C5,sum_C6,mean_C6,std_C6,sum_C7,mean_C7,std_C7,sum_C8,mean_C8,std_C8,target
0,D12_SA01_R04.txt,-305202.0,-127.1675,121.615248,-397540.0,-165.641667,94.263054,2517.0,1.04875,25.960493,-68428.0,-28.511667,111.05311,60237.0,25.09875,157.082555,-11128.0,-4.636667,336.317063,-1185583.0,-493.992917,471.549925,-1537463.0,-640.609583,369.52643,314525.0,131.052083,95.799792,0
1,D14_SA01_R05.txt,292954.0,122.064167,121.333521,-737.0,-0.307083,31.299831,340142.0,141.725833,92.096443,-116420.0,-48.508333,149.153422,36555.0,15.23125,435.055552,8792.0,3.663333,117.404773,1128874.0,470.364167,463.540138,-6551.0,-2.729583,123.397997,1671455.0,696.439583,374.545569,0
2,D14_SA01_R02.txt,-258286.0,-107.619167,132.607431,-56660.0,-23.608333,23.74856,310273.0,129.280417,107.104615,-103645.0,-43.185417,155.422136,41451.0,17.27125,502.259205,-21041.0,-8.767083,149.830033,-999109.0,-416.295417,517.39227,-221131.0,-92.137917,93.802002,1568284.0,653.451667,428.172743,0
3,D17_SA01_R04.txt,-109386.0,-21.8772,24.578749,-1161355.0,-232.271,18.500766,97741.0,19.5482,78.004944,-30814.0,-6.1628,268.670483,532264.0,106.4528,399.896366,-907.0,-0.1814,126.266457,-468355.0,-93.671,94.672133,-4429972.0,-885.9944,71.284085,885057.0,177.0114,308.837236,0
4,F02_SA01_R04.txt,72877.0,24.292333,71.92373,-450492.0,-150.164,177.814292,153816.0,51.272,180.494596,163172.0,54.390667,637.437903,497076.0,165.692,516.06929,10970.0,3.656667,330.487391,44147.0,14.715667,277.981864,-1700732.0,-566.910667,653.473849,1112249.0,370.749667,711.701931,1


In [None]:
# Save the feature table inside the dataset folder. The path is built from
# PATH instead of repeating the folder literal, so the output location follows
# the input location if PATH is ever changed.
global_df.to_csv(os.path.join(PATH, 'user_compressed_feats.csv'), index=False)