In [1]:
import pandas as pd
import os
import numpy as np

# func 1, merge two CSVs with the same structure
def merge_csv(path1, path2, new_file_name):
    """Append the rows of one CSV to another and save the result as a new CSV.

    Args:
        path1: Path of the CSV whose rows come first in the output.
        path2: Path of the CSV whose rows are appended after those of ``path1``.
        new_file_name: Path of the CSV file to write.

    Returns:
        None. The combined table is written to ``new_file_name`` without the
        DataFrame index column.
    """
    # Read both inputs in argument order so the row order of the output is
    # path1's rows followed by path2's rows.
    frames = [pd.read_csv(source) for source in (path1, path2)]
    pd.concat(frames).to_csv(new_file_name, index=False)
    
    
# func 2, split a CSV into n equal-length sub-CSVs and store them in a new folder
def split_csv_equal(path, target_dir, n):
    """Split a CSV into ``n`` consecutive parts of (almost) equal length.

    The rows are divided in their original order. When the row count is not
    a multiple of ``n`` the first ``len(df) % n`` parts receive one extra row,
    so part sizes never differ by more than one and no part is left empty
    unless the CSV has fewer than ``n`` rows.

    Args:
        path: Path of the CSV file to split.
        target_dir: Directory that receives the parts; created if missing.
        n: Number of parts to write (positive integer).

    Returns:
        None. Writes ``split_1.csv`` ... ``split_<n>.csv`` into ``target_dir``,
        each with the header row and without the DataFrame index.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    df = pd.read_csv(path)
    # Every part gets `base_rows` rows; the first `extra` parts get one more.
    # Sizing every part with ceil(len/n) instead would use up the rows early
    # and leave trailing files empty (e.g. 101 rows into 100 files).
    base_rows, extra = divmod(len(df), n)

    os.makedirs(target_dir, exist_ok=True)

    start_row = 0
    for i in range(n):
        end_row = start_row + base_rows + (1 if i < extra else 0)
        df_part = df.iloc[start_row:end_row]
        df_part.to_csv(os.path.join(target_dir, f'split_{i+1}.csv'), index=False)
        start_row = end_row
        
# func 3, split a CSV into sub-CSVs of random length and store them in a new folder
def split_csv_randomly(path, target_dir, n):
    """Split a CSV into ``n`` consecutive parts of random length.

    Every part holds between ``floor(0.6 * avg)`` and ``ceil(1.6 * avg)`` rows,
    where ``avg = len(df) / n``. Sizes are drawn one part at a time with
    ``np.random.randint``; each draw is restricted so that the rows still left
    can always be shared among the remaining parts within the same bounds.
    As a result exactly ``n`` files are written and together they contain
    every row of the input exactly once, in the original order.

    Sizes depend on NumPy's global random state; call ``np.random.seed``
    beforehand for a reproducible split.

    Args:
        path: Path of the CSV file to split.
        target_dir: Directory that receives the parts; created if missing.
        n: Number of parts to write (positive integer).

    Returns:
        None. Writes ``split_1.csv`` ... ``split_<n>.csv`` into ``target_dir``,
        each with the header row and without the DataFrame index.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    df = pd.read_csv(path)
    total_rows = len(df)
    avg_rows = total_rows / n
    # Integer bounds: rounding the lower bound down and the upper bound up
    # guarantees n * min_rows <= total_rows <= n * max_rows, i.e. a valid
    # split always exists.
    min_rows = int(np.floor(0.6 * avg_rows))
    max_rows = int(np.ceil(1.6 * avg_rows))

    os.makedirs(target_dir, exist_ok=True)

    start_idx = 0
    for i in range(n):
        files_left = n - i - 1
        remaining = total_rows - start_idx
        if files_left == 0:
            # The last part takes whatever is left; the constrained draws
            # above keep this within [min_rows, max_rows].
            rows_for_file = remaining
        else:
            # Leave at least min_rows and at most max_rows per later part, so
            # the split can neither run out of rows early nor overshoot.
            lowest = max(min_rows, remaining - max_rows * files_left)
            highest = min(max_rows, remaining - min_rows * files_left)
            rows_for_file = int(np.random.randint(lowest, highest + 1))
        end_idx = start_idx + rows_for_file

        df_part = df.iloc[start_idx:end_idx]
        df_part.to_csv(os.path.join(target_dir, f'split_{i + 1}.csv'), index=False)

        start_idx = end_idx
        
        
def load_data(path, averaging=True, window=2):
    """Load the ``x``, ``y`` and ``z`` columns of a CSV as a ``(3, T)`` array.

    The file is parsed with ``np.genfromtxt`` using the header row for column
    names, so it must contain columns named ``x``, ``y`` and ``z``. Row 0 of
    the result is ``x``, row 1 is ``y`` and row 2 is ``z``; ``T`` is the number
    of data rows.

    Args:
        path: Path of the CSV file to read.
        averaging: When truthy, smooth each row with a trailing moving average.
        window: Number of samples in the averaging window (>= 1). Column ``t``
            of the result is the mean of columns ``t - window + 1 .. t`` of the
            raw data; the first ``window - 1`` columns use the shorter window
            that is available.

    Returns:
        numpy.ndarray of shape ``(3, T)``: the smoothed data when ``averaging``
        is truthy, otherwise the raw data.

    Raises:
        ValueError: If ``averaging`` is requested with ``window`` < 1.
    """
    # genfromtxt returns a 0-d structured array for a file with a single data
    # row; promote it to 1-D so such files load as shape (3, 1) instead of
    # failing when the columns are combined.
    table = np.atleast_1d(np.genfromtxt(path, delimiter=',', names=True))
    data = np.stack((table['x'], table['y'], table['z']), axis=0)

    if not averaging:
        return data

    if window < 1:
        # A non-positive window would average empty slices and produce NaN.
        raise ValueError(f"window must be at least 1, got {window!r}")

    n, T = data.shape
    averaged_data = np.zeros((n, T))
    for t in range(T):
        start_index = max(0, t - window + 1)
        averaged_data[:, t] = data[:, start_index:t + 1].mean(axis=1)
    return averaged_data

In [5]:
# Request a split of run_1.csv into 100 randomly sized parts, written to the
# 'running' directory as split_<k>.csv files.
# NOTE(review): no random seed is set in the visible cells, so the part sizes
# change every time this cell is run.
split_csv_randomly('run_1.csv', 'running', 100)

In [69]:
# Walk the 'standing/' directory tree and load every file found in it
# (no extension filter is applied) with a 2-sample moving average.
standing_path = 'standing/'
for root, dirs, files in os.walk(standing_path):
    for file in files:
        single_csv = os.path.join(root, file)
        # NOTE(review): `a` is reassigned on every iteration, so after the loop
        # only the data of the last file visited is still available.
        a = load_data(single_csv, averaging=True, window=2)
# Pair the array with the constant 1 (presumably the class label for
# "standing" — confirm).
# NOTE(review): this line sits outside the loop, so `c` describes a single file
# only; if no file was found, `a` is not assigned by this cell at all (NameError
# on a fresh kernel, or a stale value left over from an earlier run).
c = (a, 1)


1