In [None]:
def process_file(filename, dirname):
    """
    Processes a single Parquet file to compute summary statistics.
    Includes only one instance of 'count' across all columns.
    """
    df = pd.read_parquet(os.path.join(dirname, filename, 'part-0.parquet'))
    
    #Step isn't very relevant, accomplishes similar to count, but we include an instance of count
    if 'step' in df.columns:
        df.drop('step', axis=1, inplace=True)
    
    # Compute summary statistics
    stats = df.describe()
    
    # Extract count only once, redundant counts otherwise
    count = stats.iloc[0, :].mean()
    stats = stats.iloc[1:, :].values.reshape(-1)  # Exclude counts

    stats = [count] + stats.tolist()
    
    return stats, filename.split('=')[1]

def load_time_series(dirname):
    """
    Build a feature table with one row of summary statistics per entry
    found in ``dirname``.

    Each directory entry is passed to ``process_file`` on a thread pool.
    The returned DataFrame has columns ``stat_0 .. stat_{k-1}`` (k being the
    length of the first entry's statistics list) plus an 'id' column holding
    the identifier ``process_file`` extracted from the entry name. Rows follow
    the order in which ``os.listdir`` reported the entries.

    An empty directory is not handled: unpacking the (empty) results raises
    ValueError.
    """
    entries = os.listdir(dirname)

    def _summarise(entry):
        # Bind the directory so the pool only has to supply the entry name.
        return process_file(entry, dirname)

    with ThreadPoolExecutor() as pool:
        # pool.map yields results in input order; tqdm only adds a progress bar.
        progress = tqdm(pool.map(_summarise, entries), total=len(entries))
        outcomes = list(progress)

    # Split the (stats, id) pairs into two parallel tuples.
    feature_rows, series_ids = zip(*outcomes)

    # Column names are derived from the width of the first row.
    n_features = len(feature_rows[0])
    column_names = [f"stat_{i}" for i in range(n_features)]

    frame = pd.DataFrame(feature_rows, columns=column_names)
    frame['id'] = series_ids
    return frame


# Tabular competition data.
train_csv = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/train.csv')
test_csv = pd.read_csv('/kaggle/input/child-mind-institute-problematic-internet-use/test.csv')

# Summary-statistic features computed from the time-series parquet directories.
train_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_train.parquet")
test_ts = load_time_series("/kaggle/input/child-mind-institute-problematic-internet-use/series_test.parquet")

# Left-join the tabular columns onto the time-series features by 'id', so only
# ids that have a time series are kept.
# Alternative (parquet-only model on sii): merge train_ts with
# train_csv[['id', 'sii']] instead, and use test_ts directly as the test set.
train = train_ts.merge(train_csv, on='id', how='left')
test = test_ts.merge(test_csv, on='id', how='left')


# Show the first rows of each merged table.
print("Training Dataset:", train.head(), sep="\n")

print("\nTesting Dataset:", test.head(), sep="\n")
