In [65]:
#train

import os
import pandas as pd
from glob import glob


# Base directory: the input sub-directory name and the output file name are
# appended to it by plain string concatenation, so the trailing slash matters.
directory_path = 'data/537_combine_result_tables/'
# Name of the combined results CSV written into directory_path.
output_file = 'train_final_results.csv'


def process_files(directory, prefix):
    """Merge every ``{prefix}_*.csv`` file in *directory* into one score table.

    Each matching file is read, its own ``score`` column is dropped, and the
    remaining columns are outer-joined on ``username``. A new ``score`` column
    is then computed as the row-wise sum of every non-``username`` column,
    skipping missing values (users absent from some files still get a total).

    Args:
        directory: Directory that is searched for CSV files.
        prefix: File-name prefix; files matching ``{prefix}_*.csv`` are merged.

    Returns:
        A DataFrame holding ``username``, the merged columns and ``score``.

    Raises:
        FileNotFoundError: If no file matches ``{prefix}_*.csv``.
    """
    pattern = os.path.join(directory, f'{prefix}_*.csv')
    # glob() yields matches in arbitrary, filesystem-dependent order; sorting
    # makes the merged column order reproducible across runs and machines.
    files = sorted(glob(pattern))
    if not files:
        # Fail with a clear message instead of an AttributeError on None below.
        raise FileNotFoundError(f'No CSV files match {pattern!r}')

    merged_df = None
    for file in files:
        # The per-file score is discarded; the total is recomputed below.
        df = pd.read_csv(file).drop('score', axis=1)
        if merged_df is None:
            merged_df = df
        else:
            # Outer join keeps users that appear in only some of the files.
            merged_df = pd.merge(merged_df, df, on='username', how='outer')

    score_columns = [col for col in merged_df.columns if col != 'username']
    merged_df['score'] = merged_df[score_columns].sum(axis=1, skipna=True)
    return merged_df


def process_all_files(directory):
    """Build one ranked results table from all CSV files in *directory*.

    Files are grouped by prefix (the first two underscore-separated tokens of
    the file name). Each group is merged by ``process_files``; the per-group
    tables are stacked and ordered by ``score`` descending, then ``username``
    ascending, with a fresh 0-based index.

    Args:
        directory: Directory containing the ``*.csv`` input files.

    Returns:
        The concatenated, sorted DataFrame.
    """
    csv_paths = glob(os.path.join(directory, '*.csv'))
    unique_prefixes = {
        '_'.join(os.path.basename(path).split('_')[:2]) for path in csv_paths
    }
    ordered_prefixes = sorted(unique_prefixes)
    # NOTE(review): the alphabetically first prefix is skipped here, while the
    # otherwise identical test-set cell processes every prefix -- confirm this
    # is intentional.
    tables = [
        process_files(directory, prefix) for prefix in ordered_prefixes[1:]
    ]
    combined = pd.concat(tables, ignore_index=True)
    ranked = combined.sort_values(
        by=['score', 'username'],
        ascending=[False, True],
    )
    return ranked.reset_index(drop=True)


# Combine everything under the 'train' sub-directory and persist the ranking.
final_df = process_all_files(os.path.join(directory_path, 'train'))
final_df.to_csv(
    os.path.join(directory_path, output_file),
    index=False,
    float_format='%.0f',  # write float totals without decimals
)
print(f'The file "{output_file}" has been successfully written to "{directory_path}".')

The file "train_final_results.csv" has been successfully written to "data/537_combine_result_tables/".


In [None]:
#test

import os
import pandas as pd
from glob import glob


# Base directory: the input sub-directory name and the output file name are
# appended to it by plain string concatenation, so the trailing slash matters.
directory_path = 'data/537_combine_result_tables/'
# Name of the combined results CSV written into directory_path.
output_file = 'test_final_results.csv'


def process_files(directory, prefix):
    """Merge every ``{prefix}_*.csv`` file in *directory* into one score table.

    Each matching file is read, its own ``score`` column is dropped, and the
    remaining columns are outer-joined on ``username``. A new ``score`` column
    is then computed as the row-wise sum of every non-``username`` column,
    skipping missing values (users absent from some files still get a total).

    Args:
        directory: Directory that is searched for CSV files.
        prefix: File-name prefix; files matching ``{prefix}_*.csv`` are merged.

    Returns:
        A DataFrame holding ``username``, the merged columns and ``score``.

    Raises:
        FileNotFoundError: If no file matches ``{prefix}_*.csv``.
    """
    pattern = os.path.join(directory, f'{prefix}_*.csv')
    # glob() yields matches in arbitrary, filesystem-dependent order; sorting
    # makes the merged column order reproducible across runs and machines.
    files = sorted(glob(pattern))
    if not files:
        # Fail with a clear message instead of an AttributeError on None below.
        raise FileNotFoundError(f'No CSV files match {pattern!r}')

    merged_df = None
    for file in files:
        # The per-file score is discarded; the total is recomputed below.
        df = pd.read_csv(file).drop('score', axis=1)
        if merged_df is None:
            merged_df = df
        else:
            # Outer join keeps users that appear in only some of the files.
            merged_df = pd.merge(merged_df, df, on='username', how='outer')

    score_columns = [col for col in merged_df.columns if col != 'username']
    merged_df['score'] = merged_df[score_columns].sum(axis=1, skipna=True)
    return merged_df


def process_all_files(directory):
    """Build one ranked results table from all CSV files in *directory*.

    Files are grouped by prefix (the first two underscore-separated tokens of
    the file name). Each group is merged by ``process_files``; the per-group
    tables are stacked and ordered by ``score`` descending, then ``username``
    ascending, with a fresh 0-based index.

    Args:
        directory: Directory containing the ``*.csv`` input files.

    Returns:
        The concatenated, sorted DataFrame.
    """
    csv_paths = glob(os.path.join(directory, '*.csv'))
    unique_prefixes = {
        '_'.join(os.path.basename(path).split('_')[:2]) for path in csv_paths
    }
    tables = [
        process_files(directory, prefix) for prefix in sorted(unique_prefixes)
    ]
    combined = pd.concat(tables, ignore_index=True)
    ranked = combined.sort_values(
        by=['score', 'username'],
        ascending=[False, True],
    )
    return ranked.reset_index(drop=True)


# Combine everything under the 'test' sub-directory and persist the ranking.
final_df = process_all_files(os.path.join(directory_path, 'test'))
final_df.to_csv(
    os.path.join(directory_path, output_file),
    index=False,
    float_format='%.0f',  # write float totals without decimals
)
print(f'The file "{output_file}" has been successfully written to "{directory_path}".')