In [7]:
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import joblib

def read_csv_files(directory):
    """Load the selected CSV recordings found directly inside *directory*.

    The file name (split on ``'_'``) supplies the metadata:

    * first part            -> file type (which data the file holds)
    * second-to-last part   -> participant (``'unknown'`` with fewer than
      three parts)
    * last part, up to the first ``'.'`` -> difficulty (``'unknown'`` when
      the name contains no ``'_'``)

    Only files whose participant is ``'participant1'`` and whose file type
    is not ``'E4 ACC data'`` are loaded.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        dict mapping ``(participant, difficulty, file_type)`` to the
        DataFrame read from that file. Entries are inserted in sorted
        file-name order, so iteration order is reproducible.
    """
    data_frames = {}
    # os.listdir() returns names in arbitrary order; sort them so that the
    # insertion order of the returned dict does not depend on the file system.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.csv'):
            continue

        parts = filename.split('_')
        file_type = parts[0]
        participant = parts[-2] if len(parts) > 2 else 'unknown'
        difficulty = parts[-1].split('.')[0] if len(parts) > 1 else 'unknown'

        # Filter before reading so that files we are going to discard are
        # never parsed (and cannot abort the whole load if they are broken).
        if participant != 'participant1' or file_type == 'E4 ACC data':
            continue

        key = (participant, difficulty, file_type)
        data_frames[key] = pd.read_csv(os.path.join(directory, filename))

    return data_frames

def preprocess_data(df):
    """Trim a raw recording to start at the 'game start' marker row.

    Every row before the first row whose ``'Participant Name'`` equals
    ``'game start'`` is discarded; the marker row itself is kept and the
    surviving rows keep their original index labels. Afterwards the
    ``'E4 TMP'`` column is dropped if present; otherwise the ``'E4 BVP'``
    column is dropped if present (at most one of the two is removed).

    Args:
        df: Raw recording with a ``'Participant Name'`` column.

    Returns:
        A new DataFrame; *df* itself is not modified.

    Raises:
        IndexError: If no row is marked ``'game start'``.
    """
    is_marker = (df['Participant Name'] == 'game start').to_numpy(
        dtype=bool, na_value=False
    )
    marker_positions = np.flatnonzero(is_marker)
    if marker_positions.size == 0:
        raise IndexError("no 'game start' row found in 'Participant Name'")

    # Slice by *position*: using the marker's index label as a positional
    # bound is only correct when the index happens to be 0..n-1.
    df = df.iloc[marker_positions[0]:].copy()

    if 'E4 TMP' in df.columns:
        df = df.drop(columns=['E4 TMP'])
    elif 'E4 BVP' in df.columns:
        df = df.drop(columns=['E4 BVP'])

    return df

# Take a DataFrame already split into 1-second rows and build a DataFrame of
# 10-row frames that advance one row at a time, adding arithmetic features.
def make_value(df, window=10):
    """Turn per-row samples into overlapping sliding-window feature rows.

    The ``'Participant Name'`` and ``'UNIX Time'`` columns are discarded.
    For every remaining column ``c`` and every window of *window*
    consecutive rows (advancing one row at a time), the output row holds
    the raw samples as ``'c 1'`` ... ``'c <window>'`` followed by
    ``'c Mean'``, ``'c Min'``, ``'c Max'`` and ``'c Std'`` (``np.std``,
    i.e. population standard deviation).

    Args:
        df: Input frame containing ``'Participant Name'``, ``'UNIX Time'``
            and one or more signal columns.
        window: Number of consecutive rows per frame (default 10).

    Returns:
        DataFrame with ``len(df) - window + 1`` rows (zero rows when *df*
        is shorter than *window*) and ``window + 4`` columns per signal.

    Raises:
        ValueError: If *window* is smaller than 1, or if one of the two
            bookkeeping columns is missing.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    column_name = df.columns.tolist()
    column_name.remove('Participant Name')
    column_name.remove('UNIX Time')

    column_name_result = []
    for name in column_name:
        column_name_result.extend(f'{name} {k}' for k in range(1, window + 1))
        column_name_result.extend(
            [f'{name} Mean', f'{name} Min', f'{name} Max', f'{name} Std']
        )

    signals = df.drop(columns=['Participant Name', 'UNIX Time'])
    # Pull each column out once as a plain list; per-cell .iloc lookups inside
    # the window loop are far slower and give the same values.
    samples_per_column = [
        signals.iloc[:, pos].tolist() for pos in range(signals.shape[1])
    ]

    rows = []
    # "+ 1" so the final complete window (ending on the last row) is included.
    for start in range(len(signals) - window + 1):
        new_row = {}
        for name, samples in zip(signals.columns, samples_per_column):
            frame = samples[start:start + window]
            for offset, sample in enumerate(frame, start=1):
                new_row[f'{name} {offset}'] = sample
            new_row[f'{name} Mean'] = np.mean(frame)
            new_row[f'{name} Min'] = np.min(frame)
            new_row[f'{name} Max'] = np.max(frame)
            new_row[f'{name} Std'] = np.std(frame)
        rows.append(new_row)

    # Build the frame in one go; growing a DataFrame row by row copies the
    # whole table on every insertion.
    return pd.DataFrame(rows, columns=column_name_result)

# Build a DataFrame with one row per 1-second time window.
def process_data(df, time_window=1):
    """Aggregate a raw recording into per-window medians and window features.

    Steps:
      1. ``preprocess_data`` trims the recording to start at the
         'game start' marker row.
      2. Starting two rows after the marker, consecutive rows whose
         ``'UNIX Time'`` falls in ``[t, t + time_window)`` (``t`` being the
         time of the first row not yet consumed) are collapsed into one
         row holding the column-wise median.
      3. The per-window rows are handed to ``make_value`` and its result
         is returned.

    Args:
        df: Raw recording as read from CSV.
        time_window: Width of one aggregation window, in the units of the
            ``'UNIX Time'`` column (default 1).

    Returns:
        The DataFrame produced by ``make_value``.
    """
    # The trimmed frame keeps its original row labels. The loop below mixes
    # labels with len(df), which is only valid when labels run 0..n-1 --
    # otherwise trailing rows are silently never processed. Renumber first.
    df = preprocess_data(df).reset_index(drop=True)

    # idx_before is the label of the last row already consumed.
    # NOTE(review): starting at marker + 1 means the row directly after the
    # marker is never aggregated -- kept as in the original; confirm intent.
    idx_before = df[df['Participant Name'] == 'game start'].index[0] + 1

    rows = []
    while idx_before + 1 < len(df):
        window_start = df.loc[idx_before + 1, 'UNIX Time']
        in_window = (df['UNIX Time'] >= window_start) & (
            df['UNIX Time'] < window_start + time_window
        )
        idx_range = df.index[in_window]
        if len(idx_range) == 0:
            # e.g. the timestamp is NaN: nothing matches, stop aggregating.
            break

        # .loc slicing is label based and inclusive on both ends.
        window_rows = df.loc[idx_before + 1:idx_range[-1]]
        rows.append(window_rows.median().to_dict())

        idx_before = idx_range[-1]

    # Assemble once instead of enlarging a DataFrame row by row.
    data_frame = pd.DataFrame(rows, columns=df.columns.tolist())

    return make_value(data_frame)
    
if __name__ == "__main__":
    # Train a random-forest difficulty classifier on the windowed sensor
    # features of every loaded recording and save the fitted model to disk.
    directory = r'C:\Users\USER\Downloads\CSV'  # Use raw string literal
    data_frames = read_csv_files(directory)

    # Group the per-file feature tables by the difficulty parsed from the
    # file name. Selecting them by list position would silently mislabel
    # data as soon as the number or order of files changes.
    frames_by_difficulty = {}
    for (participant, difficulty, file_type), df in data_frames.items():
        df = process_data(df)
        df = df.reset_index(drop=True)
        frames_by_difficulty.setdefault(difficulty, []).append(df)

    # Within one difficulty, place the feature columns of the different
    # files side by side (rows are aligned by window number).
    df_difficult = pd.concat(frames_by_difficulty['difficult'], axis=1)
    df_easy = pd.concat(frames_by_difficulty['easy'], axis=1)
    df_moderate = pd.concat(frames_by_difficulty['moderate'], axis=1)

    df_easy['Difficulty'] = 'easy'
    df_moderate['Difficulty'] = 'moderate'
    df_difficult['Difficulty'] = 'difficult'

    df_all = pd.concat([df_easy, df_moderate, df_difficult], axis=0)

    model = RandomForestClassifier()
    x = df_all.drop(columns=['Difficulty'])
    # Files of different length leave NaN after the column-wise concat;
    # fill those gaps with the column mean.
    x = x.fillna(x.mean())
    # 1-D target: a single-column 2-D y makes scikit-learn emit a
    # DataConversionWarning on fit.
    y = df_all['Difficulty']

    x_train = x.to_numpy()
    y_train = y.to_numpy()

    model.fit(x_train, y_train)

    # Smoke test: predict the first training sample.
    result = model.predict(x_train[:1])

    print(result)

    joblib_file = "random_forest_model.joblib"
    joblib.dump(model, joblib_file)
    print(f"Model saved as {joblib_file}")

  return fit_method(estimator, *args, **kwargs)


['easy']
Model saved as random_forest_model.joblib
