In [64]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings(action='ignore')

# Global seaborn theme applied to every figure drawn in this notebook.
sns.set_theme(style="whitegrid", palette="Set2")

# Korean text rendering: select a font family that provides Hangul glyphs.
# NOTE(review): "AppleGothic" presumably ships only with macOS — confirm the target platform.
plt.rc("font", family="AppleGothic")
# Render the minus sign with an ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False   

## data load

In [89]:
# Label tables; they are joined to the feature tables on "EMAIL" further down.
train_label = pd.read_csv("data/processed/train_label.csv")
test_label = pd.read_csv("data/processed/test_label.csv")

# Activity table plus the two MET logs, each with activity_date parsed as datetime.
activity = pd.read_csv("./data/processed/activity.csv", parse_dates=['activity_date'])
met_class = pd.read_csv("./data/processed/log/met_class.csv", parse_dates=['activity_date'])
met_log = pd.read_csv("./data/processed/log/met_log.csv", parse_dates=['activity_date'])

# Sleep table and sleep log, with bedtime columns parsed as datetime.
# NOTE(review): sleep_log is not referenced again in the cells visible here — confirm it is still needed.
sleep = pd.read_csv("./data/processed/sleep.csv", parse_dates=['sleep_bedtime_start','sleep_bedtime_end'])
sleep_log = pd.read_csv("./data/processed/log/sleep_log.csv", parse_dates=['sleep_bedtime_start'])

In [90]:
# Train split: keep only the rows whose EMAIL appears in train_label (inner join).
train_activity = pd.merge(activity, train_label, how="inner", on="EMAIL")
train_metclass = pd.merge(met_class, train_label, how="inner", on="EMAIL")
train_metlog = pd.merge(met_log, train_label, how="inner", on="EMAIL")
train_sleep = pd.merge(sleep, train_label, how="inner", on="EMAIL")

# Test split: same join against test_label.
test_activity = pd.merge(activity, test_label, how="inner", on="EMAIL")
test_metclass = pd.merge(met_class, test_label, how="inner", on="EMAIL")
test_metlog = pd.merge(met_log, test_label, how="inner", on="EMAIL")
test_sleep = pd.merge(sleep, test_label, how="inner", on="EMAIL")

In [91]:
# Report the shape of every train frame, a separator, then every test frame.
for _label, _frame in (
    ("Train Activity: ", train_activity),
    ("Train MET Class: ", train_metclass),
    ("Train MET Log: ", train_metlog),
):
    print(_label, _frame.shape)
print("="*60)
for _label, _frame in (
    ("Test Activity: ", test_activity),
    ("Test MET Class: ", test_metclass),
    ("Test MET Log: ", test_metlog),
):
    print(_label, _frame.shape)

Train Activity:  (9705, 27)
Train MET Class:  (2780866, 5)
Train MET Log:  (13975200, 5)
Test Activity:  (2478, 27)
Test MET Class:  (711217, 5)
Test MET Log:  (3568320, 5)


## activity 이상치 제거

In [68]:
import pandas as pd

def is_outlier(row):
    """Tell whether one daily activity record is a step-count outlier.

    A record is flagged when it reports 10,000-20,000 steps while every other
    activity measure is very low: activity_total <= 2000,
    activity_cal_active <= 100 and activity_daily_movement <= 1000.

    Parameters:
        row: mapping-like record (e.g. a DataFrame row) indexed by column name.

    Returns:
        Truthy when all four conditions hold, falsy otherwise. Later fields are
        not read once an earlier condition fails.
    """
    steps_in_band = 10000 <= row['activity_steps'] <= 20000
    if not steps_in_band:
        return steps_in_band

    low_total = row['activity_total'] <= 2000
    if not low_total:
        return low_total

    low_calories = row['activity_cal_active'] <= 100
    if not low_calories:
        return low_calories

    return row['activity_daily_movement'] <= 1000

def calculate_median(train_data, col):
    """Return the median of `col` over the rows whose 'is_outlier' flag is False.

    Parameters:
        train_data (pd.DataFrame): frame holding a boolean 'is_outlier' column.
        col (str): column to summarise.
    """
    inliers = train_data[~train_data['is_outlier']]
    return inliers[col].median()

def get_low_steps_group(train_data):
    """Select the rows whose activity_steps lies in the interval (450, 700].

    Parameters:
        train_data (pd.DataFrame): frame with an 'activity_steps' column.

    Returns:
        pd.DataFrame: the matching rows, original index labels kept.
    """
    steps = train_data['activity_steps']
    in_band = steps.gt(450) & steps.le(700)
    return train_data.loc[in_band]

def remove_outliers(data, reference_data=None):
    """Detect step-count outliers and overwrite their activity_steps value.

    Parameters:
        data (pd.DataFrame): dataset to clean (train or test); it is not modified.
        reference_data (pd.DataFrame | None): dataset used to derive the
            replacement value (the train set is the intended choice). When
            omitted, `data` itself is used.

    Returns:
        pd.DataFrame: copy of `data` in which every row flagged by `is_outlier`
        has activity_steps replaced. The replacement is the integer-truncated
        mean steps of the low-steps group of the reference data, or the median
        activity_steps of `data` when that group is empty.
    """
    cleaned = data.copy()

    # Flag outliers row by row; the helper column is dropped again before returning.
    cleaned['is_outlier'] = cleaned.apply(is_outlier, axis=1)

    # Pick the frame the replacement value is derived from.
    if reference_data is None:
        baseline = cleaned
    else:
        baseline = reference_data
    low_step_rows = get_low_steps_group(baseline)

    # Replacement value: mean of the low-steps group, falling back to the overall median.
    if low_step_rows.empty:
        fill_value = cleaned['activity_steps'].median()
    else:
        fill_value = int(low_step_rows['activity_steps'].mean())

    cleaned.loc[cleaned['is_outlier'], 'activity_steps'] = fill_value
    return cleaned.drop(columns=['is_outlier'])


## activity met log 전처리

In [69]:
# import pandas as pd
# import numpy as np

def remove_high_zero_ratio_entries(df, threshold=0.8):
    """Drop every (EMAIL, activity_date) group whose share of zero MET readings is too high.

    Parameters:
        df (pd.DataFrame): MET log with 'EMAIL', 'activity_date' and 'met' columns.
        threshold (float): groups whose fraction of `met == 0` rows is greater
            than or equal to this value are removed.

    Returns:
        tuple: (filtered frame, MultiIndex of the removed (EMAIL, activity_date) pairs).
    """
    keys = ["EMAIL", "activity_date"]

    # Fraction of zero readings per group (mean of a boolean flag).
    is_zero = df["met"] == 0
    zero_share = is_zero.groupby([df[key] for key in keys]).mean()
    flagged = zero_share.loc[zero_share.ge(threshold)].index

    # Keep the rows whose key pair is not among the flagged groups.
    row_keys = pd.MultiIndex.from_frame(df[keys])
    kept = df[~row_keys.isin(flagged)]
    return kept, flagged

def correct_met_values(df, remove=True):
    """Repair MET readings below 1 and store the result in 'met_interpolated'.

    Steps:
      1. (optional) drop every (EMAIL, activity_date) group in which at least
         80% of the readings are 0;
      2. turn readings equal to 0.9 into 1;
      3. blank readings below 0.9 and fill them by linear interpolation inside
         each (EMAIL, activity_date) group (interior gaps only);
      4. set whatever is still missing to 1.

    Only 'met_interpolated' is interpolated; every other column, including the
    raw 'met' and the grouping columns, is passed through untouched.

    Parameters:
        df (pd.DataFrame): MET log with 'EMAIL', 'activity_date' and 'met' columns.
        remove (bool): when True, step 1 is applied and the removed groups are printed.

    Returns:
        pd.DataFrame: copy of the log with the extra 'met_interpolated' column,
        ordered by (EMAIL, activity_date) with the original order kept inside
        each group, and a fresh 0..n-1 index.
    """
    group_keys = ["EMAIL", "activity_date"]

    df = df.copy()
    df["met_interpolated"] = df["met"]  # working column; the raw 'met' stays as read

    # Drop groups dominated by zero readings.
    if remove:
        df, removed_entries = remove_high_zero_ratio_entries(df)
        print("제거된 데이터 그룹:", removed_entries)

    # 0.9 -> 1
    df["met_interpolated"] = df["met_interpolated"].replace(0.9, 1)

    # Readings below 0.9 become gaps to be interpolated.
    df.loc[df["met_interpolated"] < 0.9, "met_interpolated"] = np.nan

    # Group-sorted row order with the original order kept inside each group.
    df = df.sort_values(group_keys, kind="stable").reset_index(drop=True)

    # Interpolate the working column only, one group at a time, so gaps are
    # never bridged across users or days and no other column is altered.
    df["met_interpolated"] = df.groupby(group_keys)["met_interpolated"].transform(
        lambda values: values.interpolate(
            method="linear", limit_direction="both", limit_area="inside"
        )
    )

    # Leading/trailing gaps are not covered by interior interpolation: use 1.
    df["met_interpolated"] = df["met_interpolated"].fillna(1.0)

    return df


## chronotype

In [70]:
def get_time_period(minutes):
    """Map a minute-of-day value to 'morning', 'daytime', 'evening' or 'night'.

    Bands are inclusive on both ends: 0-359 -> 'morning', 360-719 -> 'daytime',
    720-1079 -> 'evening'. Any value outside those bands yields 'night'.
    """
    bands = (
        (0, 359, "morning"),
        (360, 719, "daytime"),
        (720, 1079, "evening"),
    )
    for lower, upper, label in bands:
        if lower <= minutes <= upper:
            return label
    return "night"

# import pandas as pd
# from time_utils import get_time_period

def compute_activity_pattern(df):
    """Average MET per person and time period, plus each person's most active period.

    Parameters:
        df (pd.DataFrame): MET log with 'EMAIL', 'minutes_time' and
            'met_interpolated' columns. A 'time_period' column is written into
            this frame in place.

    Returns:
        pd.DataFrame: indexed by EMAIL, one column per time period holding the
        mean 'met_interpolated', and 'max_activity_time' naming the period with
        the highest mean.
    """
    # Written onto the caller's frame, not a copy.
    df["time_period"] = df["minutes_time"].map(get_time_period)

    # Mean MET per (person, period), periods spread out as columns.
    mean_met = df.groupby(["EMAIL", "time_period"])["met_interpolated"].mean()
    per_person = mean_met.unstack()

    # Period with the highest mean for each person.
    per_person["max_activity_time"] = per_person.idxmax(axis=1)
    return per_person

def classify_activity_type(activity_pattern):
    """Label each person-day as 'morning', 'evening' or 'intermediate' activity.

    The mean 'met_interpolated' is computed per (EMAIL, activity_date,
    time_period). A day is 'morning' when morning + daytime exceed 55% of the
    summed period means, 'evening' when evening + night exceed 55%, and
    'intermediate' otherwise (including days where a ratio is undefined
    because a period has no readings).

    Parameters:
        activity_pattern (pd.DataFrame): MET log with 'EMAIL', 'activity_date'
            and 'met_interpolated' columns. If it has no 'time_period' column,
            one is derived from 'minutes_time' via get_time_period. The input
            frame is not modified.

    Returns:
        pd.DataFrame: one row per (EMAIL, activity_date) with the per-period
        means, 'total_met', 'morning_ratio', 'evening_ratio' and 'activity_type'.
    """
    log = activity_pattern.copy()

    # Do not rely on an earlier step having added 'time_period' as a side effect.
    if "time_period" not in log.columns:
        log["time_period"] = log["minutes_time"].apply(get_time_period)

    # Mean MET per day and time period, periods spread out as columns.
    daily = log.groupby(["EMAIL", "activity_date", "time_period"])["met_interpolated"].mean().unstack()

    # A period that never occurs in the data would otherwise be a missing
    # column and raise KeyError below; add it as all-missing instead.
    for period in ("morning", "daytime", "evening", "night"):
        if period not in daily.columns:
            daily[period] = np.nan

    # Share of the day's activity in the early and late halves.
    daily["total_met"] = daily.sum(axis=1)
    daily["morning_ratio"] = (daily["morning"] + daily["daytime"]) / daily["total_met"]
    daily["evening_ratio"] = (daily["evening"] + daily["night"]) / daily["total_met"]

    # Default label, overridden when one half clearly dominates.
    daily["activity_type"] = "intermediate"
    daily.loc[daily["morning_ratio"] > 0.55, "activity_type"] = "morning"
    daily.loc[daily["evening_ratio"] > 0.55, "activity_type"] = "evening"

    return daily.reset_index()

def merge_activity_patterns(train_activity, activity_pattern, train_label):
    """Attach daily activity patterns to the activity table and a per-person type to the labels.

    Parameters:
        train_activity (pd.DataFrame): daily activity data.
        activity_pattern (pd.DataFrame): output of classify_activity_type.
        train_label (pd.DataFrame): label table keyed by EMAIL.

    Returns:
        tuple: (activity inner-joined with the patterns on EMAIL and
        activity_date, labels inner-joined with 'chrono_3type', the activity
        type each person shows on the most days).
    """
    merged_activity = pd.merge(
        train_activity, activity_pattern, how='inner', on=['EMAIL', 'activity_date']
    )

    # Days per (person, type), then the type with the highest count per person.
    days_per_type = activity_pattern.groupby(['EMAIL', 'activity_type'])['EMAIL'].count()
    type_table = days_per_type.unstack()
    pattern_gr = type_table.idxmax(axis=1).reset_index(name='chrono_3type')

    merged_label = pd.merge(train_label, pattern_gr, how='inner', on='EMAIL')
    return merged_activity, merged_label


## chronotype 계산 through Sleep data

In [71]:
import pandas as pd

def classify_chronotype(wake_time):
    """
    Classify a chronotype from the hour of waking.

    Parameters:
        wake_time: object exposing an `.hour` attribute (e.g. datetime.time or a timestamp)

    Returns:
        str: 'morning' (before 06:00), 'intermediate' (06:00-08:59) or 'evening' (anything else)
    """
    hour = wake_time.hour
    label = "evening"  # 09:00 or later
    if hour < 6:
        label = "morning"
    elif 6 <= hour < 9:
        label = "intermediate"
    return label

def calculate_wakeup_time(row):
    """
    Score how far the actual wake-up time is from the ideal one for the row's chronotype.

    Parameters:
        row: mapping-like record with 'sleep_bedtime_end' (exposing .hour and .minute)
             and 'chronotype' ('morning', 'intermediate' or 'evening')

    Returns:
        int: absolute difference, in minutes of the day, between the actual and ideal wake-up time
    """
    # Ideal wake-up time per chronotype, in minutes after midnight.
    ideal_minutes_by_type = {
        "morning": 360,       # 06:00
        "intermediate": 420,  # 07:00
        "evening": 540,       # 09:00
    }

    woke_at = row["sleep_bedtime_end"]
    actual_minutes = woke_at.hour * 60 + woke_at.minute
    ideal_minutes = ideal_minutes_by_type[row["chronotype"]]

    return abs(actual_minutes - ideal_minutes)

def assign_chronotype(df):
    """
    Add a chronotype label and a wake-up score to every sleep record.

    Parameters:
        df (pd.DataFrame): sleep data with a 'sleep_bedtime_end' column

    Returns:
        pd.DataFrame: copy of `df` with 'chronotype' (from classify_chronotype) and
        'wakeup_time_score' (from calculate_wakeup_time) added; the input is not modified
    """
    labelled = df.assign(chronotype=df["sleep_bedtime_end"].apply(classify_chronotype))
    labelled["wakeup_time_score"] = labelled.apply(calculate_wakeup_time, axis=1)
    return labelled


## 개인별 수면적정시간

In [72]:
# import pandas as pd
# import numpy as np

# Recommended total sleep per chronotype, in seconds (7 h, 7 h 30 min, 8 h).
CHRONOTYPE_SLEEP_GOAL = {
    name: hours * 60 * 60
    for name, hours in (("morning", 7), ("intermediate", 7.5), ("evening", 8))
}

def calculate_sleep_alignment(df):
    """
    Difference between actual total sleep and the goal for the row's chronotype.

    Parameters:
        df (pd.DataFrame): sleep data with 'chronotype' and 'sleep_total' columns

    Returns:
        pd.Series: sleep_total minus the chronotype goal (seconds); missing for
        chronotypes that have no entry in CHRONOTYPE_SLEEP_GOAL
    """
    goal_seconds = df["chronotype"].map(CHRONOTYPE_SLEEP_GOAL)
    return df["sleep_total"].sub(goal_seconds)

def calculate_abnormal_wake_time(df):
    """
    Flag wake-ups whose hour deviates from the mean wake hour by more than 1.5 standard deviations.

    Parameters:
        df (pd.DataFrame): sleep data with 'sleep_bedtime_end' (datetime),
            'avg_wake_time' and 'std_wake_time' columns

    Returns:
        numpy.ndarray: 1 where the deviation exceeds the threshold, 0 elsewhere
    """
    wake_hour = df["sleep_bedtime_end"].dt.hour
    deviation = np.abs(wake_hour - df["avg_wake_time"])
    limit = 1.5 * df["std_wake_time"]
    return np.where(deviation > limit, 1, 0)

def process_sleep_data(df):
    """
    Derive sleep-alignment and wake-time features from the sleep table.

    Parameters:
        df (pd.DataFrame): sleep data with 'EMAIL', 'chronotype', 'sleep_total',
            'sleep_duration', 'sleep_bedtime_start' and 'sleep_bedtime_end' columns

    Returns:
        pd.DataFrame: copy of `df` extended with chronotype alignment, duration in
        hours, clock times, per-user wake/duration statistics, the deviation from
        the user's mean duration and the abnormal-wake flag
    """
    enriched = df.copy()

    # Gap between total sleep and the chronotype goal, in seconds and in minutes.
    alignment_seconds = calculate_sleep_alignment(enriched)
    enriched["sleep_alignment_chronotype"] = alignment_seconds
    enriched["sleep_alignment_chronotype_mins"] = enriched["sleep_alignment_chronotype"] / 60

    # Sleep duration: seconds -> hours.
    enriched["sleep_duration_hours"] = enriched["sleep_duration"] / 60 / 60

    # Clock time of waking up and of going to bed.
    enriched["wake_time"] = enriched["sleep_bedtime_end"].dt.time
    enriched["bed_time"] = enriched["sleep_bedtime_start"].dt.time

    # Per-user statistics; wake time is summarised by its hour component only.
    per_user = enriched.groupby("EMAIL").agg(
        avg_wake_time=("sleep_bedtime_end", lambda stamps: stamps.dt.hour.mean()),
        avg_sleep_duration=("sleep_duration_hours", "mean"),
        std_wake_time=("sleep_bedtime_end", lambda stamps: stamps.dt.hour.std()),
        std_sleep_duration=("sleep_duration_hours", "std"),
    )
    per_user = per_user.reset_index()

    # Broadcast the per-user statistics back onto every record.
    enriched = pd.merge(enriched, per_user, on="EMAIL", how="left")

    # Absolute deviation of each night's duration from the user's mean.
    duration_gap = enriched["sleep_duration_hours"] - enriched["avg_sleep_duration"]
    enriched["sleep_alignment"] = duration_gap.abs()

    # 1 when the wake hour is unusually far from the user's mean wake hour.
    enriched["abnormal_wake_time"] = calculate_abnormal_wake_time(enriched)

    return enriched


In [73]:
def fill_na_with_user_median(df, column_name):
    """
    Fill missing values of one column with the median of the same user's values.

    Used after shift / moving-average features are built, where the first rows of
    each user have no value yet.

    Parameters:
        df (pd.DataFrame): frame with an 'EMAIL' column; modified in place
        column_name (str): column whose missing values are filled

    Returns:
        pd.DataFrame: the same `df` object. Values stay missing for users whose
        column is entirely missing.
    """
    # Per-row median of the row's own user.
    user_medians = df.groupby("EMAIL")[column_name].transform("median")

    # Assign the result back: `df[col].fillna(..., inplace=True)` operates on a
    # temporary Series and leaves `df` unchanged under pandas Copy-on-Write.
    df[column_name] = df[column_name].fillna(user_medians)
    return df

def generate_time_lag_features(df, column, lags=[2, 3], rolling_windows=[3, 7, 30]):
    """
    Add per-user time-lag and moving-average features for one column.

    Rows are used in the order they already have inside each user; nothing is sorted here.

    Parameters:
    - df: DataFrame with an 'EMAIL' column; new columns are added to it in place
    - column: source column name (e.g. 'deep_ratio_5h' or 'awake_ratio_5h')
    - lags: row offsets for the lag features (default: [2, 3])
    - rolling_windows: window sizes for the moving averages (default: [3, 7, 30])

    Returns:
    - the DataFrame with '<column>_lag<k>' and '<column>_<w>d_avg' columns, missing
      values filled by fill_na_with_user_median
    """
    # Lag features: the value `shift_by` rows earlier for the same user.
    for shift_by in lags:
        lag_col = f"{column}_lag{shift_by}"
        df[lag_col] = df.groupby("EMAIL")[column].shift(shift_by)
        df = fill_na_with_user_median(df, lag_col)

    # Moving averages: mean over the last `span` rows of the same user
    # (fewer rows are accepted at the start of a user's history).
    for span in rolling_windows:
        avg_col = f"{column}_{span}d_avg"
        rolling_mean = df.groupby("EMAIL")[column].transform(
            lambda values: values.rolling(span, min_periods=1).mean()
        )
        df[avg_col] = rolling_mean
        df = fill_na_with_user_median(df, avg_col)

    return df

## Main.py

In [92]:
# train set
# Replace outlier step counts, then repair the minute-level MET log
# (correct_met_values also drops EMAIL/date groups dominated by zero MET readings).
clean_train = remove_outliers(train_activity)
clean_train_metlog = correct_met_values(train_metlog)

# compute_activity_pattern writes a "time_period" column into clean_train_metlog in place;
# classify_activity_type groups by that column, so these two calls must stay in this order.
activity_personal_type = compute_activity_pattern(clean_train_metlog)
activity_pattern = classify_activity_type(clean_train_metlog)
# Rebinds train_activity and train_label to their merged versions.
train_activity, train_label = merge_activity_patterns(clean_train, activity_pattern, train_label)

# Wake-time based chronotype first (process_sleep_data reads the 'chronotype' column).
train_sleep = assign_chronotype(train_sleep)
train_sleep = process_sleep_data(train_sleep)


제거된 데이터 그룹: MultiIndex([('nia+411@rowan.kr', '2020-12-05')],
           names=['EMAIL', 'activity_date'])


In [93]:
# test set
# NOTE(review): outlier replacement is disabled for the test set, so the raw test_activity is
# merged below while the training set went through remove_outliers — confirm this is intended.
# clean_test = remove_outliers(test_activity)
# remove=False: no EMAIL/date group is dropped from the test MET log.
clean_test_metlog = correct_met_values(test_metlog, remove=False)

# compute_activity_pattern writes a "time_period" column into clean_test_metlog in place;
# classify_activity_type groups by that column, so these two calls must stay in this order.
# Both names below overwrite the values computed for the train set.
activity_personal_type = compute_activity_pattern(clean_test_metlog)
activity_pattern = classify_activity_type(clean_test_metlog)
# Rebinds test_activity and test_label to their merged versions.
test_activity, test_label = merge_activity_patterns(test_activity, activity_pattern, test_label)

# Wake-time based chronotype first (process_sleep_data reads the 'chronotype' column).
test_sleep = assign_chronotype(test_sleep)
test_sleep = process_sleep_data(test_sleep)

In [94]:
# Report the shapes of the processed train frames, a separator, then the test frames.
for _label, _frame in (("Train Activity: ", train_activity), ("Train Sleep: ", train_sleep)):
    print(_label, _frame.shape)
print("="*30)
for _label, _frame in (("Test Activity: ", test_activity), ("Test Sleep: ", test_sleep)):
    print(_label, _frame.shape)

Train Activity:  (9704, 35)
Train Sleep:  (9705, 44)
Test Activity:  (2478, 35)
Test Sleep:  (2478, 44)


In [97]:
# 활동량 관련 컬럼의 이동평균 추가
rag_cols = ['activity_average_met', 'activity_inactive', 'total_met', 'morning_ratio', 'evening_ratio']

for col_name in rag_cols:
    train_activity = generate_time_lag_features(train_activity, col_name, lags=[2, 3], rolling_windows=[3, 7, 30])
    test_activity = generate_time_lag_features(test_activity, col_name, lags=[2, 3], rolling_windows=[3, 7, 30])

In [98]:
# Persist the enriched activity and sleep frames (index not written).
# The target folder is not created here, so it has to exist already.
log_folder="data/processed"
# train_label_merged.to_csv(log_folder+"/train_label_merged.csv", index=False)
train_activity.to_csv(log_folder+"/train_activity_add.csv", index=False)
train_sleep.to_csv(log_folder+"/train_sleep_add.csv", index=False)
test_activity.to_csv(log_folder+"/test_activity_add.csv", index=False)
test_sleep.to_csv(log_folder+"/test_sleep_add.csv", index=False)

## Merge Data

In [99]:
# Reload the frames exported above, re-parsing the date/time columns.
train_activity = pd.read_csv("data/processed/train_activity_add.csv", parse_dates=['activity_date'])
train_sleep = pd.read_csv("data/processed/train_sleep_add.csv", parse_dates=['sleep_bedtime_start', 'sleep_bedtime_end'])
test_activity = pd.read_csv("data/processed/test_activity_add.csv", parse_dates=['activity_date'])
test_sleep = pd.read_csv("data/processed/test_sleep_add.csv", parse_dates=['sleep_bedtime_start', 'sleep_bedtime_end'])

# Additional sleep features; these files are not written by any cell visible in this notebook.
# NOTE(review): presumably produced by a separate notebook or script — confirm their provenance.
sleep_train_add = pd.read_csv("data/processed/sleep_train_add.csv")
sleep_test_add = pd.read_csv("data/processed/sleep_test_add.csv")

In [100]:
# Shapes of every frame that goes into the final column-wise merge.
print("Train Activity: ", train_activity.shape)
print("Train Sleep: ", train_sleep.shape)
print("="*30)
print("Test Activity: ", test_activity.shape)
print("Test Sleep: ", test_sleep.shape)
print("="*30)
# The "add" frames are labelled separately so they cannot be confused with train_sleep/test_sleep.
print("Train Sleep (add): ", sleep_train_add.shape)
print("Test Sleep (add): ", sleep_test_add.shape)

Train Activity:  (9704, 60)
Train Sleep:  (9705, 44)
Test Activity:  (2478, 60)
Test Sleep:  (2478, 44)
=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s
Train Sleep:  (9705, 22)
Test Sleep:  (2478, 22)


In [101]:
# Calendar date of bedtime start: timezone dropped, time of day truncated to midnight.
# NOTE(review): this date comes from sleep_bedtime_start, not from the activity table's
# activity_date — confirm both refer to the same day for the record removed below.
train_sleep['activity_date'] = train_sleep['sleep_bedtime_start'].dt.tz_localize(None).dt.normalize()

# Row(s) of the EMAIL/date group that the MET preprocessing removed from the training set.
idx = train_sleep[(train_sleep['EMAIL']=='nia+411@rowan.kr')&(train_sleep['activity_date']=='2020-12-05')].index

# idx holds row labels of train_sleep; reusing them on sleep_train_add is only valid
# when both frames share exactly the same index.
assert train_sleep.index.equals(sleep_train_add.index), (
    "train_sleep and sleep_train_add are not row-aligned; cannot drop by shared labels"
)
train_sleep = train_sleep.drop(idx, axis=0).reset_index(drop=True)
sleep_train_add = sleep_train_add.drop(idx, axis=0).reset_index(drop=True)

In [102]:
# Shapes after dropping the removed group: the three train frames should now have equal row counts.
print("Train Activity: ", train_activity.shape)
print("Train Sleep: ", train_sleep.shape)
print("="*30)
print("Test Activity: ", test_activity.shape)
print("Test Sleep: ", test_sleep.shape)
print("="*30)
# The "add" frames are labelled separately so they cannot be confused with train_sleep/test_sleep.
print("Train Sleep (add): ", sleep_train_add.shape)
print("Test Sleep (add): ", sleep_test_add.shape)

Train Activity:  (9704, 60)
Train Sleep:  (9704, 45)
Test Activity:  (2478, 60)
Test Sleep:  (2478, 44)
=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s=s
Train Sleep:  (9704, 22)
Test Sleep:  (2478, 22)


In [104]:

# Columns taken from train_sleep / test_sleep for the final dataset
# (raw sleep measures, the derived sleep features and 'DIAG_NM').
train_sleep_cols=['sleep_bedtime_start', 'sleep_bedtime_end', 'sleep_period_id',
       'sleep_duration', 'sleep_total', 'sleep_awake', 'sleep_rem',
       'sleep_light', 'sleep_deep', 'sleep_efficiency',
       'sleep_midpoint_at_delta', 'sleep_midpoint_time', 'sleep_onset_latency',
       'sleep_is_longest', 'sleep_breath_average', 'sleep_hr_average',
       'sleep_hr_lowest', 'sleep_restless', 'sleep_rmssd', 'sleep_score',
       'sleep_score_alignment', 'sleep_score_deep', 'sleep_score_disturbances',
       'sleep_score_efficiency', 'sleep_score_latency', 'sleep_score_rem',
       'sleep_score_total', 'sleep_temperature_delta',
       'sleep_temperature_deviation', 'chronotype',
       'wakeup_time_score', 'sleep_alignment_chronotype',
       'sleep_alignment_chronotype_mins', 'sleep_duration_hours', 'wake_time',
       'bed_time', 'avg_wake_time', 'avg_sleep_duration', 'std_wake_time',
       'std_sleep_duration', 'sleep_alignment', 'abnormal_wake_time','DIAG_NM']
# Columns taken from sleep_train_add / sleep_test_add.
sleep_train_add_cols=['sleep_consistency', 'sleep_consistency_30d_avg',
       'sleep_startpoint_at_delta_norm', 'sleep_midpoint_at_delta_norm',
       'sleep_end_hour', 'sleep_end_weekday', 'sleep_deep_ratio',
       'sleep_light_ratio_3d_avg', 'deep_ratio_5h', 'deep_ratio_5h_lag2',
       'deep_ratio_5h_3d_avg', 'deep_ratio_5h_30d_avg', 'awake_ratio_5h',
       'awake_ratio_5h_3d_avg', 'awake_longest_duration_lag2',
       'sleep_hr_lowest_lag2', 'hr_pattern_0', 'hr_pattern_3', 'hr_pattern_5',
       'time_to_reach_mean_min']
# Columns taken from train_activity / test_activity: the EMAIL and activity_date keys,
# raw activity measures, daily time-period features and their lag / moving-average features.
train_activity_cols=['EMAIL', 'activity_date', 'activity_non_wear', 'activity_total',
       'activity_cal_active', 'activity_cal_total', 'activity_daily_movement',
       'activity_steps', 'activity_inactivity_alerts', 'activity_rest',
       'activity_inactive', 'activity_low', 'activity_medium', 'activity_high',
       'activity_met_min_inactive', 'activity_met_min_low',
       'activity_met_min_medium', 'activity_met_min_high',
       'activity_average_met', 'activity_score',
       'activity_score_meet_daily_targets', 'activity_score_move_every_hour',
       'activity_score_recovery_time', 'activity_score_stay_active',
       'activity_score_training_frequency', 'activity_score_training_volume',
       'daytime', 'evening', 'morning', 'night', 'total_met',
       'morning_ratio', 'evening_ratio', 'activity_type',
       'activity_average_met_lag2', 'activity_average_met_lag3',
       'activity_average_met_3d_avg', 'activity_average_met_7d_avg',
       'activity_average_met_30d_avg', 'activity_inactive_lag2',
       'activity_inactive_lag3', 'activity_inactive_3d_avg',
       'activity_inactive_7d_avg', 'activity_inactive_30d_avg',
       'total_met_lag2', 'total_met_lag3', 'total_met_3d_avg',
       'total_met_7d_avg', 'total_met_30d_avg', 'morning_ratio_lag2',
       'morning_ratio_lag3', 'morning_ratio_3d_avg', 'morning_ratio_7d_avg',
       'morning_ratio_30d_avg', 'evening_ratio_lag2', 'evening_ratio_lag3',
       'evening_ratio_3d_avg', 'evening_ratio_7d_avg',
       'evening_ratio_30d_avg']

In [None]:
# The final tables are built by placing activity, extra sleep and sleep columns side by side.
train_parts = [train_activity[train_activity_cols], sleep_train_add[sleep_train_add_cols], train_sleep[train_sleep_cols]]
test_parts = [test_activity[train_activity_cols], sleep_test_add[sleep_train_add_cols], test_sleep[train_sleep_cols]]

# pd.concat(axis=1) aligns rows on the index and pads mismatches with NaN instead of
# failing, so make sure the three frames of each split share exactly the same index.
for split_name, parts in (("train", train_parts), ("test", test_parts)):
    assert all(part.index.equals(parts[0].index) for part in parts[1:]), (
        f"{split_name}: frames are not row-aligned; concat(axis=1) would pad rows with NaN"
    )

train_final = pd.concat(train_parts, axis=1)
test_final = pd.concat(test_parts, axis=1)

In [110]:
# Row/column count of the final train table.
train_final.shape

(9704, 122)

In [109]:
# Row/column count of the final test table; the column count should match the train table.
test_final.shape

(2478, 122)

In [111]:
import os
# Output folder for the final datasets; created if it does not exist yet.
folder="data/final"
os.makedirs(folder, exist_ok=True)

# train_label_merged.to_csv(log_folder+"/train_label_merged.csv", index=False)
# Write the final train/test tables without the index column.
train_final.to_csv(folder+"/train.csv", index=False)
test_final.to_csv(folder+"/test.csv", index=False)