In [1]:
import pandas as pd

In [2]:
# Read the raw measurement table from disk.
# The preview shows a leading "Unnamed: 0" column, i.e. the file's first
# column has no header name (presumably a saved row index -- TODO confirm).
raw_csv_path = './csv/data_simple.csv'
df = pd.read_csv(raw_csv_path)

# Leave the preview as the cell's last expression so it is rendered richly.
df.head()

Unnamed: 0,Group,Rt_scale_IKI-SD,Rt_scale_IRI-SD,Rt_long_scale_IKI-SD,Rt_long_scale_IRI-SD,Rt_arpeggio_IKI-SD,Rt_arpeggio_IRI-SD,Lt_scale_IKI-SD,Lt_scale_IRI-SD,Lt_long_scale_IKI-SD,Lt_long_scale_IRI-SD,Lt_arpeggio_IKI-SD,Lt_arpeggio_IRI-SD,handedness,affected side
0,1,13.229921,28.898811,13.31198,39.062791,15.424398,43.541499,13.630298,31.908667,13.465412,39.938112,16.358171,40.739545,R,
1,1,12.973927,31.402664,12.947017,54.441392,16.330051,45.51188,30.340508,28.444453,14.65474,58.183404,16.548132,27.776093,R,
2,1,13.557577,28.3226,11.066986,25.212559,15.888496,44.515929,14.732778,25.661814,11.653787,29.018465,21.112888,47.075235,R,
3,1,14.77329,30.408687,16.464423,35.699352,14.93032,49.281032,16.96338,29.648825,17.913393,40.423535,16.6873,35.095249,R,
4,1,15.166748,22.041914,12.79219,30.024507,11.296934,27.286541,14.566124,24.018085,12.947909,37.861518,12.329798,18.891626,R,


In [3]:
# The two categorical columns this cell inspects and cleans.
side_cols = ['handedness', 'affected side']

# First, look at the distinct raw values in both columns (before any change).
print("Handedness unique values:", df[side_cols[0]].unique())
print("Affected side unique values:", df[side_cols[1]].unique())

# Healthy-control rows (Group 1) get the literal string 'None' as their
# affected side, replacing whatever was stored there.
# NOTE(review): recent pandas versions treat the text "None" as a missing
# value in read_csv by default, so a reader of the saved file may see NaN
# again unless it passes keep_default_na=False -- verify against consumers.
is_control = df['Group'] == 1
df.loc[is_control, 'affected side'] = 'None'

# Count the NaNs that remain in each of the two columns.
missing_counts = df[side_cols].isna().sum()
print("\nMissing values count:")
print(missing_counts)

# If any handedness value is missing, show the offending rows.
no_handedness = df['handedness'].isna()
if no_handedness.any():
    print("\nRows with missing handedness:")
    print(df[no_handedness])

# Tabulate handedness / affected-side combinations within each group.
group_counts = df.groupby('Group')[side_cols].value_counts()
print("\nGroup distribution:")
print(group_counts)

Handedness unique values: ['R' 'L']
Affected side unique values: [nan 'R' 'B' 'L']

Missing values count:
handedness       0
affected side    0
dtype: int64

Group distribution:
Group  handedness  affected side
1      R           None             21
       L           None              1
2      R           R                13
                   B                 4
                   L                 4
       L           R                 1
Name: count, dtype: int64


In [4]:
# Save the preprocessed table.
# The path is defined once and reused in the confirmation message so the
# message can never disagree with the file that was actually written
# (previously it reported './csv/preprocessed_data.csv').
output_path = './csv/preprocessed_data_simple.csv'

# index=False: the DataFrame's own row index is not written as a column.
df.to_csv(output_path, index=False)
print(f"前処理済みデータを保存しました: {output_path}")

前処理済みデータを保存しました: ./csv/preprocessed_data.csv


# データの概要と前処理結果

## データの構造
- **Group**: 1 = 健常者群、2 = 職業性ジストニア群
- **測定項目**:
  - Rt/Lt: 右手/左手のデータ
  - 課題: scale, long_scale, arpeggio
  - 指標: IKI (inter-keystroke interval), IRI (inter-key-release interval)
  - 列名の形式: `{Rt|Lt}_{課題}_{指標}-SD`（例: `Rt_scale_IKI-SD`）

## 被験者の特徴
### 健常者群 (Group 1, n=22)
- 右利き: 21名
- 左利き: 1名
- affected side: すべて'None'

### ジストニア群 (Group 2, n=22)
- 右利き (n=21):
  - 右側優位: 13名
  - 両側性: 4名
  - 左側優位: 4名
- 左利き (n=1):
  - 右側優位: 1名

## データの品質
- 前処理後は handedness, affected side ともに欠損値なし（元データでは健常者群の affected side が NaN であり、文字列 'None' に置換済み）
- affected sideは 'None'（文字列）, R, B, L の4カテゴリ
- handednessは R, L の2カテゴリ