<a href="https://colab.research.google.com/github/ancestor9/Affalatoxin-Analysis/blob/main/prediction/09_09_timeseries_data_missing_imputation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [47]:
import pandas as pd
import numpy as np
import re

# Build a reproducible 10x10 sample of standard-normal values.
np.random.seed(42)
data = np.random.randn(10, 10)

# Punch NaN holes at the chosen (row, column) positions in one indexed write.
nan_positions = [
    (0, 0), (0, 1),
    (1, 3), (1, 4), (1, 5),
    (2, 5),
    (4, 7),
    (6, 2),
    (8, 9),
]
nan_rows, nan_cols = zip(*nan_positions)
data[list(nan_rows), list(nan_cols)] = np.nan

# Column labels follow a '<group>_<index>' naming pattern.
columns = [
    'wtf_01', 'wtf_02', 'wtf_03', 'wtf_04',
    'vsf_01', 'vsf_02', 'vsf_03',
    'other_01', 'other_02', 'other_03',
]

df = pd.DataFrame(data=data, columns=columns)
print("--- 변환 전 DataFrame ---")
display(df)

# 효율적인 NaN 대체 함수 (자동 그룹화)
def fix_nan_auto_group(df):
    result = df.copy()

    # 컬럼 이름에서 그룹 접두사(e.g., 'wtf', 'vsf')를 자동으로 추출
    # '_01' 과 같은 부분을 제외하고 고유한 그룹 이름을 찾음
    groups = {re.findall(r'(\w+)_', col)[0] for col in df.columns if '_' in col}

    # 각 그룹별로 NaN 값 채우기
    for group_name in groups:
        group_cols = [col for col in df.columns if col.startswith(group_name)]
        # 그룹 내에서 뒤쪽 값으로 먼저 채운 후 앞쪽 값으로 채우기
        result[group_cols] = result[group_cols].bfill(axis=1).ffill(axis=1)

    # 그룹 내에서 채워지지 않은 NaN(행의 양끝)을 0으로 채우기
    result = result.fillna(0)

    return result

# Apply the group-wise imputation to the sample frame and show the outcome.
result = df.pipe(fix_nan_auto_group)
print("\n--- 변환 후 DataFrame ---")
display(result)

--- 변환 전 DataFrame ---


Unnamed: 0,wtf_01,wtf_02,wtf_03,wtf_04,vsf_01,vsf_02,vsf_03,other_01,other_02,other_03
0,,,0.647689,1.52303,-0.234153,-0.234137,1.579213,0.767435,-0.469474,0.54256
1,-0.463418,-0.46573,0.241962,,,,-1.012831,0.314247,-0.908024,-1.412304
2,1.465649,-0.225776,0.067528,-1.424748,-0.544383,,-1.150994,0.375698,-0.600639,-0.291694
3,-0.601707,1.852278,-0.013497,-1.057711,0.822545,-1.220844,0.208864,-1.95967,-1.328186,0.196861
4,0.738467,0.171368,-0.115648,-0.301104,-1.478522,-0.719844,-0.460639,,0.343618,-1.76304
5,0.324084,-0.385082,-0.676922,0.611676,1.031,0.93128,-0.839218,-0.309212,0.331263,0.975545
6,-0.479174,-0.185659,,-1.196207,0.812526,1.35624,-0.07201,1.003533,0.361636,-0.64512
7,0.361396,1.538037,-0.035826,1.564644,-2.619745,0.821903,0.087047,-0.299007,0.091761,-1.987569
8,-0.219672,0.357113,1.477894,-0.51827,-0.808494,-0.501757,0.915402,0.328751,-0.52976,
9,0.097078,0.968645,-0.702053,-0.327662,-0.392108,-1.463515,0.29612,0.261055,0.005113,-0.234587



--- 변환 후 DataFrame ---


Unnamed: 0,wtf_01,wtf_02,wtf_03,wtf_04,vsf_01,vsf_02,vsf_03,other_01,other_02,other_03
0,0.647689,0.647689,0.647689,1.52303,-0.234153,-0.234137,1.579213,0.767435,-0.469474,0.54256
1,-0.463418,-0.46573,0.241962,0.241962,-1.012831,-1.012831,-1.012831,0.314247,-0.908024,-1.412304
2,1.465649,-0.225776,0.067528,-1.424748,-0.544383,-1.150994,-1.150994,0.375698,-0.600639,-0.291694
3,-0.601707,1.852278,-0.013497,-1.057711,0.822545,-1.220844,0.208864,-1.95967,-1.328186,0.196861
4,0.738467,0.171368,-0.115648,-0.301104,-1.478522,-0.719844,-0.460639,0.343618,0.343618,-1.76304
5,0.324084,-0.385082,-0.676922,0.611676,1.031,0.93128,-0.839218,-0.309212,0.331263,0.975545
6,-0.479174,-0.185659,-1.196207,-1.196207,0.812526,1.35624,-0.07201,1.003533,0.361636,-0.64512
7,0.361396,1.538037,-0.035826,1.564644,-2.619745,0.821903,0.087047,-0.299007,0.091761,-1.987569
8,-0.219672,0.357113,1.477894,-0.51827,-0.808494,-0.501757,0.915402,0.328751,-0.52976,-0.52976
9,0.097078,0.968645,-0.702053,-0.327662,-0.392108,-1.463515,0.29612,0.261055,0.005113,-0.234587
