In [10]:
import pandas as pd
import numpy as np

# Build a small synthetic hourly weather table with deliberately injected
# missing values, used below to demonstrate interpolation.

# Fix the global NumPy seed so every run produces the same data.
np.random.seed(1234)

# Number of hourly observations. The missing-value counts below are drawn
# without replacement, so they must not exceed this value.
N_ROWS = 20

# "h" is the hourly frequency alias; the upper-case "H" spelling is deprecated
# in recent pandas releases and emits a FutureWarning.
date_range = pd.date_range(start="2025-06-01", periods=N_ROWS, freq="h")
regions = ["Seoul", "Busan", "Incheon"]
region_list = np.random.choice(regions, size=N_ROWS)

temperature = np.random.uniform(20, 35, size=N_ROWS)  # values in [20, 35) degrees
humidity = np.random.uniform(40, 80, size=N_ROWS)     # values in [40, 80) percent

# Pick distinct row positions at random and blank them out to simulate
# missing readings (15 temperature values, 12 humidity values).
nan_indices_temp = np.random.choice(N_ROWS, size=15, replace=False)
nan_indices_humidity = np.random.choice(N_ROWS, size=12, replace=False)

temperature[nan_indices_temp] = np.nan
humidity[nan_indices_humidity] = np.nan

# Assemble the DataFrame; "rain" is intentionally an all-NaN column.
df = pd.DataFrame({
    "datetime": date_range,
    "region": region_list,
    "temperature": temperature,
    "humidity": humidity,
    "rain": np.full(N_ROWS, np.nan),
})

df

Unnamed: 0,datetime,region,temperature,humidity,rain
0,2025-06-01 00:00:00,Incheon,,41.622246,
1,2025-06-01 01:00:00,Busan,,61.924783,
2,2025-06-01 02:00:00,Seoul,21.473779,58.503064,
3,2025-06-01 03:00:00,Seoul,,,
4,2025-06-01 04:00:00,Seoul,,53.116483,
5,2025-06-01 05:00:00,Busan,,,
6,2025-06-01 06:00:00,Busan,,,
7,2025-06-01 07:00:00,Busan,,41.897059,
8,2025-06-01 08:00:00,Incheon,,,
9,2025-06-01 09:00:00,Incheon,,,


![image.png](attachment:image.png)

In [14]:
# Fill gaps in the temperature column by linear interpolation over the whole
# table, without distinguishing regions. limit_direction='both' lets the fill
# extend to NaNs before the first and after the last valid reading.
df['temp_ip'] = df.loc[:, 'temperature'].interpolate(
    method='linear',
    limit_direction='both',
)
df

Unnamed: 0,datetime,region,temperature,humidity,rain,temp_ip
0,2025-06-01 00:00:00,Incheon,,41.622246,,21.473779
1,2025-06-01 01:00:00,Busan,,61.924783,,21.473779
2,2025-06-01 02:00:00,Seoul,21.473779,58.503064,,21.473779
3,2025-06-01 03:00:00,Seoul,,,,22.023649
4,2025-06-01 04:00:00,Seoul,,53.116483,,22.573519
5,2025-06-01 05:00:00,Busan,,,,23.123389
6,2025-06-01 06:00:00,Busan,,,,23.673259
7,2025-06-01 07:00:00,Busan,,41.897059,,24.223128
8,2025-06-01 08:00:00,Incheon,,,,24.772998
9,2025-06-01 09:00:00,Incheon,,,,25.322868


In [22]:
# Interpolate missing values separately for each region, so that readings
# from one region are never used to fill gaps in another.
#
# Only numeric columns are interpolated: calling interpolate() on a frame
# that still contains the object-dtype "region" column is deprecated in
# recent pandas releases, and those columns have nothing to interpolate.
numeric_cols = df.select_dtypes(include='number').columns

# Collect the per-region pieces in a list and concatenate once at the end
# instead of growing a DataFrame inside the loop (which re-copies all
# accumulated rows on every iteration and starts from an empty frame).
region_frames = []
for r in df['region'].unique():
    df_tmp = df[df['region'] == r].copy()
    for col in numeric_cols:
        # NOTE(review): the default linear method treats rows as equally
        # spaced and ignores the gaps in "datetime" within a region —
        # confirm that this is the intended behaviour.
        df_tmp[col] = df_tmp[col].interpolate(limit_direction='both')
    region_frames.append(df_tmp)

# Rows stay grouped by region in order of first appearance. A column that is
# entirely NaN within a region stays NaN because there is nothing to
# interpolate from. Fall back to a plain copy when df has no rows.
df_result = pd.concat(region_frames, axis=0) if region_frames else df.copy()

df_result

Unnamed: 0,datetime,region,temperature,humidity,rain,temp_ip
0,2025-06-01 00:00:00,Incheon,28.072218,41.622246,,21.473779
8,2025-06-01 08:00:00,Incheon,28.072218,43.398806,,24.772998
9,2025-06-01 09:00:00,Incheon,28.072218,45.175365,,25.322868
10,2025-06-01 10:00:00,Incheon,28.072218,46.951925,,25.872738
13,2025-06-01 13:00:00,Incheon,28.072218,48.728484,,27.522348
14,2025-06-01 14:00:00,Incheon,28.072218,50.505044,,28.072218
15,2025-06-01 15:00:00,Incheon,31.859332,50.505044,,31.859332
1,2025-06-01 01:00:00,Busan,,61.924783,,21.473779
5,2025-06-01 05:00:00,Busan,,55.248875,,23.123389
6,2025-06-01 06:00:00,Busan,,48.572967,,23.673259
