In [12]:
import json
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# Load the DB connection settings from the JSON config file.
# JSON is read explicitly as UTF-8 instead of the platform default encoding.
with open('db-config.json', encoding='utf-8') as f:
    config = json.load(f)

user = config['user']
password = config['password']
host = config['host']
port = config['port']
database = config['database']

# Create the SQLAlchemy engine.
# The user name and password are percent-encoded so that reserved URL
# characters in the credentials (e.g. '@', ':', '/', '#') cannot corrupt the
# connection URL.
# NOTE(review): assumes the values in db-config.json are stored raw (not
# already percent-encoded) — confirm.
engine = create_engine(
    f"mysql+pymysql://{quote_plus(str(user))}:{quote_plus(str(password))}"
    f"@{host}:{port}/{database}?charset=utf8mb4"
)



In [13]:
# Read every row of the uv_dataset table into a DataFrame
uv_dataset = pd.read_sql(
    sql="SELECT * FROM uv_dataset",
    con=engine,
)

In [4]:
# Collapse the loaded rows to one row per calendar day, keeping the per-day
# maximum of every column.
#
# - `datetime` is truncated to midnight (00:00:00) and used as the grouping key,
#   so no helper column is added to `uv_dataset` and no string round-trip is
#   needed to rebuild the timestamp.
# - `region` is set to the single label '서울시' before aggregating, so the
#   per-day max of that column is just that label.
# - `.assign` returns a new frame; `uv_dataset` itself is left unchanged.
daily = (
    uv_dataset
    .assign(
        datetime=pd.to_datetime(uv_dataset['datetime']).dt.normalize(),
        region='서울시',
    )
    .groupby('datetime', as_index=False)
    .max()
    .sort_values(by='datetime')
    .reset_index(drop=True)
)

# Quick look at the first rows
print(daily.head())

    datetime region    no2   co    so2     o3  pm10  pm25  temperature  \
0 2015-01-01    서울시  0.026  0.8  0.008  0.032   168    42         -3.8   
1 2015-01-02    서울시  0.048  1.0  0.009  0.034    57    27         -0.9   
2 2015-01-03    서울시  0.079  1.6  0.012  0.020    76    50          3.0   
3 2015-01-04    서울시  0.104  2.7  0.011  0.027    97    53          8.2   
4 2015-01-05    서울시  0.106  2.6  0.018  0.028   133    77          9.9   

   wind_direction  wind_speed  precipitation  humidity  uv_max  
0           358.9         8.3            0.0      71.0     4.7  
1           359.6         7.0            0.0      82.0     4.7  
2           359.8         3.7            0.0      91.0     4.7  
3           359.3         4.2            0.0     100.0     4.7  
4           359.3         5.0            0.0     100.0     4.7  


In [5]:
# Show the dtype of the rebuilt datetime column, then its first rows
print(daily['datetime'].dtype, daily['datetime'].head(), sep='\n')

datetime64[ns]
0   2015-01-01
1   2015-01-02
2   2015-01-03
3   2015-01-04
4   2015-01-05
Name: datetime, dtype: datetime64[ns]


In [6]:
# List the distinct time-of-day values present in the datetime column
print(pd.unique(daily['datetime'].dt.time))

[datetime.time(0, 0)]


In [7]:
# Preview how the first timestamps render with an explicit time part.
# Uses the vectorised .dt accessor on just the previewed rows instead of a
# row-by-row Python lambda over the whole column.
daily['datetime'].head().dt.strftime('%Y-%m-%d %H:%M:%S')

0    2015-01-01 00:00:00
1    2015-01-02 00:00:00
2    2015-01-03 00:00:00
3    2015-01-04 00:00:00
4    2015-01-05 00:00:00
Name: datetime, dtype: object

In [8]:
# Display the aggregated daily frame as the cell's output
daily

Unnamed: 0,datetime,region,no2,co,so2,o3,pm10,pm25,temperature,wind_direction,wind_speed,precipitation,humidity,uv_max
0,2015-01-01,서울시,0.026,0.8,0.008,0.032,168,42,-3.8,358.9,8.3,0.0,71.0,4.7
1,2015-01-02,서울시,0.048,1.0,0.009,0.034,57,27,-0.9,359.6,7.0,0.0,82.0,4.7
2,2015-01-03,서울시,0.079,1.6,0.012,0.020,76,50,3.0,359.8,3.7,0.0,91.0,4.7
3,2015-01-04,서울시,0.104,2.7,0.011,0.027,97,53,8.2,359.3,4.2,0.0,100.0,4.7
4,2015-01-05,서울시,0.106,2.6,0.018,0.028,133,77,9.9,359.3,5.0,0.0,100.0,4.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3588,2024-12-27,서울시,0.046,0.8,0.007,0.043,45,24,2.7,357.4,7.0,0.0,80.0,3.3
3589,2024-12-28,서울시,0.037,0.8,0.008,0.041,47,23,1.2,358.6,6.7,0.0,72.0,3.5
3590,2024-12-29,서울시,0.054,1.1,0.009,0.037,41,30,6.6,359.7,3.4,0.0,87.0,3.5
3591,2024-12-30,서울시,0.058,1.3,0.008,0.052,95,59,10.3,359.5,6.5,0.0,91.0,3.0


In [9]:
# Report the (rows, columns) shape of the aggregated frame
print(f"최종 병합 데이터 크기: {daily.shape}")

최종 병합 데이터 크기: (3593, 14)


In [10]:
# Write the daily frame back to the database under the same table name.
# if_exists='replace' drops the existing uv_dataset table and recreates it
# from `daily`; the DataFrame index is not written.
# NOTE(review): this overwrites the very table the notebook loads from, so the
# pre-aggregation rows are no longer in that table afterwards — confirm that
# is intended / a copy of the original data exists.
daily.to_sql('uv_dataset', engine, if_exists='replace', index=False)

print("일별 최고치 기준으로 재구성 완료 & DB 저장")

일별 최고치 기준으로 재구성 완료 & DB 저장


In [11]:
# Export the daily frame to CSV (no index column), writing timestamps with an
# explicit time part
daily.to_csv(
    path_or_buf='자외선_일별최고치_데이터셋.csv',
    date_format='%Y-%m-%d %H:%M:%S',
    index=False,
)