In [105]:
from glob import glob
import pandas as pd

def csv_files_to_df(directory_path):
    csv_files = glob(f"{directory_path}/*.csv")
    
    df_list = []
    for csv_file in csv_files:
        try:
            df = pd.read_csv(csv_file, encoding='utf-8')
        except:
            df = pd.read_csv(csv_file, encoding='euc-kr')
        df_list.append(df)

    # df_list = [pd.read_csv(csv_file, encoding='utf-8') for csv_file in csv_files]

    return pd.concat(df_list, ignore_index=True)

In [107]:
humidity_dir_path = "humidity_csv"
temperature_dir_path = "temperature_csv"

humidity_df = csv_files_to_df(humidity_dir_path)
temperature_df = csv_files_to_df(temperature_dir_path)

humidity_df

Unnamed: 0,지점번호,지점명,일시,평균습도(%rh),최저습도(%rh)
0,152,울산,2019-01-01,45.1,22.0
1,152,울산,2019-01-02,32.8,17.0
2,152,울산,2019-01-03,23.5,10.0
3,152,울산,2019-01-04,22.3,11.0
4,152,울산,2019-01-05,33.6,17.0
...,...,...,...,...,...
41474,172,고창,2024-12-26,65.5,48.0
41475,172,고창,2024-12-27,71.8,52.0
41476,172,고창,2024-12-28,92.3,80.0
41477,172,고창,2024-12-29,74.5,44.0


In [110]:
humidity_df.drop(columns=["지점번호"], inplace=True)
humidity_df

Unnamed: 0,지점명,일시,평균습도(%rh),최저습도(%rh)
0,울산,2019-01-01,45.1,22.0
1,울산,2019-01-02,32.8,17.0
2,울산,2019-01-03,23.5,10.0
3,울산,2019-01-04,22.3,11.0
4,울산,2019-01-05,33.6,17.0
...,...,...,...,...
41474,고창,2024-12-26,65.5,48.0
41475,고창,2024-12-27,71.8,52.0
41476,고창,2024-12-28,92.3,80.0
41477,고창,2024-12-29,74.5,44.0


In [111]:
temperature_df.drop(columns=["지점번호"], inplace=True)
temperature_df

Unnamed: 0,지점명,일시,평균기온(℃),최고기온(℃),최고기온시각,최저기온(℃),최저기온시각,일교차
0,서울,2019-01-01,-5.0,-0.6,15:25,-8.2,06:31,7.6
1,서울,2019-01-02,-4.9,0.2,14:58,-8.8,08:02,9.0
2,서울,2019-01-03,-3.5,3.2,14:36,-8.4,07:26,11.6
3,서울,2019-01-04,-1.1,4.1,15:04,-6.2,07:57,10.3
4,서울,2019-01-05,-2.8,1.1,15:21,-5.5,23:58,6.6
...,...,...,...,...,...,...,...,...
37092,광주,2024-12-26,3.6,8.1,11:47,1.9,7:40,6.2
37093,광주,2024-12-27,2.0,4.3,12:38,0.2,8:56,4.1
37094,광주,2024-12-28,0.3,2.0,12:50,-1.4,7:08,3.4
37095,광주,2024-12-29,3.5,7.9,15:25,-0.6,7:57,8.5


In [112]:
humidity_rename_info = {"지점명": "region", 
                        "일시": "report_date", 
                        "평균습도(%rh)": "avg_humidity", 
                        "최저습도(%rh)": "min_humidity"
                        }

temperature_rename_info = {"지점명": "region", 
                           "일시": "report_date", 
                           "평균기온(°C)": "avg_temperature", 
                           "최저기온(°C)": "min_temperature", 
                           "최고기온(°C)": "max_temperature",
                           "최고기온시각": "max_temperature_time",
                           "최저기온시각": "min_temperature_time",
                           "일교차": "temperature_range" 
                           }

In [113]:
humidity_df.rename(columns=humidity_rename_info, inplace=True)
temperature_df.rename(columns=temperature_rename_info, inplace=True)

humidity_df

Unnamed: 0,region,report_date,avg_humidity,min_humidity
0,울산,2019-01-01,45.1,22.0
1,울산,2019-01-02,32.8,17.0
2,울산,2019-01-03,23.5,10.0
3,울산,2019-01-04,22.3,11.0
4,울산,2019-01-05,33.6,17.0
...,...,...,...,...
41474,고창,2024-12-26,65.5,48.0
41475,고창,2024-12-27,71.8,52.0
41476,고창,2024-12-28,92.3,80.0
41477,고창,2024-12-29,74.5,44.0


In [114]:
temperature_df

Unnamed: 0,region,report_date,평균기온(℃),최고기온(℃),max_temperature_time,최저기온(℃),min_temperature_time,temperature_range
0,서울,2019-01-01,-5.0,-0.6,15:25,-8.2,06:31,7.6
1,서울,2019-01-02,-4.9,0.2,14:58,-8.8,08:02,9.0
2,서울,2019-01-03,-3.5,3.2,14:36,-8.4,07:26,11.6
3,서울,2019-01-04,-1.1,4.1,15:04,-6.2,07:57,10.3
4,서울,2019-01-05,-2.8,1.1,15:21,-5.5,23:58,6.6
...,...,...,...,...,...,...,...,...
37092,광주,2024-12-26,3.6,8.1,11:47,1.9,7:40,6.2
37093,광주,2024-12-27,2.0,4.3,12:38,0.2,8:56,4.1
37094,광주,2024-12-28,0.3,2.0,12:50,-1.4,7:08,3.4
37095,광주,2024-12-29,3.5,7.9,15:25,-0.6,7:57,8.5


In [115]:
temperature_rename_info = {"평균기온(℃)": "avg_temperature", 
                           "최저기온(℃)": "min_temperature", 
                           "최고기온(℃)": "max_temperature"}

temperature_df.rename(columns=temperature_rename_info, inplace=True)
temperature_df

Unnamed: 0,region,report_date,avg_temperature,max_temperature,max_temperature_time,min_temperature,min_temperature_time,temperature_range
0,서울,2019-01-01,-5.0,-0.6,15:25,-8.2,06:31,7.6
1,서울,2019-01-02,-4.9,0.2,14:58,-8.8,08:02,9.0
2,서울,2019-01-03,-3.5,3.2,14:36,-8.4,07:26,11.6
3,서울,2019-01-04,-1.1,4.1,15:04,-6.2,07:57,10.3
4,서울,2019-01-05,-2.8,1.1,15:21,-5.5,23:58,6.6
...,...,...,...,...,...,...,...,...
37092,광주,2024-12-26,3.6,8.1,11:47,1.9,7:40,6.2
37093,광주,2024-12-27,2.0,4.3,12:38,0.2,8:56,4.1
37094,광주,2024-12-28,0.3,2.0,12:50,-1.4,7:08,3.4
37095,광주,2024-12-29,3.5,7.9,15:25,-0.6,7:57,8.5


In [119]:
import os

os.getcwd()

'/Users/yonmilk/git/eda-repo-4/database_upload'

In [120]:
os.chdir("../")

In [125]:
import db_function

db_engine = db_function.get_db_engine()
db_function.initialize_table_from_df(humidity_df, "weather_humidity", engine=db_engine)

Table weather_humidity created
Inserted 41479 rows into weather_humidity


In [128]:
db_function.initialize_table_from_df(temperature_df, "weather_temperature", engine=db_engine)

Table weather_temperature created
Inserted 37097 rows into weather_temperature
