# Viewing the Full Dataset via Data Wrangler

### Choi Chan Woo (인하대학교 에너지자원환경연구실, Py_Renewable Team)

This Python code, written on 12/15, is for viewing the full dataset used in this project (because of the CSV limitations of LibreOffice and Excel).

In [None]:
import pandas as pd

## Loading Dataset

In [None]:
# Load the regional hourly solar/wind generation CSV for 2024 (CP949-encoded).
df_Renewable_Generation = pd.read_csv(
    '../Dataset/Base/한국전력거래소_지역별 시간별 태양광 및 풍력 발전량_2024.csv',
    encoding='cp949',
)

In [None]:
# Load the ASOS hourly climate observations export (CP949-encoded).
# low_memory=False has pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns.
df_climate_ASOS_2024_Full = pd.read_csv(
    '../Dataset/Base/OBS_ASOS_TIM_20251215090557.csv',
    encoding='cp949',
    low_memory=False,
)

## Full Dataset View by Data Wrangler

In [None]:
# Preview the first five rows of the renewable generation data.
df_Renewable_Generation.head(n=5)

In [None]:
# Preview the first five rows of the ASOS climate data.
df_climate_ASOS_2024_Full.head(n=5)

## Dataset Properties

In [None]:
# Print a labelled DataFrame.info() summary of df_Renewable_Generation.
print("df_Renewable_Generation Info:")
df_Renewable_Generation.info()

In [None]:
# Print a labelled DataFrame.info() summary of df_climate_ASOS_2024_Full.
print("df_climate_ASOS_2024_Full Info:")
df_climate_ASOS_2024_Full.info()

## Showing Feature names

In [None]:
# List every column (feature) name of the ASOS climate data.
list(df_climate_ASOS_2024_Full.columns)

In [None]:
# List every column (feature) name of the renewable generation data.
list(df_Renewable_Generation.columns)

## NaN Value Checking

In [None]:
# Check NaN values: print the number of missing entries in each column.
print("NaN values in df_Renewable_Generation:")
print(df_Renewable_Generation.isna().sum())

In [None]:
# Print the number of missing (NaN) entries in each column of the ASOS climate data.
print("NaN values in df_climate_ASOS_2024_Full:")
print(df_climate_ASOS_2024_Full.isna().sum())

In [None]:
# Print every station code ('지점') together with its station name ('지점명').
# A single de-duplicated pass over the two columns replaces re-filtering the
# whole frame once per station. Codes are still listed in first-appearance
# order, each paired with the first name recorded for that code.
unique_stations = df_climate_ASOS_2024_Full['지점'].unique()
station_lookup = df_climate_ASOS_2024_Full[['지점', '지점명']].drop_duplicates(subset='지점')
print("Unique Stations in df_climate_ASOS_2024_Full:")
for station, station_name in zip(station_lookup['지점'], station_lookup['지점명']):
    print(f"Station Code: {station}, Station Name: {station_name}")

In [None]:
# Print each distinct region ('지역') found in the renewable generation data.
unique_regions = df_Renewable_Generation['지역'].unique()
print("Unique Regions in df_Renewable_Generation:")
for region in unique_regions:
    print("Region: " + str(region))

In [None]:
# Keep only the target regions ('인천시' and '제주') from the renewable generation data.
# .copy() makes the subset an independent frame, so the later edit of its '지역'
# column does not raise SettingWithCopyWarning or depend on the parent frame.
df_Renewable_Generation_Target = df_Renewable_Generation[
    df_Renewable_Generation['지역'].isin(['인천시', '제주'])
].copy()
df_Renewable_Generation_Target.head()

In [None]:
# Save the target-region subset as a CP949-encoded CSV, without the index column.
df_Renewable_Generation_Target.to_csv('../Dataset/한국전력거래소_인천시_제주_태양광_풍력_발전량_2024.csv', index=False, encoding='cp949')

In [None]:
# Keep only the climate rows whose station code ('지점') is one of
# 102, 112, 201, 98, 99, 119, 202, 203, 184, 185, 188, 189.
df_climate_ASOS_2024_Selected = df_climate_ASOS_2024_Full.loc[
    df_climate_ASOS_2024_Full['지점'].isin(
        [102, 112, 201, 98, 99, 119, 202, 203, 184, 185, 188, 189]
    )
]
df_climate_ASOS_2024_Selected.head(n=5)

In [None]:
# Save the selected-station climate rows as a CP949-encoded CSV, without the index column.
df_climate_ASOS_2024_Selected.to_csv('../Dataset/기후데이터_인천시_제주_2024.csv', index=False, encoding='cp949')

In [None]:
# Remove the QC flag columns (names ending in 'QC플래그') from the selected climate data.
columns_to_drop = [
    name
    for name in df_climate_ASOS_2024_Selected.columns
    if name.endswith('QC플래그')
]
df_climate_ASOS_2024_Cleaned = df_climate_ASOS_2024_Selected.drop(columns_to_drop, axis='columns')
df_climate_ASOS_2024_Cleaned

In [None]:
# Remove the station-code column ('지점') from the cleaned climate data.
columns_to_drop = ['지점']
df_climate_ASOS_2024_Cleaned = df_climate_ASOS_2024_Cleaned.drop(columns_to_drop, axis='columns')
df_climate_ASOS_2024_Cleaned

In [None]:
# Print the number of missing (NaN) entries in each column of the cleaned climate data.
print("NaN values in df_climate_ASOS_2024_Cleaned:")
print(df_climate_ASOS_2024_Cleaned.isna().sum())

In [None]:
# Drop every column of the cleaned climate data that has more than 10000 NaN values.
nan_threshold = 10000
nan_counts = df_climate_ASOS_2024_Cleaned.isna().sum()
columns_to_drop = nan_counts.index[nan_counts > nan_threshold]
df_climate_ASOS_2024_Cleaned = df_climate_ASOS_2024_Cleaned.drop(columns_to_drop, axis='columns')
df_climate_ASOS_2024_Cleaned

In [None]:
# Write one CSV per station name ('지점명') into ../Dataset/Wind_Area/.
from pathlib import Path

# to_csv does not create missing directories, so make sure the target exists.
Path('../Dataset/Wind_Area').mkdir(parents=True, exist_ok=True)

station_names = df_climate_ASOS_2024_Cleaned['지점명'].unique()
# One groupby pass replaces re-filtering the whole frame once per station.
# sort=False keeps the stations in first-appearance order.
# NOTE: rows whose station name is NaN belong to no group and are not exported.
for station, df_station in df_climate_ASOS_2024_Cleaned.groupby('지점명', sort=False):
    df_station.to_csv(f'../Dataset/Wind_Area/기후데이터_{station}_2024_Cleaned.csv', index=False, encoding='cp949')

In [None]:
# Build separate frames for the stations named '인천' (Incheon) and '제주' (Jeju).
df_incheon = df_climate_ASOS_2024_Cleaned.loc[df_climate_ASOS_2024_Cleaned['지점명'].eq('인천')]
df_jeju = df_climate_ASOS_2024_Cleaned.loc[df_climate_ASOS_2024_Cleaned['지점명'].eq('제주')]

In [None]:
# Display the Incheon station frame.
df_incheon

In [None]:
# Display the Jeju station frame.
df_jeju

## Modifying Renewable Energy Target

In [None]:
# Rename the region '인천시' to '인천' in df_Renewable_Generation_Target.
# assign() returns a new frame, so this works whether or not the target frame is
# still a slice of df_Renewable_Generation, and never raises SettingWithCopyWarning.
df_Renewable_Generation_Target = df_Renewable_Generation_Target.assign(
    **{'지역': df_Renewable_Generation_Target['지역'].replace({'인천시': '인천'})}
)
df_Renewable_Generation_Target

In [None]:
# Keep only the rows whose fuel source ('연료원') is wind ('풍력').
# .copy() makes the subset an independent frame, so adding columns to it later
# does not raise SettingWithCopyWarning.
df_Renewable_Generation_Wind = df_Renewable_Generation_Target[
    df_Renewable_Generation_Target['연료원'] == '풍력'
].copy()
df_Renewable_Generation_Wind

In [None]:
# Combine trade date ('거래일자') and trade hour ('거래시간') into one 'datetime' column.
# 거래시간 holds hours 1-24, but strptime's %H only accepts 00-23, so parsing
# "24:00" raises ValueError. Instead, parse the date alone and add the hour as a
# timedelta: hour 1 -> 01:00 the same day, hour 24 -> 00:00 the following day.
trade_date = pd.to_datetime(
    df_Renewable_Generation_Wind['거래일자'].astype(str),
    format='%Y%m%d',
)
hour_offset = pd.to_timedelta(
    pd.to_numeric(df_Renewable_Generation_Wind['거래시간']),
    unit='h',
)
# assign() returns a new frame, so no SettingWithCopyWarning is raised even if
# df_Renewable_Generation_Wind is still a slice of another frame.
df_Renewable_Generation_Wind = df_Renewable_Generation_Wind.assign(
    datetime=trade_date + hour_offset
)
df_Renewable_Generation_Wind