In [10]:
import pandas as pd
import numpy as np
import os
import torch
import random
import argparse
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Single seed shared by every random number generator used in this notebook.
fix_seed = 42

# Seed Python's `random`, PyTorch and NumPy's legacy global RNG with that value.
for _seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    _seed_fn(fix_seed)

In [3]:
# Load the preprocessed CSV from the raw-data folder (path is relative to the
# notebook's working directory).
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which
# looks like a saved index — confirm whether it should be dropped or read via
# index_col=0.
result_df = pd.read_csv(
    filepath_or_buffer='./Data_Raw(로우데이터)/Final_Preprocessed_Data.csv',
)

In [4]:
# Preview the first five rows (5 is the default row count of head()).
result_df.head(n=5)

Unnamed: 0,date,현재수요(MW),전체지역_기온(°C)_가중평균,전체지역_누적강수량(mm)_가중평균,전체지역_풍향(deg)_가중평균,전체지역_풍속(m/s)_가중평균,전체지역_현지기압(hPa)_가중평균,전체지역_습도(%)_가중평균,전체지역_일조(Sec)_가중평균
0,2012-06-01 00:05:00,51342.53,17.208152,0.0,94.626955,0.963723,1010.204668,81.308414,155.341851
1,2012-06-01 00:10:00,51583.17,17.0844,0.0,85.152426,0.83922,1010.135003,81.810997,155.341851
2,2012-06-01 00:15:00,51631.69,17.037522,0.0,71.425811,0.821914,1010.054353,82.184806,155.341851
3,2012-06-01 00:20:00,51252.66,17.00843,0.0,70.963977,0.850567,1009.957392,82.288909,155.341851
4,2012-06-01 00:25:00,50888.38,16.939975,0.0,72.56493,0.753808,1009.960154,82.482134,155.341851


In [5]:
# Work on an independent (deep) copy so `result_df` keeps the data as loaded.
data = result_df.copy(deep=True)

In [11]:
def _scatter_against_demand(frame, x_col, title, x_label):
    """Draw a scatter plot of one column of `frame` against electricity demand.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing `x_col` and the demand column '현재수요(MW)'.
    x_col : str
        Name of the column plotted on the x axis.
    title : str
        Figure title.
    x_label : str
        Label for the x axis.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(x=frame[x_col], y=frame['현재수요(MW)'], ax=ax)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel('Current Demand (MW)')
    plt.show()


# Check dtypes / non-null counts and count missing values per column.
# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" line.
data.info()
print(data.isnull().sum())

# Descriptive statistics
print(data.describe())

# Convert the timestamp column to datetime (a no-op if it already is datetime,
# so re-running this cell is safe).
data['date'] = pd.to_datetime(data['date'])

# Time series of electricity demand
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(data['date'], data['현재수요(MW)'], label='Current Demand (MW)')
ax.set_title('Time Series of Electricity Demand')
ax.set_xlabel('Date')
ax.set_ylabel('Current Demand (MW)')
ax.legend()
plt.show()

# Relationship between temperature and electricity demand
_scatter_against_demand(
    data,
    '전체지역_기온(°C)_가중평균',
    'Temperature vs Electricity Demand',
    'Temperature (°C)',
)

# Correlation between variables.
# Restrict to numeric columns first: `date` is datetime at this point, and
# pandas >= 2.0 raises when corr() is asked to handle non-numeric columns
# (older versions silently dropped them, so the resulting matrix is the same).
corr = data.select_dtypes(include='number').corr()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix of Variables')
plt.show()

# Relationship between wind speed and electricity demand
_scatter_against_demand(
    data,
    '전체지역_풍속(m/s)_가중평균',
    'Wind Speed vs Electricity Demand',
    'Wind Speed (m/s)',
)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1135871 entries, 0 to 1135870
Data columns (total 9 columns):
 #   Column               Non-Null Count    Dtype         
---  ------               --------------    -----         
 0   date                 1135871 non-null  datetime64[ns]
 1   현재수요(MW)             1135871 non-null  float64       
 2   전체지역_기온(°C)_가중평균     1135871 non-null  float64       
 3   전체지역_누적강수량(mm)_가중평균  1135871 non-null  float64       
 4   전체지역_풍향(deg)_가중평균    1135871 non-null  float64       
 5   전체지역_풍속(m/s)_가중평균    1135871 non-null  float64       
 6   전체지역_현지기압(hPa)_가중평균  1135871 non-null  float64       
 7   전체지역_습도(%)_가중평균      1135871 non-null  float64       
 8   전체지역_일조(Sec)_가중평균    1135871 non-null  float64       
dtypes: datetime64[ns](1), float64(8)
memory usage: 78.0 MB
None
date                   0
현재수요(MW)               0
전체지역_기온(°C)_가중평균       0
전체지역_누적강수량(mm)_가중평균    0
전체지역_풍향(deg)_가중평균      0
전체지역_풍속(m/s)_가중평균      0
전체지역_현지기압(hPa)_가중평균    0