In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

In [2]:
raw_data = pd.read_excel('raw_data2.xlsx')
raw_data

Unnamed: 0,date_time,전력사용량(kWh),기온(°C),풍속(m/s),습도(%),강수량(mm),일조(hr),time_0,time_1,power_cat
0,2020-06-01 00,8179.056,17.6,2.5,92,0.8,0.0,1,0,0
1,2020-06-01 01,8135.640,17.7,2.9,91,0.3,0.0,1,0,0
2,2020-06-01 02,8107.128,17.5,3.2,91,0.0,0.0,1,0,0
3,2020-06-01 03,8048.808,17.1,3.2,91,0.0,0.0,1,0,0
4,2020-06-01 04,8043.624,17.0,3.3,92,0.0,0.0,1,0,0
5,2020-06-01 05,8010.576,16.9,3.4,93,0.0,0.0,1,0,0
6,2020-06-01 06,7978.176,16.7,3.4,90,0.1,0.0,1,0,0
7,2020-06-01 07,8019.000,16.9,2.3,86,0.0,0.1,1,0,0
8,2020-06-01 08,8020.944,17.8,3.3,80,0.0,0.3,0,1,0
9,2020-06-01 09,8083.152,19.3,2.1,73,0.0,0.7,0,1,0


In [3]:
raw_data.corr()

Unnamed: 0,전력사용량(kWh),기온(°C),풍속(m/s),습도(%),강수량(mm),일조(hr),time_0,time_1,power_cat
전력사용량(kWh),1.0,0.637313,0.057756,-0.007981,0.001241,0.06419,-0.291937,0.291937,0.755105
기온(°C),0.637313,1.0,0.141183,-0.529771,-0.113308,0.504956,-0.46059,0.46059,0.585676
풍속(m/s),0.057756,0.141183,1.0,-0.115921,0.189296,0.158611,-0.298931,0.298931,0.080485
습도(%),-0.007981,-0.529771,-0.115921,1.0,0.266364,-0.636673,0.358221,-0.358221,-0.049369
강수량(mm),0.001241,-0.113308,0.189296,0.266364,1.0,-0.114833,0.02764,-0.02764,-0.018338
일조(hr),0.06419,0.504956,0.158611,-0.636673,-0.114833,1.0,-0.344968,0.344968,0.088316
time_0,-0.291937,-0.46059,-0.298931,0.358221,0.02764,-0.344968,1.0,-1.0,-0.341195
time_1,0.291937,0.46059,0.298931,-0.358221,-0.02764,0.344968,-1.0,1.0,0.341195
power_cat,0.755105,0.585676,0.080485,-0.049369,-0.018338,0.088316,-0.341195,0.341195,1.0


In [4]:
train_pre = raw_data[['기온(°C)', '풍속(m/s)', '습도(%)', '강수량(mm)', '일조(hr)','time_0','time_1']]
train_pre

Unnamed: 0,기온(°C),풍속(m/s),습도(%),강수량(mm),일조(hr),time_0,time_1
0,17.6,2.5,92,0.8,0.0,1,0
1,17.7,2.9,91,0.3,0.0,1,0
2,17.5,3.2,91,0.0,0.0,1,0
3,17.1,3.2,91,0.0,0.0,1,0
4,17.0,3.3,92,0.0,0.0,1,0
5,16.9,3.4,93,0.0,0.0,1,0
6,16.7,3.4,90,0.1,0.0,1,0
7,16.9,2.3,86,0.0,0.1,1,0
8,17.8,3.3,80,0.0,0.3,0,1
9,19.3,2.1,73,0.0,0.7,0,1


In [5]:
# 공부할 데이터/검증용 데이터 분리
# 머신러닝용 라이브러리: sklearn, 사이킷런
from sklearn.model_selection import GridSearchCV,train_test_split
from sklearn.metrics import accuracy_score #채점해주는 라이브러리
from sklearn.tree import DecisionTreeClassifier

In [6]:
X_train, X_test, y_train, y_test = train_test_split(train_pre, #데이터프레임의 속성들
                 raw_data[['power_cat']], #맞추어야할 정답
                 test_size = 0.2, #테스트데이터 20%지정, 훈련데이터 80%할당
                 stratify = raw_data[['power_cat']],
                 shuffle = True,
                 random_state = 13, #seed값                                                                                   
                )

In [7]:
from sklearn.preprocessing import StandardScaler
# 변형 객체 생성
std_scaler = StandardScaler()

In [8]:
# 훈련데이터의 모수 분포 저장
std_scaler.fit(X_train)

# 훈련 데이터 스케일링
X_train_scaled = std_scaler.transform(X_train)

# 테스트 데이터의 스케일링
X_test_scaled = std_scaler.transform(X_test)

In [9]:
y_train = y_train.reset_index()
y_train = y_train.drop(['index'], axis=1)

y_test = y_test.reset_index()
y_test = y_test.drop(['index'], axis=1)

In [10]:
from sklearn.ensemble import RandomForestClassifier
estimator = RandomForestClassifier()

param_grid = {'n_estimators':[10,50,100,200,300,400], 'max_depth':[6,7,8,9,10,11]}
grid = GridSearchCV(estimator, param_grid=param_grid, cv=5, refit=True, return_train_score=True)               
grid.fit(X_train_scaled, y_train)       
              
# 각 파라미터값들에 대한 모델 결과값들이 cv_results_ 객체에 할당됨
scores_df = pd.DataFrame(grid.cv_results_).sort_values(by=['mean_test_score', 'mean_train_score'], ascending=False)

# score 결과값(ndarray형태로 할당됨) 중 특정 칼럼들만 가져오기 
scores_df[['params', 'mean_test_score', 'mean_train_score']]       

  from numpy.core.umath_tests import inner1d
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_par

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_

  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  estimator.fit(X_train, y_train, **fit_params)
  self.best_estimator_.fit(X, y, **fit_params)


Unnamed: 0,params,mean_test_score,mean_train_score
33,"{'max_depth': 11, 'n_estimators': 200}",0.672794,0.936888
14,"{'max_depth': 8, 'n_estimators': 100}",0.671569,0.823833
17,"{'max_depth': 8, 'n_estimators': 400}",0.66973,0.821231
35,"{'max_depth': 11, 'n_estimators': 400}",0.669118,0.937039
21,"{'max_depth': 9, 'n_estimators': 200}",0.669118,0.86397
28,"{'max_depth': 10, 'n_estimators': 300}",0.668505,0.904565
23,"{'max_depth': 9, 'n_estimators': 400}",0.668505,0.8678
16,"{'max_depth': 8, 'n_estimators': 300}",0.668505,0.824293
31,"{'max_depth': 11, 'n_estimators': 50}",0.667892,0.92908
32,"{'max_depth': 11, 'n_estimators': 100}",0.667279,0.932752


In [12]:
classifier = RandomForestClassifier(n_estimators = 50, max_depth=9)
classifier.fit(X_train_scaled, y_train)
classifier.score(X_train_scaled, y_train)

  


0.8425245098039216

In [13]:
y_pred = classifier.predict(X_test_scaled) #문제를 주고 풀어보라고 함. => 정답 return!
print(accuracy_score(y_test, y_pred))

0.6740196078431373
