# Forest Fires Data Set
산불 피해 면적 예측
https://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/

In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

forestfire = pd.read_csv('forestfires.csv')
# Column reference for the forest fires data (value ranges as listed in the
# UCI dataset description). The displayed frame also carries an
# 'Unnamed: 0' column, presumably a row index saved into the CSV -- confirm.
#   X     : x-axis coordinate on the Montesinho park map
#   Y     : y-axis coordinate on the Montesinho park map
#   month : month of the year (e.g. 'mar', 'aug', 'oct')
#   day   : day of the week (e.g. 'fri', 'sat', 'sun')
#   FFMC  : FFMC index of the FWI system (18.7 ~ 96.20)
#   DMC   : DMC index of the FWI system (1.1 ~ 291.3)
#   DC    : DC index of the FWI system (7.9 ~ 860.6)
#   ISI   : ISI index of the FWI system (0.0 ~ 56.10)
#   temp  : temperature in degrees Celsius (2.2 ~ 33.30)
#   RH    : relative humidity in percent (15.0 ~ 100)
#   wind  : wind speed (0.40 ~ 9.40)
#   rain  : rainfall (0.0 ~ 6.4)
#   area  : burned area (0.00 ~ 1090.84)
forestfire

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00


In [33]:
# Cost function (mean squared error)
def cost(x, y, w):
    """Return the mean squared error of the one-weight model ``w * x``.

    Args:
        x: Indexable sequence of input values.
        y: Indexable sequence of target values, read at the same indices
            as ``x``.
        w: Weight of the hypothesis ``h(x) = w * x`` (there is no bias term).

    Returns:
        The sum of squared residuals divided by ``len(x)``.
    """
    n_samples = len(x)
    squared_sum = 0
    for idx in range(n_samples):
        # Residual between the prediction w * x and the observed target.
        residual = w * x[idx] - y[idx]
        squared_sum += residual ** 2
    return squared_sum / n_samples

# Gradient descent: slope of the cost function with respect to the weight
def gradient_descent(x, y, w):
    """Return the average gradient of the squared-error cost at weight ``w``.

    For the hypothesis ``h(x) = w * x`` each sample contributes
    ``(w * x[k] - y[k]) * x[k]``. The constant factor 2 that comes from
    differentiating the square is left out, so it is effectively folded
    into the learning rate chosen by the caller.

    Args:
        x: Indexable sequence of input values.
        y: Indexable sequence of target values, read at the same indices
            as ``x``.
        w: Current weight.

    Returns:
        The mean of the per-sample gradient terms over ``len(x)`` samples.
    """
    grad_sum = 0
    for k in range(len(x)):
        hx = w * x[k]  # prediction for sample k
        # Index with the loop variable ``k``; the previous ``x[i]`` referred
        # to a name that does not exist inside this function.
        grad_sum += (hx - y[k]) * x[k]
    return grad_sum / len(x)  # average slope

# Training function
def fit(x, y, learning_rate=0.1, epochs=1000, tolerance=1.0e-15, initial_w=10):
    """Fit the weight of ``h(x) = w * x`` with batch gradient descent.

    Each iteration proposes ``w - learning_rate * gradient`` and accepts the
    step only while it lowers the cost by at least ``tolerance``. Training
    stops as soon as the cost rises, stops improving, or becomes NaN/inf,
    which is the stop rule the original comment described ("cost grows, or
    no longer shrinks by more than 1e-15"). A step that raises the cost is
    never applied, so a too-large ``learning_rate`` returns the last weight
    that was still improving (possibly ``initial_w``) instead of a diverged
    value.

    Args:
        x: Training inputs, passed through to ``cost`` and
            ``gradient_descent``.
        y: Training targets, passed through likewise.
        learning_rate: Step size multiplied with the gradient.
        epochs: Maximum number of update steps.
        tolerance: Minimum cost decrease required to keep training.
        initial_w: Starting weight.

    Returns:
        The fitted weight.
    """
    w = initial_w
    current_cost = cost(x, y, w)
    for _ in range(epochs):
        candidate = w - learning_rate * gradient_descent(x, y, w)
        candidate_cost = cost(x, y, candidate)
        improvement = current_cost - candidate_cost
        # Written as ``not (a >= b)`` so that a NaN improvement (cost
        # overflowed) also ends training instead of looping on garbage.
        if not improvement >= tolerance:
            if improvement > 0:
                # Still a (tiny) gain: keep it, then stop.
                w = candidate
            break
        w, current_cost = candidate, candidate_cost
    return w

# Prediction function
def predict(w, x):
    """Apply the learned weight to every input and return the predictions.

    Args:
        w: Weight of the hypothesis ``h(x) = w * x``.
        x: Input values; converted with ``np.array`` before scaling.

    Returns:
        A list holding ``w`` times each input value.
    """
    inputs = np.array(x)
    return list(w * inputs)
        
# RMSE: root mean squared error
def get_rmse(w, x_test, y_test):
    """Return the root mean squared error of the model on a test set.

    The previous loop subtracted the whole ``y_test`` from the prediction
    list on every iteration instead of comparing element by element, so it
    either raised ``TypeError`` (list inputs) or returned an array of
    absolute errors (array inputs). The error is now computed once over
    aligned arrays and reduced to a single number.

    Args:
        w: Weight of the hypothesis ``h(x) = w * x``.
        x_test: Test inputs, passed to ``predict``.
        y_test: Observed targets, positionally aligned with ``x_test``.

    Returns:
        The square root of the mean squared difference between predictions
        and targets (the square root keeps the error in the target's units).

    Raises:
        ValueError: If the test set is empty or predictions and targets
            have different shapes.
    """
    y_predict = np.asarray(predict(w, x_test), dtype=float)  # predictions
    y_true = np.asarray(y_test, dtype=float)
    if y_true.size == 0:
        raise ValueError("get_rmse requires at least one test sample")
    if y_predict.shape != y_true.shape:
        raise ValueError(
            "x_test and y_test must have the same shape, got "
            f"{y_predict.shape} and {y_true.shape}"
        )
    mse = np.mean((y_predict - y_true) ** 2)  # mean squared error
    return np.sqrt(mse)

In [39]:
forestfire_area = forestfire['area'] # burned-area column