# Regression Workshop

Usaremos datos que contienen información sobre el área quemada durante incendios forestales en Portugal.

### Fine Fuel Moisture Code
The Fine Fuel Moisture Code (FFMC) is a numeric rating of the moisture content of litter and other cured fine fuels. This code is an indicator of the relative ease of ignition and the flammability of fine fuel.

### Duff Moisture Code
The Duff Moisture Code (DMC) is a numeric rating of the average moisture content of loosely compacted organic layers of moderate depth. This code gives an indication of fuel consumption in moderate duff layers and medium-size woody material.

### Drought Code
The Drought Code (DC) is a numeric rating of the average moisture content of deep, compact organic layers. This code is a useful indicator of seasonal drought effects on forest fuels and the amount of smoldering in deep duff layers and large logs.

### Initial Spread Index
The Initial Spread Index (ISI) is a numeric rating of the expected rate of fire spread. It combines the effects of wind and the FFMC on rate of spread without the influence of variable quantities of fuel.

In [None]:
library(dplyr)
library(corrplot)
library(ggplot2)
library(unam.theme)
library(manipulate)
library(rlang)
library(mlbench)
library(caret)

In [None]:
# Load the forest-fire records from the CSV file in the working directory.
datos <- read.csv(file = "forestfires.csv")

In [None]:
# Print a compact, column-wise overview of the loaded data.
dplyr::glimpse(x = datos)

In [None]:
# Per-column summary statistics of the loaded data.
summary(object = datos)

In [None]:
# Plot the correlation matrix as numbers. Columns 3 and 4 are excluded before
# calling cor() (presumably the non-numeric month and day columns -- confirm
# against the CSV header).
corrplot::corrplot(
  corr = cor(datos[, -(3:4)]),
  method = "number"
)

In [None]:
# Encode month and day as factors with explicitly ordered levels
# (jan..dec and sun..sat) so that plots follow calendar order instead of
# alphabetical order.
datos <- datos %>%
  mutate(
    month = factor(
      month,
      levels = c(
        "jan", "feb", "mar", "apr", "may", "jun",
        "jul", "aug", "sep", "oct", "nov", "dec"
      )
    ),
    day = factor(
      day,
      levels = c("sun", "mon", "tue", "wed", "thu", "fri", "sat")
    )
  )

In [None]:
# Bar chart: number of records per month.
ggplot(data = datos, mapping = aes(x = month)) +
  geom_bar() +
  theme_unam()

In [None]:
# Bar chart: number of records per day of the week.
ggplot(data = datos, mapping = aes(x = day)) +
  geom_bar() +
  theme_unam()

In [None]:
# Read the park map image; `map` is reused as the plot background in the
# following cells.
map <- png::readPNG(source = "parkmap.png")

# Count of records at each (X, Y) grid position, drawn over the map.
# Y is plotted through desc() (i.e. negated), and the y-limits are negative
# accordingly.
ggplot(data = datos, mapping = aes(x = X, y = desc(Y))) +
  ggpubr::background_image(map) +
  geom_count(colour = unam_oro) +
  coord_cartesian(xlim = c(0.5, 9.5), ylim = c(-1, -10)) +
  theme_unam()

In [None]:
# One point per record over the park map, with point size mapped to `area`.
# Y is plotted through desc() (i.e. negated), matching the negative y-limits.
ggplot(data = datos, mapping = aes(x = X, y = desc(Y), size = area)) +
  ggpubr::background_image(map) +
  geom_point(colour = unam_oro) +
  coord_cartesian(xlim = c(0.5, 9.5), ylim = c(-1, -10)) +
  theme_unam()

In [None]:
# Run in RStudio
# Interactive map: points are placed at (X, -Y) over the park image and sized
# by the column chosen in the picker.
#
# `variable` holds a column name as a string, so it is looked up through the
# `.data` pronoun. X and Y are mapped as real columns: the previous
# `aes_string(y = desc("Y"))` applied desc() to the string literal "Y", which
# evaluates to the constant -1 and collapsed every point onto one row.
manipulate(
  {
    ggplot(datos) +
      ggpubr::background_image(map) +
      geom_point(aes(x = X, y = desc(Y), size = .data[[variable]])) +
      labs(size = variable) +
      coord_cartesian(xlim = c(0.5, 9.5), ylim = c(-1, -10)) +
      theme_unam()
  },
  variable = picker("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain", "area")
)

In [None]:
# Run in RStudio
# Interactive scatter plot of `area` (x-axis) against the column chosen in the
# picker (y-axis).
#
# `y` holds a column name as a string, so it is looked up through the `.data`
# pronoun (replacing the deprecated aes_string()). The x-axis label is the
# fixed string "area": this manipulate() call defines no `x` control, so the
# previous `labs(x = x, ...)` referenced an undefined object.
manipulate(
  {
    ggplot(datos) +
      geom_point(aes(x = area, y = .data[[y]])) +
      labs(x = "area", y = y) +
      theme_unam()
  },
  y = picker("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain")
)

In [None]:
# Density estimate of the raw `area` values.
ggplot(data = datos, mapping = aes(x = area)) +
  geom_density(colour = unam_azul, fill = unam_azul) +
  theme_unam()

In [None]:
# Density of the arcsine-square-root transform of `area`.
#
# asin() is only defined on [-1, 1], so asin(sqrt(area)) yields NaN for every
# record with area > 1 and those records silently drop out of the density.
# `area` is therefore rescaled to [0, 1] by its maximum before transforming,
# so that all records contribute.
ggplot(datos) +
  geom_density(
    aes(asin(sqrt(area / max(area, na.rm = TRUE)))),
    colour = unam_oro,
    fill = unam_oro
  ) +
  labs(x = "asin(sqrt(area / max(area)))") +
  theme_unam()

In [None]:
# Names of the columns (excluding the first four) that caret's
# findCorrelation() flags for removal at an absolute-correlation cutoff of 0.5.
local({
  candidates <- datos[, -(1:4)]
  flagged <- findCorrelation(cor(candidates), cutoff = 0.5)
  names(candidates)[flagged]
})

In [None]:
# Drop the grid coordinates and the calendar columns; only the remaining
# columns are used from here on.
datos <- select(datos, -c(X, Y, month, day))

In [None]:
# Same correlation screen as before, now on the reduced data frame: names of
# the columns flagged by findCorrelation() at a cutoff of 0.5.
colnames(datos)[findCorrelation(x = cor(datos), cutoff = 0.5)]

In [None]:
# Baseline linear model: raw `area` regressed on every other column of datos.
model1 <- lm(formula = area ~ ., data = datos)
summary(object = model1)

In [None]:
# Run the gvlma package's global validation of linear model assumptions on the
# baseline model and print the result.
gvlma::gvlma(model1)

In [None]:
# Add `area_t`, the natural log of `area`. Zero areas are replaced by 0.00001
# before taking the log so the result stays finite.
# NOTE(review): log(0.00001) is about -11.5, far from the other values;
# log1p(area) is a common alternative -- confirm the intended transform.
datos <- mutate(
  datos,
  area_t = log(ifelse(area == 0, 0.00001, area))
)

In [None]:
# Run in RStudio
# Interactive scatter plot: the x picker chooses between the raw and the
# log-transformed burned area, the y picker chooses the other variable.
#
# Both pickers return column names as strings, so the columns are looked up
# through the `.data` pronoun. Mapping the bare `x` / `y` objects inside aes()
# would plot the two strings themselves as constants (a single point) rather
# than the data they name.
manipulate(
  {
    ggplot(datos) +
      geom_point(aes(x = .data[[x]], y = .data[[y]])) +
      labs(x = x, y = y) +
      theme_unam()
  },
  x = picker("area", "area_t"),
  y = picker("FFMC", "DMC", "DC", "ISI", "temp", "RH", "wind", "rain")
)

In [None]:
# Linear model for the log-transformed burned area.
#
# `datos` still contains the untransformed `area` column, so a plain
# `area_t ~ .` would use the response's own source as a predictor (target
# leakage). `area` is therefore excluded explicitly from the right-hand side.
model2 <- lm(area_t ~ . - area, data = datos)
summary(model2)