## Exploratory Data Analysis (EDA) of the University of Maryland’s Center for International and Security Studies (CISSM) Cyber Attacks Database (CAD)
### A companion notebook to https://holisticinfosec.io/post/eda-cissm-cad/

*   Dependencies
*   dataxray
*   janitor, CGPfunctions, vtree
*   Model
*   Forecasts
*   Graphs


### Install dependencies if needed

In [None]:
# Install whatever is missing, and nothing else, so this cell can be re-run
# without reinstalling packages that are already present.
my_packages <- c(
  "correlationfunnel", "devtools", "forecast", "fpp2", "CGPfunctions",
  "ggpubr", "janitor", "tidyverse", "tsibble", "TTR", "vtree"
) # Specify your packages

# Packages from the list that are not in the local library yet
not_installed <- setdiff(my_packages, installed.packages()[, "Package"])
if (length(not_installed) > 0) {
  install.packages(not_installed)
}

# dataxray is installed from GitHub through devtools; skip it when a copy is
# already installed instead of rebuilding it on every run
if (!("dataxray" %in% installed.packages()[, "Package"])) {
  devtools::install_github("holisticinfosec/dataxray")
}

### Load libraries, ingest data, build data frame, and tsibble

In [None]:
library(dataxray)
library(forecast)
library(fpp2)
library(CGPfunctions)
library(ggpubr)
library(janitor)
library(tidyverse)
library(tsibble)
library(TTR)
library(vtree)

# Read the raw CISSM export
df <- read_csv("CISSM-export.csv", show_col_types = FALSE)

# Cross-tabulate evtDate against event_type, then convert the tabyl to a tibble
evtType <- tabyl(df, evtDate, event_type)
df1 <- as_tibble(evtType)

# All-events tsibble: evtDate becomes a year-month value and serves as the index
AllEvents <- as_tsibble(
  mutate(df1, evtDate = yearmonth(evtDate)),
  index = evtDate
)

# Disruptive-events tsibble (index plus the Disruptive count column)
disruptive <- select(AllEvents, evtDate, Disruptive)

# Exploitative-events tsibble (index plus the Exploitative count column)
exploitative <- select(AllEvents, evtDate, Exploitative)

### dataxray

In [None]:
# Run the dataxray report on the full data frame
# NOTE(review): study = 'ggplot2' looks carried over from an example call;
# confirm the intended study label
report_xray(df, data_name = 'CISSM', study = 'ggplot2')

### janitor

In [None]:
# Frequency of each event type with base R
table(df$event_type)

# The same tally with dplyr
count(df, event_type)

# Two-way counts of event_type by motive
tabyl(df, event_type, motive)

# The same cross-tab as column percentages, formatted to one decimal place
adorn_pct_formatting(
  adorn_percentages(tabyl(df, event_type, motive), denominator = "col"),
  digits = 1
)

### CGPfunctions

In [None]:
# Cross-tab plot of event_type and motive
df |>
  PlotXTabs2(event_type, motive, title = "Event Type by Motive")

### vtree

In [None]:
# Variable tree of event_type
df |> vtree("event_type")

# Variable tree of motive, with showcount switched off
df |> vtree("motive", showcount = FALSE)

# Nested tree: event_type, then motive (showcount off, horiz = FALSE)
df |> vtree(c("event_type", "motive"), horiz = FALSE, showcount = FALSE)

### Model disruptive events

In [None]:
# Model disruptive events: fit three candidate models and print each summary.
# The RMSE / MAE figures in the trailing comments are the values recorded in
# the original notebook.

# Naive benchmark, 12 periods ahead
naive_model_disruptive <- naive(disruptive, h = 12) # RMSE = 22.6, MAE = 15.9
summary(naive_model_disruptive)

# Simple exponential smoothing, 12 periods ahead. Fitted on the tsibble
# converted with as.ts() rather than the bare Disruptive column, so the model
# sees a time series with its time index, matching the tsibble input given to
# the other two models.
ses_model_disruptive <- ses(as.ts(disruptive), h = 12) # RMSE = 19.5, MAE = 13.9
summary(ses_model_disruptive)

# ARIMA with automatic model selection
arima_model_disruptive <- auto.arima(disruptive) # RMSE = 18.3, MAE = 13.5
summary(arima_model_disruptive)

### Model exploitative events

In [None]:
# Model exploitative events: fit three candidate models and print each summary.
# The RMSE / MAE figures in the trailing comments are the values recorded in
# the original notebook.

# Naive benchmark, 12 periods ahead
naive_model_exploitative <- naive(exploitative, h = 12) # RMSE = 20.5, MAE = 15.4
summary(naive_model_exploitative)

# Simple exponential smoothing, 12 periods ahead. Fitted on the tsibble
# converted with as.ts() rather than the bare Exploitative column, so the model
# sees a time series with its time index, matching the tsibble input given to
# the other two models.
ses_model_exploitative <- ses(as.ts(exploitative), h = 12) # RMSE = 18.8, MAE = 13.9
summary(ses_model_exploitative)

# ARIMA with automatic model selection
arima_model_exploitative <- auto.arima(exploitative) # RMSE = 18.2, MAE = 13.6
summary(arima_model_exploitative)

### Plot all events

In [None]:
# Convert the all-events tsibble to a ts object and plot it
AllEvents |>
  as.ts() |>
  autoplot()

### Plot disruptive events and models

In [None]:
# plot disruptive events only
autoplot(as.ts(disruptive))

# Build each model's forecast plot once; the same plot objects are shown
# individually below and then reused in the joined figure, so no forecast is
# computed twice.
naiveDIS <- forecast(naive_model_disruptive) %>% autoplot()
sesDIS <- forecast(ses_model_disruptive) %>% autoplot()
arimaDIS <- forecast(arima_model_disruptive) %>% autoplot()

# forecast disruptive models with individual plots
naiveDIS
sesDIS
arimaDIS

# forecast disruptive models with joined plot (three rows, one column)
multi.pageDIS <- ggarrange(naiveDIS, sesDIS, arimaDIS, nrow = 3, ncol = 1)

multi.pageDIS

### Plot exploitative events and models

In [None]:
# plot exploitative events only
autoplot(as.ts(exploitative))

# Build each model's forecast plot once; the same plot objects are shown
# individually below and then reused in the joined figure, so no forecast is
# computed twice.
naiveEXP <- forecast(naive_model_exploitative) %>% autoplot()
sesEXP <- forecast(ses_model_exploitative) %>% autoplot()
arimaEXP <- forecast(arima_model_exploitative) %>% autoplot()

# forecast exploitative models with individual plots
naiveEXP
sesEXP
arimaEXP

# forecast exploitative models with joined plot (three rows, one column)
multi.pageEXP <- ggarrange(naiveEXP, sesEXP, arimaEXP, nrow = 3, ncol = 1)