Permalink
Fetching contributors…
Cannot retrieve contributors at this time
43 lines (35 sloc) 1.5 KB
library(dplyr)
library(readr)
library(sampling)
# Column names applied, in file order, to the 21 fields of data/german.data
# (the file is read with these as its column names, so it has no header row
# of its own).
header <- c(
  "status", "duration", "history",
  "purpose", "amount", "savings",
  "employment", "install.rate", "personal",
  "guarantor", "residence", "property",
  "age", "install.plan", "housing",
  "credits", "job", "liabilities",
  "telephone", "foreign", "risk"
)
# Read the space-delimited German credit file and recode its categorical
# columns as factors with short numeric labels.
#
# Unless `levels` is supplied, factor() takes the sorted distinct values as
# levels and assigns `labels` to them positionally, so every recode below that
# omits `levels` relies on the sorted codes lining up with the labels.
raw <-
  read_delim("data/german.data", delim = " ", col_names = header) %>%
  mutate(
    status = factor(status, labels = 1:4),
    history = factor(history, labels = 0:4),
    # Levels are spelled out for `purpose`: per the UCI attribute description
    # the codes are A40-A46 and A48-A410, and sorted as strings "A410" falls
    # between "A41" and "A42", which would shift every label after "1" by one.
    purpose = factor(
      purpose,
      levels = paste0("A4", c(0:6, 8:10)),
      labels = c(0:6, 8:10)
    ),
    savings = factor(savings, labels = 1:5),
    employment = factor(employment, labels = 1:5),
    # `personal` is split into two binary indicators and then dropped by the
    # select() below. sex is 0 for codes A91/A93/A94, 1 otherwise; married is
    # 0 for codes A93/A95, 1 otherwise.
    # NOTE(review): presumably 0 = male / 0 = single per the UCI codes - confirm.
    sex = as.factor(ifelse(personal %in% c("A91", "A93", "A94"), 0, 1)),
    married = as.factor(ifelse(personal %in% c("A93", "A95"), 0, 1)),
    guarantor = factor(guarantor, labels = 1:3),
    property = factor(property, labels = 1:4),
    install.plan = factor(install.plan, labels = 1:3),
    housing = factor(housing, labels = 1:3),
    job = factor(job, labels = 1:4),
    telephone = factor(telephone, labels = 0:1),
    # For `foreign` and `risk` the smaller raw value is labelled "1" and the
    # larger "0".
    foreign = factor(foreign, labels = 1:0),
    risk = factor(risk, labels = 1:0)
  ) %>%
  # Keep the original column order, but put sex/married where `personal` was.
  select(status:install.rate, sex:married, guarantor:risk)

# The recode above leaves the levels of `risk` ordered "1", "0"; reverse them
# so that "0" is the first level and "1" the second.
raw$risk <- factor(raw$risk, levels = rev(levels(raw$risk)))
# Stratified 80/20 train/validation split on `risk`; the fixed seed makes the
# draw reproducible.
set.seed(1017)

# sampling::strata() expects `size` in the order in which the strata first
# appear in the data, which is not necessarily the factor's level order.
# Index the class counts by order of appearance rather than assuming that it
# is the reverse of the level order, and round so each size is a whole count.
train_size <- round(
  as.vector(table(raw$risk)[as.character(unique(raw$risk))]) * 0.8
)

# Simple random sampling without replacement within each risk class;
# ID_unit holds the row indices of the selected units.
train_id <- strata(
  raw, "risk",
  size = train_size,
  method = "srswor"
)$ID_unit

train <- raw[ train_id, ]
valid <- raw[-train_id, ]
# Persist both splits in a single file. The file is written with save(), so
# despite its ".rds" extension it must be read back with load(), which
# restores the objects `train` and `valid` under those names.
save(
  list = c("train", "valid"),
  file = "data/clean_data.rds"
)