In [2]:
# Project helpers. The cells below call train_test_split(), log_val(),
# log_test(), test() and a three-argument tune(), none of which are defined in
# this notebook -- presumably they come from this file; confirm there.
source("/minimum_wage/src/model/utils.R")

# Modelling stack: model specifications, recipes, workflows, tuning grids,
# plotting and experiment tracking.
library(parsnip)
suppressPackageStartupMessages(library(recipes))
library(workflows)
library(doParallel)
library(dials)
library(ggplot2)
library(mlflow)

# Work from the project root: the relative "mlruns" file store passed to
# mlflow_server() later in the notebook resolves against this directory.
setwd("/minimum_wage/")

In [3]:
# Build the train/test partition with the project helper and echo its sizes
# (the wrapping parentheses make the assigned value print).
# NOTE(review): the variable shares its name with base::split().
(split <- train_test_split())

<Training/Testing/Total>
<122506/30628/153134>

In [4]:
# Multinomial regression with both regularization knobs left as tune()
# placeholders for the hyper-parameter search:
#   penalty - amount of regularization
#   mixture - alpha, the ratio of L1 and L2 regularization
spec <- multinom_reg(penalty = tune(), mixture = tune())

# Classification mode, fitted with the glmnet engine.
spec <- parsnip::set_mode(spec, "classification")
spec <- parsnip::set_engine(spec, "glmnet")

spec

Multinomial Regression Model Specification (classification)

Main Arguments:
  penalty = tune()
  mixture = tune()

Computational engine: glmnet 


In [5]:
# Preprocessing recipe: model `group` from five predictors plus a selected set
# of pairwise interactions.
#
# The template data is the training partition, extracted explicitly with
# training(). recipe() documents `data` as a data frame or tibble; passing the
# split object itself only worked through an implicit as.data.frame()
# coercion, and the tuning cell already uses training(split).
recipe <- recipe(
    group ~ countycat + sex + martial + educat + agecat,
    data = training(split)
) |>
    # Convert the numeric outcome to a factor. Per the step_num2factor() docs
    # the integer value indexes into `levels`, i.e. 1 -> "test",
    # 2 -> "control1", 3 -> "control2" -- TODO confirm the coding of `group`.
    step_num2factor(
        group,
        levels = c("test", "control1", "control2")
    ) |>
    # Age interacted with education, marital status and sex.
    step_interact(terms = ~ agecat:educat) |>
    step_interact(terms = ~ agecat:martial) |>
    step_interact(terms = ~ agecat:sex) |>
    # Sex interacted with marital status and education.
    step_interact(terms = ~ sex:martial) |>
    step_interact(terms = ~ sex:educat)

recipe



[36m──[39m [1mRecipe[22m [36m──────────────────────────────────────────────────────────────────────[39m



── Inputs 

Number of variables by role

outcome:   1
predictor: 5



── Operations 

[36m•[39m Factor variables from: [34mgroup[39m

[36m•[39m Interactions with: [34magecat:educat[39m

[36m•[39m Interactions with: [34magecat:martial[39m

[36m•[39m Interactions with: [34magecat:sex[39m

[36m•[39m Interactions with: [34msex:martial[39m

[36m•[39m Interactions with: [34msex:educat[39m



## Standard grid search (Latin hypercube design)

In [None]:
# Bundle the preprocessing recipe and the model specification.
wf <- workflow(recipe, spec)

# Latin hypercube sampling draws the candidates at random. Fix the seed so the
# 30-point grid (and therefore the metrics logged below) is the same on every
# re-run of this cell.
set.seed(123)
grid <- extract_parameter_set_dials(wf) |>
    grid_latin_hypercube(size = 30)

# tune() is called here with (data, workflow, grid), so it is presumably the
# project helper rather than the tune() placeholder used in the model
# spec -- TODO confirm in utils.R.
grid_res <- tune(training(split), wf, grid)

# Hand the workflow and the collected tuning metrics to the project logging
# helper (presumably an MLflow run -- confirm in utils.R). The returned id is
# passed on to log_test() in a later cell.
experiment_id <- log_val(
    wf,
    collect_metrics(grid_res),
    "multi_reg_elastic_net",
    "notebook_ml_workflow",
    "latin_hypercube"
)

In [None]:
# Visualise the tuning results; the parentheses keep the plot object in
# `tune_plot` and display it in one step.
(tune_plot <- autoplot(grid_res))

In [8]:
# Run the project test-logging helper on the workflow, the split and the
# tuning results, under the experiment id returned by log_val().
# NOTE(review): the captured output of this cell warns that precision is
# undefined for the 'test' and 'control1' levels, which suggests the model
# never predicts those classes -- worth investigating before trusting the
# averaged metrics.
elastic_net <- log_test(
    wf, split, grid_res,
    "multi_reg_elastic_net", experiment_id
)

               Precision is undefined in this case, and those levels will be removed from the averaged result.
               Note that the following number of true events actually occured for each problematic event level:
               'test': 4807
               'control1': 11164

There were issues with some computations   [1m[33mA[39m[22m: x1

There were issues with some computations   [1m[33mA[39m[22m: x1





In [9]:
# Start the MLflow tracking server on port 5050, backed by the "mlruns"
# directory (relative to the working directory).
# NOTE(review): host "0.0.0.0" listens on every network interface; confirm
# that exposing the server beyond localhost is intended.
mlflow_server(file_store = "mlruns", host = "0.0.0.0", port = 5050)

$server_url
[1] "http://0.0.0.0:5050"

$handle
PROCESS 'mlflow', running, pid 6203.

$file_store
[01;34m/minimum_wage/mlruns[0m

attr(,"class")
[1] "mlflow_server"

In [10]:
# Run the project test() helper on the fitted result, tagged with the model
# name "elastic_net"; the returned table is displayed as the cell output.
test(
    elastic_net,
    "elastic_net",
    split
)

id,.pred_test,.pred_control1,.pred_control2,.row,.pred_class,group,.config,model
<chr>,<dbl>,<dbl>,<dbl>,<int>,<fct>,<fct>,<chr>,<chr>
train/test split,0.1569474,0.3644883,0.4785643,8,control2,control1,Preprocessor1_Model1,elastic_net
train/test split,0.1569474,0.3644883,0.4785643,11,control2,control1,Preprocessor1_Model1,elastic_net
train/test split,0.1569474,0.3644883,0.4785643,15,control2,control1,Preprocessor1_Model1,elastic_net
train/test split,0.1569474,0.3644883,0.4785643,16,control2,control1,Preprocessor1_Model1,elastic_net
train/test split,0.1569474,0.3644883,0.4785643,17,control2,control1,Preprocessor1_Model1,elastic_net
train/test split,0.1569474,0.3644883,0.4785643,19,control2,test,Preprocessor1_Model1,elastic_net
train/test split,0.1569474,0.3644883,0.4785643,23,control2,control1,Preprocessor1_Model1,elastic_net
train/test split,0.1569474,0.3644883,0.4785643,25,control2,control1,Preprocessor1_Model1,elastic_net
train/test split,0.1569474,0.3644883,0.4785643,26,control2,control1,Preprocessor1_Model1,elastic_net
train/test split,0.1569474,0.3644883,0.4785643,38,control2,control2,Preprocessor1_Model1,elastic_net
