<a href="https://colab.research.google.com/github/hududed/mlr3mbo-demo/blob/main/batch_mlr3mbo_single_objective.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# On a local R setup (RStudio or another IDE) install the packages below directly.
# On Colab that takes a while, so a pre-installed library archive is used instead.
# install.packages(c("mlr3mbo", "mlr3", "mlr3learners", "bbotk", "data.table", "tibble","stringr", "ranger"))

# Pre-installed libraries: install gdown, download the archive, unpack it
system("pip install --upgrade --no-cache-dir gdown", intern = TRUE)
system("gdown 1Zd8Et1Tp2PCdBUe6uXr-JruAANIRGgpt", intern = TRUE)
system("tar -xvf library.tar.gz", intern = TRUE)

# Clone the demo repository that holds the helper scripts
system("git clone https://github.com/hududed/mlr3mbo-demo.git", intern = TRUE)

# Point R at the unpacked library directory
.libPaths("./usr/local/lib/R/site-library/")

# Load the helper functions (source files: https://github.com/hududed/mlr3mbo-demo.git)
source("mlr3mbo-demo/utils/propose.R")

Loading required package: mlr3tuning

Loading required package: mlr3

Loading required package: paradox



In [2]:
# Upload this file to your session first (Folder icon on the left pane),
# then read it into a data.table and display it.
file <- "CuAlMnNi-data.csv"
data <- as.data.table(read.csv(file))
data

Sample..,Cu..at..,Al..at..,Mn..at..,Ni..at..,Enthalpy..J.g.,DSC.Ms...C.,DSC.Mf...C.,DSC.As...C.,DSC.Af...C.,Thermal.Hysteresis...C.
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,73.9,16.5,9.6,0.0,2.85,34.3,-21.7,29.7,62.0,39.6
2,72.3,17.5,10.2,0.0,2.31,28.7,-25.1,11.3,45.0,26.4
3,72.1,17.5,10.4,0.0,2.96,16.1,-29.7,-11.7,30.5,16.2
4,73.5,16.5,10.0,0.0,1.46,87.7,34.7,50.6,99.6,13.9
5,73.1,16.5,10.4,0.0,4.73,62.0,20.1,47.4,95.1,30.2
6,72.7,16.5,10.8,0.0,6.58,37.2,-14.9,17.26,64.6,29.8
7,72.1644,16.401,10.8346,0.6,0.892,78.1,17.2,35.9,96.9,18.8
8,72.9173,16.4588,10.374,0.25,3.29,43.6,-10.1,38.6,59.7,32.4
9,72.5518,16.3763,10.322,0.75,4.7,31.0,-53.4,-17.6,0.69,2.75
10,73.5,16.5,10.0,0.0,3.651,-6.0,-50.1,-40.6,8.5,12.0


In [3]:
# Clean the column names of `data`.
# The raw headers use dots in place of spaces and symbols (e.g. "Cu..at..",
# "DSC.Ms...C."); the steps below reduce them to plain snake-case-like
# identifiers such as "Cu" and "DSC_Ms".
col_names <- names(data)
col_names <- gsub("\\.{2}.*", "", col_names)  # drop everything from the first run of two dots onwards
col_names <- gsub("[^[:alnum:]_.]", "", col_names)  # remove non-alphanumeric characters except dots and underscores
# Convert dots BEFORE trimming: otherwise a single leading/trailing dot
# (e.g. "Temp.C.") would be turned into an underscore after the trim step
# had already run, leaving a stray "_" at the edge of the name.
col_names <- gsub("\\.", "_", col_names)  # replace dots with underscores
col_names <- gsub("^_+|_+$", "", col_names)  # remove leading and trailing underscores
names(data) <- col_names

In [4]:
# Print the current column names of the table
column_names <- colnames(data)
print(column_names)

 [1] "Sample"             "Cu"                 "Al"                
 [4] "Mn"                 "Ni"                 "Enthalpy"          
 [7] "DSC_Ms"             "DSC_Mf"             "DSC_As"            
[10] "DSC_Af"             "Thermal_Hysteresis"


In [5]:
# Columns to keep for the optimisation (replace with your column names)
selected_columns <- c("Al", "Mn", "Ni", "DSC_Af")
# Character-vector column selection on the data.table
dt <- data[, selected_columns, with = FALSE]
dt

Al,Mn,Ni,DSC_Af
<dbl>,<dbl>,<dbl>,<dbl>
16.5,9.6,0.0,62.0
17.5,10.2,0.0,45.0
17.5,10.4,0.0,30.5
16.5,10.0,0.0,99.6
16.5,10.4,0.0,95.1
16.5,10.8,0.0,64.6
16.401,10.8346,0.6,96.9
16.4588,10.374,0.25,59.7
16.3763,10.322,0.75,0.69
16.5,10.0,0.0,8.5


In [6]:
# Settings for the first batch; this list is what gets passed to propose_experiment().
# Update it to match your own experiment before running.
# NOTE(review): the range strings look like "(lower, upper)" bounds — confirm against propose.R.

metadata <- list(
  bucket_name = "my_bucket",  # The name of the bucket where the archive will be saved
  user_id = "my_id",  # The user ID

  # CHANGE THESE
  table_name = "CuAlMnNi_2",  # The name of the table
  batch_number = "1",  # The batch number
  parameter_info = list(
    Al = "float",  # The type of the Al parameter
    Mn = "float",  # The type of the Mn parameter
    Ni = "float"  # The type of the Ni parameter
    # Add more parameters as needed
  ),
  parameter_ranges = list(
    Al = "(15, 19)",  # The range of the Al parameter
    Mn = "(8,13)",  # The range of the Mn parameter
    Ni = "(0,3)"  # The range of the Ni parameter
    # Add more ranges as needed
  ),
  output_column_names = c("DSC_Af"),  # The names of the output columns
  direction = "minimize",  # The direction of the optimization ("minimize" or "maximize")
  num_random_lines = 15,  # The number of random lines to generate
  to_nearest = 0.2  # The value to round to
)

In [7]:
# Run the experiment function on the selected columns (dt) with the settings in metadata.
# The printed log includes the directory in which the RDS files are saved.
result <- propose_experiment(dt, metadata)

[1] "Al"
[1] "float"
[1] "(15, 19)"
[1] "Mn"
[1] "float"
[1] "(8,13)"
[1] "Ni"
[1] "float"
[1] "(0,3)"
[1] "Model archive so far: "
<Archive>
    Al   Mn   Ni DSC_Af              timestamp batch_nr
 1: 16  9.6 0.00  62.00 2024-02-06 15:04:34.77        1
 2: 18 10.2 0.00  45.00 2024-02-06 15:04:34.77        1
 3: 18 10.4 0.00  30.50 2024-02-06 15:04:34.77        1
 4: 16 10.0 0.00  99.60 2024-02-06 15:04:34.77        1
 5: 16 10.4 0.00  95.10 2024-02-06 15:04:34.77        1
 6: 16 10.8 0.00  64.60 2024-02-06 15:04:34.77        1
 7: 16 10.8 0.60  96.90 2024-02-06 15:04:34.77        1
 8: 16 10.4 0.25  59.70 2024-02-06 15:04:34.77        1
 9: 16 10.3 0.75   0.69 2024-02-06 15:04:34.77        1
10: 16 10.0 0.00   8.50 2024-02-06 15:04:34.77        1
11: 16 10.3 0.70  14.30 2024-02-06 15:04:34.77        1
     Al   Mn  Ni  x_domain acq_ei .already_evaluated
1: 15.6 10.2 2.2 <list[3]>    0.6              FALSE
[1] "RDS files saved in directory: my_bucket/my_id/CuAlMnNi_2/1"
[[1]]
      Al 

In [8]:
# Display the object returned by propose_experiment()
result

Cu,Al,Mn,Ni,DSC_Af
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
73.9,16.5,9.6,0.0,62.0
72.3,17.5,10.2,0.0,45.0
72.1,17.5,10.4,0.0,30.5
73.5,16.5,10.0,0.0,99.6
73.1,16.5,10.4,0.0,95.1
72.7,16.5,10.8,0.0,64.6
72.1644,16.401,10.8346,0.6,96.9
72.9172,16.4588,10.374,0.25,59.7
72.5517,16.3763,10.322,0.75,0.69
73.5,16.5,10.0,0.0,8.5


In [10]:
# Read the output file written for batch 1.
# The folder my_bucket/my_id/CuAlMnNi_2/1 also contains the trained model .rds files.
file <- "my_bucket/my_id/CuAlMnNi_2/1/output.csv"
data <- as.data.table(read.csv(file))
data

Cu,Al,Mn,Ni,DSC_Af
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
73.9,16.5,9.6,0.0,62.0
72.3,17.5,10.2,0.0,45.0
72.1,17.5,10.4,0.0,30.5
73.5,16.5,10.0,0.0,99.6
73.1,16.5,10.4,0.0,95.1
72.7,16.5,10.8,0.0,64.6
72.1644,16.401,10.8346,0.6,96.9
72.9172,16.4588,10.374,0.25,59.7
72.5517,16.3763,10.322,0.75,0.69
73.5,16.5,10.0,0.0,8.5


# UPDATE EXPERIMENT
The following assumes you have run the batch of experiments and filled in the output file.
I have re-uploaded this file as `updated.csv`, which contains (hypothetical) measured values.

In [11]:
# Import functions (see https://github.com/hududed/mlr3mbo-demo.git for the source files)
# FOR UPDATES MAKE SURE THIS IS SOURCED, NOT mlr3mbo-demo/utils/batch.R!
# NOTE(review): the first cell of this notebook sources mlr3mbo-demo/utils/propose.R for the
# first batch; "batch.R" is presumably an older name for that script — confirm against the repo.
source("mlr3mbo-demo/utils/update.R")

Loading required package: R.oo

Loading required package: R.methodsS3

R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.

R.oo v1.25.0 (2022-06-12 02:20:02 UTC) successfully loaded. See ?R.oo for help.


Attaching package: ‘R.oo’


The following object is masked from ‘package:R.methodsS3’:

    throw


The following objects are masked from ‘package:methods’:

    getClasses, getMethods


The following objects are masked from ‘package:base’:

    attach, detach, load, save


R.utils v2.12.2 (2022-11-11 22:00:03 UTC) successfully loaded. See ?R.utils for help.


Attaching package: ‘R.utils’


The following object is masked from ‘package:mlr3’:

    resample


The following object is masked from ‘package:utils’:

    timestamp


The following objects are masked from ‘package:base’:





In [12]:
# Upload the new, updated file to your session first (Folder icon on the left pane).
# It is expected in the root directory (same level as the my_bucket and mlr3mbo-demo
# folders); feel free to set your own path here.
file <- "updated.csv"
data <- as.data.table(read.csv(file))
data

Cu,Al,Mn,Ni,DSC_Af
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
73.9,16.5,9.6,0.0,62.0
72.3,17.5,10.2,0.0,45.0
72.1,17.5,10.4,0.0,30.5
73.5,16.5,10.0,0.0,99.6
73.1,16.5,10.4,0.0,95.1
72.7,16.5,10.8,0.0,64.6
72.1644,16.401,10.8346,0.6,96.9
72.9172,16.4588,10.374,0.25,59.7
72.5517,16.3763,10.322,0.75,0.69
73.5,16.5,10.0,0.0,8.5


In [13]:
# Settings for the update step; this list is what gets passed to update_experiment().
# Apart from batch_number, the values match the first-batch settings.
metadata <- list(

  # RECREATE THE PREVIOUS BATCH FOLDER FOLLOWING THIS STRUCTURE, AND UPLOAD THE ASSOCIATED THREE RDS FILES THERE:
  # <bucket_name>/<user_id>/<table_name>/<previous batch number>, e.g. my_bucket/my_id/CuAlMnNi_2/1
  bucket_name = "my_bucket",  # The name of the bucket where the archive will be saved
  user_id = "my_id",  # The user ID
  table_name = "CuAlMnNi_2",  # The name of the table
  parameter_info = list(
    Al = "float",  # The type of the Al parameter
    Mn = "float",  # The type of the Mn parameter
    Ni = "float"  # The type of the Ni parameter
  ),
  parameter_ranges = list(
    Al = "(15, 19)",  # The range of the Al parameter
    Mn = "(8,13)",  # The range of the Mn parameter
    Ni = "(0,3)"  # The range of the Ni parameter
  ),
  output_column_names = c("DSC_Af"),  # The names of the output columns
  direction = "minimize",  # The direction of the optimization ("minimize" or "maximize")
  num_random_lines = 15,  # The number of random lines to generate
  to_nearest = 0.2,  # The value to round to

  # CHANGE THIS
  # If you are running batch 2, it will expect three RDS files in my_bucket/my_id/CuAlMnNi_2/1
  # If you are running batch 3, it will expect three RDS files in my_bucket/my_id/CuAlMnNi_2/2
  batch_number = "2"  # The batch number for the second batch
)

In [14]:
# Run the experiment (FOR UPDATES MAKE SURE mlr3mbo-demo/utils/update.R is sourced, not batch.R)
# NOTE(review): this notebook sources propose.R for the first batch; "batch.R" is presumably
# an older name for that script — confirm against the repo.
# `data` here is the table read from the updated file; `metadata` carries the new batch number.
result <- update_experiment(data, metadata)

      Cu   Al   Mn  Ni DSC_Af
 1: 72.0 15.6 10.2 2.2    7.0
 2: 71.0 15.8 10.4 2.8    7.0
 3: 70.6 16.0 10.4 3.0    6.0
 4: 72.4 16.2 10.4 1.0    5.0
 5: 72.2 16.4 10.2 1.2    6.0
 6: 72.0 16.2 10.6 1.2    7.0
 7: 71.2 16.4 11.8 0.6    4.4
 8: 71.8 15.8  9.8 2.6    3.4
 9: 72.2 15.4 11.8 0.6   16.0
10: 72.4 15.2 10.6 1.8   15.0
11: 72.4 15.4 11.6 0.6   24.0
12: 70.6 15.8 10.6 3.0    5.0
13: 72.4 15.2 10.6 1.8    6.0
14: 72.4 15.6 10.6 1.4    8.0
15: 71.8 15.2 10.6 2.4   22.0
[1] "Model archive so far: "
<Archive>
    Al   Mn   Ni DSC_Af              timestamp batch_nr
 1: 16  9.6 0.00  62.00 2024-02-06 15:04:34.77        1
 2: 18 10.2 0.00  45.00 2024-02-06 15:04:34.77        1
 3: 18 10.4 0.00  30.50 2024-02-06 15:04:34.77        1
 4: 16 10.0 0.00  99.60 2024-02-06 15:04:34.77        1
 5: 16 10.4 0.00  95.10 2024-02-06 15:04:34.77        1
 6: 16 10.8 0.00  64.60 2024-02-06 15:04:34.77        1
 7: 16 10.8 0.60  96.90 2024-02-06 15:04:34.77        1
 8: 16 10.4 0.25  59.70 2024-02-0

In [15]:
# Read the output file written for batch 2 and display it
file <- "my_bucket/my_id/CuAlMnNi_2/2/output.csv"
data <- as.data.table(read.csv(file))
data

Cu,Al,Mn,Ni,DSC_Af
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
73.9,16.5,9.6,0.0,62.0
72.3,17.5,10.2,0.0,45.0
72.1,17.5,10.4,0.0,30.5
73.5,16.5,10.0,0.0,99.6
73.1,16.5,10.4,0.0,95.1
72.7,16.5,10.8,0.0,64.6
72.1644,16.401,10.8346,0.6,96.9
72.9172,16.4588,10.374,0.25,59.7
72.5517,16.3763,10.322,0.75,0.69
73.5,16.5,10.0,0.0,8.5
