In [18]:
from typing import Dict 
from pathlib import Path
import os
import yaml

In [14]:
# Directory used by the first part of this notebook, plus its POSIX string form.
tmp_dir = Path("/tmp/RtmpzJiZbd/vignette1364d3efb3da5")
# Path.as_posix() already returns a str, so no extra conversion is needed.
tmp_dir_posix = tmp_dir.as_posix()
print(tmp_dir_posix)

/tmp/RtmpzJiZbd/vignette1364d3efb3da5


In [15]:
# os.listdir() returns entries in arbitrary order; sort them so the displayed
# listing is the same on every run.
sorted(os.listdir(tmp_dir))

['board.yml', 'db.SQLite']

In [69]:
import pathlib
import importlib

class Board:
    """Wrap a ``pins`` board that is described by a YAML metadata file.

    The YAML document must be a mapping with a ``board`` entry naming the
    board type; every other entry is forwarded as a keyword argument to the
    matching ``pins`` constructor.

    Attributes set during construction:
        board_type: value of the ``board`` entry.
        board_meta: the parsed mapping with the ``board`` entry removed.
        board_constructor: the ``pins`` factory selected by ``set_board``.
        board: the object returned by ``board_constructor(**board_meta)``.
    """

    def __init__(self, yml_path: "str | pathlib.Path"):
        """Load the board description from ``yml_path`` and build the board.

        Args:
            yml_path: path of the YAML file (string or ``pathlib.Path``).

        Raises:
            OSError: if the file cannot be opened.
            yaml.YAMLError: if the file is not valid YAML (propagated as is).
            TypeError: if the YAML document is not a mapping (e.g. empty file).
            KeyError: if the mapping has no ``board`` entry.
            NameError: if the board type is not implemented.
        """
        with open(pathlib.Path(yml_path), "r") as board_yml:
            raw_meta = yaml.safe_load(board_yml)
        self.board_type, self.board_meta = self._split_meta(raw_meta)
        self.set_board()

    @staticmethod
    def _split_meta(raw_meta):
        """Validate parsed YAML and return ``(board_type, constructor_kwargs)``."""
        if not isinstance(raw_meta, dict):
            raise TypeError(
                "board metadata must be a YAML mapping, got "
                f"{type(raw_meta).__name__}"
            )
        if "board" not in raw_meta:
            raise KeyError("board metadata has no 'board' entry")
        # Work on a copy so the caller's mapping is left untouched.
        kwargs = dict(raw_meta)
        board_type = kwargs.pop("board")
        return board_type, kwargs

    def set_board(self) -> None:
        """Select the ``pins`` constructor for ``board_type`` and build the board.

        Raises:
            NameError: if ``board_type`` is not ``"pins_board_folder"``.
        """
        if self.board_type != "pins_board_folder":
            raise NameError(f"{self.board_type} is currently not implemented")
        # ``pins`` is imported lazily, only once a supported board type is requested.
        self.board_constructor = getattr(importlib.import_module("pins"), "board_folder")
        self.board = self.board_constructor(**self.board_meta)

    def __repr__(self) -> str:
        """Return the string form of the wrapped board object."""
        return str(self.board)

In [70]:
# Board() opens its argument as a YAML file, so point it at board.yml inside
# the project directory; joinpath('') would return the directory itself.
my_board = Board(tmp_dir.joinpath("board.yml"))

In [71]:
# Display the board; Board.__repr__ returns str() of the wrapped pins board.
my_board

<pins.boards.BaseBoard object at 0x7f68c248d610>

In [29]:
# Fetch the `board_folder` factory from the `pins` package at runtime and keep
# it under both names used by this cell.
board_constructor = importlib.import_module("pins").board_folder
test = board_constructor


In [37]:
# Build a versioned folder board on the pins directory.
test_board = board_constructor(
    path="/tmp/RtmpzJiZbd/pins-1364d3b59fea8",
    versioned=True,
)

In [39]:
# NOTE(review): as written this call fails — pin_browse() requires a `name`
# argument (see the TypeError recorded as this cell's output). Pass the name
# of an existing pin or remove the cell.
test_board.pin_browse()

TypeError: pin_browse() missing 1 required positional argument: 'name'

In [None]:
# Project directory for the rpwf part of the notebook, plus its POSIX string form.
tmp_dir = Path("/tmp/Rtmp4Le5GC/vignette100f721087ce9")
# Path.as_posix() already returns a str, so no extra conversion is needed.
tmp_dir_posix = tmp_dir.as_posix()
print(tmp_dir_posix)

In [None]:
import rpwf
from rpwf import database, rpwf
from pathlib import Path
from typing import Dict
from dataclasses import dataclass
import pandas
from sklearn.model_selection import (
    RepeatedStratifiedKFold,
    GridSearchCV,
    cross_val_score
)

In [None]:
# Project directory handed to rpwf as the project root, plus its POSIX string form.
tmp_dir = Path("/tmp/Rtmp4Le5GC/vignette100f721087ce9")
# Path.as_posix() already returns a str, so no extra conversion is needed.
tmp_dir_posix = tmp_dir.as_posix()
print(tmp_dir_posix)

In [None]:
# NOTE(review): presumably the pins board directory for this session. It is not
# referenced by any later cell visible here — confirm it is still needed.
board_dir = Path("/tmp/Rtmp4Le5GC/pins-100f7417b41bb")

In [None]:
@dataclass
class TestArgs:
    """Run settings for the nested-resampling cells of this notebook.

    Instances are created positionally, so the field order is part of the
    interface.
    """
    # Database file name; second argument to database.Base.
    db_name: str
    # Project root directory; first argument to database.Base.
    project_root: str
    # Passed as n_jobs to cross_val_score / GridSearchCV.
    cores: int
    # n_splits of the inner RepeatedStratifiedKFold.
    inner_n_cv: int
    # n_repeats of the inner RepeatedStratifiedKFold.
    inner_n_repeats: int
    # n_splits of the outer RepeatedStratifiedKFold.
    outer_n_cv: int
    # n_repeats of the outer RepeatedStratifiedKFold.
    outer_n_repeats: int

In [None]:
# Small settings for a quick interactive run; keywords make each value explicit.
args = TestArgs(
    db_name="db.SQLite",
    project_root=tmp_dir_posix,
    cores=3,
    inner_n_cv=2,
    inner_n_repeats=1,
    outer_n_cv=2,
    outer_n_repeats=1,
)
# Id of the workflow that the following cells operate on.
wflow_id = 1

In [None]:
# Create the rpwf database handle from the project root and database file name.
db_obj = database.Base(args.project_root, args.db_name)
# NOTE(review): presumably a table of all stored workflows; it is not used by
# any later cell visible here — confirm against rpwf.
wflow_df = db_obj.all_wflow()

In [None]:
# Build the rpwf helper objects for the workflow selected by wflow_id.
wflow_obj = rpwf.Wflow(db_obj, wflow_id)
n_cores = args.cores  # later passed as n_jobs to scikit-learn

# Generate the parameters
# (p_grid is later wrapped by val_to_list and used as the GridSearchCV grid)
p_grid = rpwf.RGrid(db_obj, wflow_obj).get_grid()

# Predictors and response used by cross_val_score below.
df_obj = rpwf.TrainDf(db_obj, wflow_obj)
X, y = df_obj.get_df_X(), df_obj.get_df_y()

# Estimator that is tuned / cross-validated below.
model_type_obj = rpwf.Model(db_obj, wflow_obj)
base_learner = rpwf.BaseLearner(wflow_obj, model_type_obj).base_learner
# Value of the workflow's "costs" parameter; used as the `scoring` argument and
# as the column name of the exported scores.
# NOTE(review): _get_par is a private rpwf method — confirm no public accessor exists.
score = wflow_obj._get_par("costs")

In [None]:
# Inspect the grid returned by RGrid.get_grid().
p_grid

In [None]:
# Take the second grid entry for inspection.
# NOTE(review): `test` was earlier bound to the pins board constructor; this
# rebinds the same name to a different kind of object.
test = p_grid[1]

In [None]:
# Display the object currently bound to `test`.
test

In [None]:
def val_to_list(d: Dict):
    """Wrap every value of ``d`` in a one-element list, in place.

    The same dict object is modified and returned, so callers that still hold
    a reference to ``d`` see the wrapped values too.
    """
    # The comprehension is fully built before update() runs, so the dict is
    # never resized while being iterated.
    d.update({key: [value] for key, value in d.items()})
    return d

In [None]:
# Pre-allocate one slot per grid entry; the slots are filled by the next cell.
wrapped_grid = [None for _slot in range(len(p_grid))]

In [None]:
# Wrap the values of every grid entry in single-element lists.
# val_to_list works in place, so the entries of p_grid are modified as well.
for i, grid_entry in enumerate(p_grid):
    wrapped_grid[i] = val_to_list(grid_entry)

In [None]:
# Inspect the grid after every value has been wrapped in a list.
wrapped_grid

In [None]:
# Nested resampling: the inner splitter is used by GridSearchCV to choose
# hyper-parameters, the outer splitter by cross_val_score to score the result.
inner_cv = RepeatedStratifiedKFold(
    n_splits=args.inner_n_cv,
    n_repeats=args.inner_n_repeats,
    random_state=wflow_obj.random_state,
)
outer_cv = RepeatedStratifiedKFold(
    n_splits=args.outer_n_cv,
    n_repeats=args.outer_n_repeats,
    random_state=wflow_obj.random_state,
)

if p_grid is None:
    print("No tune grid specified, running with default params")
    nested_score = cross_val_score(
        base_learner, X=X, y=y, cv=outer_cv, n_jobs=n_cores, scoring=score
    )

else:
    print("Performing nested-cv using provided Rgrid")
    param_tuner = GridSearchCV(
        estimator=base_learner,
        # GridSearchCV needs every parameter value to be a sequence. Use the
        # explicitly wrapped grid rather than relying on val_to_list() having
        # mutated the entries of p_grid in place.
        param_grid=wrapped_grid,
        cv=inner_cv,
        n_jobs=n_cores,
        scoring=score,
    )
    # No `scoring` here: the fitted GridSearchCV scores with its own `scoring`.
    nested_score = cross_val_score(param_tuner, X=X, y=y, cv=outer_cv)

In [None]:
# Export the results.
# NOTE(review): the `if args.export:` guard is commented out and TestArgs has
# no `export` field, so the export always runs — confirm that is intended.
exporter = rpwf.Export(db_obj, wflow_obj)
# One column of outer-fold scores, named after the scoring metric.
nested_score_df = pandas.DataFrame(nested_score, columns=[score])
exporter.export_cv(nested_score_df, "nested_cv")
exporter.export_db()

In [None]:
# Run the packaged nested_resampling script as a module.
# NOTE(review): `-h` presumably prints the command-line help — confirm.
%run -m rpwf.script.nested_resampling -h

In [None]:
# Run the script on the project directory; IPython expands $tmp_dir_posix to
# the value of the Python variable.
# NOTE(review): the meaning of `-s` is not visible here — see the script's help.
%run -m rpwf.script.nested_resampling $tmp_dir_posix -db db.SQLite -s

In [None]:
# Run the script with explicit settings.
# NOTE(review): `-c`, `-icv`, `-icr`, `-ocv`, `-ocr` presumably map to cores and
# the inner/outer CV splits and repeats; `-af` is not visible here — confirm
# against the script's help.
%run -m rpwf.script.nested_resampling $tmp_dir_posix -db db.SQLite -af -c 7 -icv 5 -icr 1 -ocv 5 -ocr 1