In [33]:
%autoreload 2

In [182]:
from collections import defaultdict
import copy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import typing
import sklearn
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline


In [25]:
FITNESS_DATA_FILE = '../data/fitness_scores.csv'
NON_FEATURE_COLUMNS = set(['Index', 'src_file', 'game_name', 'domain_name', 'real', 'original_game_name'])


def regrowth_game_name_cleanup(df: pd.DataFrame):
    regrowth_games = df[df.src_file == 'ast-regrwoth-samples.pddl']
    new_game_names = []
    original_game_names = []
    game_name_counter = defaultdict(lambda: 0)
    for i, row in regrowth_games.iterrows():
        game_name = row.game_name
        original_game_names.append(game_name)
        new_game_name = f'{game_name}-{game_name_counter[game_name]}'
        new_game_names.append(new_game_name)
        game_name_counter[game_name] += 1

    regrowth_games = regrowth_games.assign(game_name=new_game_names, original_game_name=original_game_names)

    df[df.src_file == 'ast-regrwoth-samples.pddl'] = regrowth_games
    return df


fitness_df = pd.read_csv(FITNESS_DATA_FILE)
fitness_df = fitness_df.assign(real=fitness_df.src_file == 'interactive-beta.pddl', original_game_name=None)
fitness_df = regrowth_game_name_cleanup(fitness_df)
fitness_df = fitness_df[~(fitness_df.src_file == 'ast-mle-samples.pddl')]
print(fitness_df.columns)
fitness_df.head()

Index(['Index', 'src_file', 'game_name', 'domain_name', 'variables_defined',
       'all_preferences_used', 'setup_objects_used', 'no_adjacent_once',
       'starts_and_ends_once', 'variable_not_repeated', 'no_nested_logicals',
       'pref_forall_correct', 'real', 'original_game_name'],
      dtype='object')


Unnamed: 0,Index,src_file,game_name,domain_name,variables_defined,all_preferences_used,setup_objects_used,no_adjacent_once,starts_and_ends_once,variable_not_repeated,no_nested_logicals,pref_forall_correct,real,original_game_name
0,0,interactive-beta.pddl,6172feb1665491d1efbce164-0,medium-objects-room-v1,1.0,1.0,1.0,1.0,0.5,1.0,1.0,1.0,True,
1,1,interactive-beta.pddl,5f77754ba932fb2c4ba181d8-2,many-objects-room-v1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True,
2,2,interactive-beta.pddl,614b603d4da88384282967a7-3,many-objects-room-v1,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,True,
3,3,interactive-beta.pddl,5bc79f652885710001a0e82a-5,few-objects-room-v1,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,True,
4,4,interactive-beta.pddl,614dec67f6eb129c3a77defd-6,medium-objects-room-v1,1.0,1.0,0.25,1.0,1.0,1.0,1.0,1.0,True,


In [17]:
fitness_df.drop('Index', axis=1).groupby('src_file').agg([np.mean, np.std])

Unnamed: 0_level_0,variables_defined,variables_defined,all_preferences_used,all_preferences_used,setup_objects_used,setup_objects_used,no_adjacent_once,no_adjacent_once,starts_and_ends_once,starts_and_ends_once,variable_not_repeated,variable_not_repeated,no_nested_logicals,no_nested_logicals,pref_forall_correct,pref_forall_correct,real,real
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
src_file,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
ast-regrwoth-samples.pddl,0.975223,0.130014,0.88374,0.264247,0.391846,0.398341,0.867028,0.333025,0.816273,0.36622,0.969922,0.096532,0.980867,0.137035,0.765967,0.385004,0.0,0.0
interactive-beta.pddl,1.0,0.0,0.988095,0.083762,0.419189,0.407367,0.887755,0.31729,0.847789,0.347843,1.0,0.0,0.979592,0.142119,0.862245,0.312058,1.0,0.0


## Data splitting approach
Under the NCE-style thing I want to try, I basically want to take the real games and split them train/val/test, and then use the regrowth samples corresponding to each game for negative examples and for the normalization.

In [88]:
RANDOM_SEED = 33
TRAINING_PROP = 0.8
VALIDATION_PROP = 0.1
TEST_PROP = 0.1

real_game_names = fitness_df[fitness_df.real].game_name.unique()

train_game_names, val_and_test_game_names = train_test_split(real_game_names, train_size=TRAINING_PROP, random_state=RANDOM_SEED)
val_game_names, test_game_names = train_test_split(val_and_test_game_names, train_size=VALIDATION_PROP/(VALIDATION_PROP+TEST_PROP), random_state=RANDOM_SEED)

train_df = fitness_df[fitness_df.game_name.isin(train_game_names) | fitness_df.original_game_name.isin(train_game_names)]
val_df = fitness_df[fitness_df.game_name.isin(val_game_names) | fitness_df.original_game_name.isin(val_game_names)]
test_df = fitness_df[fitness_df.game_name.isin(test_game_names) | fitness_df.original_game_name.isin(test_game_names)]

normalization_values = {}
for column in train_df.columns:
    if column not in NON_FEATURE_COLUMNS:
        train_col_mean = train_df[column].mean()
        train_col_std = train_df[column].std()
        normalization_values[column] = (train_col_mean, train_col_std)
        train_df = train_df.assign(**{column: (train_df[column] - train_col_mean) / train_col_std})
        val_df = val_df.assign(**{column: (val_df[column] - train_col_mean) / train_col_std})
        test_df = test_df.assign(**{column: (test_df[column] - train_col_mean) / train_col_std})

print(train_df.groupby('real').mean())

            Index  variables_defined  all_preferences_used  \
real                                                         
False  953.602564          -0.011556             -0.024719   
True    46.756410           0.184903              0.395504   

       setup_objects_used  no_adjacent_once  starts_and_ends_once  \
real                                                                
False           -0.003073         -0.003858             -0.005117   
True             0.049164          0.061722              0.081866   

       variable_not_repeated  no_nested_logicals  pref_forall_correct  
real                                                                   
False              -0.019010       -4.046799e-16            -0.015519  
True                0.304153       -3.981858e-16             0.248310  


# Approach
* In each batch, sample some number of real games, and for each of them, subsample some number of the corrupted games.
* Learn a regressor to the fitness (maybe with a hidden layer?)
* Try different regularization approaches/strengths (L1, L2, both)
* Evaluate on held-out validation set, see that it doesn't collapse

## Loss function
I'm inspired by the way Chris Dyer (in https://arxiv.org/abs/1410.8251) writes down the NCE loss:
$$ \mathcal{L}_{NCE_k}^{MC} = \sum_{(w,c) \in \mathcal{D}} \left( \log p (D = 1 \mid c, w) - \sum_{i=1, \bar{w} \sim q}^k \log p (D = 0 \mid c, \bar{w}) \right) $$
where: 
* $\mathcal{D}$ is the dataset comprised of pairs $(c, w)$ of context and the correct continuation $w$
* $D$ is the label, where $D = 1$ indicates true data and $D = $ indicates noise
* $q$ is a noise proposal distribution from which to sample $\bar{w}$, the noise foil examples for the current context.

In our case: 
* I think of the context $c$ as some game id, where the correct production $w$ is the true game
* Tha labels $D$ behave as they do above, $D = 1$ for a correct game and $D = 0$ for an incorrect one. 
* Our regrowth sampler is the proposal distribution $q$ (from which we could eventually generate as many samples as we want, but currently I pre-generate some number of samples per game).
* Given that my fitness model produces a single output, which I currently pass through a sigmoid, we can think about it as outputting $P(D = 1 \mid c, w)$, and taking 1 - its output as $P(D = 0 \mid \cdot)$

Thus, the procedure becomes:
1. In each batch, sample some number $B$ of true games.
2. For each of those, sample $k$ correuptions of the game. 
3. Compute the loss for this example, and then average over the minibatch.
4. Take a gradient step in this direction.


In [62]:
def df_to_tensor(df: pd.DataFrame, feature_columns: typing.List[str]):
    return torch.tensor(
        np.stack([
            np.concatenate((
                df.loc[df.game_name == game_name, feature_columns].to_numpy(),
                df.loc[df.original_game_name == game_name, feature_columns].to_numpy()
            ))
            for game_name
            in df[df.original_game_name.isna()].game_name.unique()
        ]),
        dtype=torch.float
    )


In [186]:
class FitnessEenrgyModel(nn.Module):
    def __init__(self, n_features: int, hidden_size: typing.Optional[int] = None,
        hidden_activation: typing.Callable = torch.relu,
        n_outputs: int = 1):
        super().__init__()
        self.n_features = n_features
        self.n_outputs = n_outputs

        if hidden_size is None:
            self.fc1 = nn.Linear(self.n_features, self.n_outputs)
            self.hidden_activation = None
        
        else:
            self.fc1 = nn.Linear(self.n_features, hidden_size)
            self.fc2 = nn.Linear(hidden_size, self.n_outputs)
            self.hidden_activation = hidden_activation

    def forward(self, x, activate: bool = True):
        x = self.fc1(x)

        if self.hidden_activation is not None:
            x = self.hidden_activation(x)
            x = self.fc2(x)

        # TODO: do we want a sigmoid or something else? Or nothing at all? 
        if self.n_outputs == 1 and activate:
            x = torch.sigmoid(x)

        return x


DEFAULT_MODEL_PARAMS = {
    'n_features': None,
    'hidden_size': None,
    'hidden_activation': torch.relu,
    'n_outputs': 1,
}

DEFAULT_TRAIN_KWARGS = {
    'weight_decay': 0.0,
    'lr': 1e-2,
    'should_print': False, 
    'print_interval': 10,
    'patience_epochs': 5, 
    'patience_threshold': 0.01, 
    'batch_size': 8, 
    'k': 4, 
    'device': 'cpu',
    'seed': 33,
}

class SklearnFitnessWrapper:
    def __init__(self,
        model_kwargs: typing.Optional[typing.Dict[str, typing.Any]] = None, 
        train_kwargs: typing.Optional[typing.Dict[str, typing.Any]] = None, **params):

        self.model_kwargs = copy.deepcopy(DEFAULT_MODEL_PARAMS)
        if model_kwargs is not None:
            self.model_kwargs.update(model_kwargs)

        self.train_kwargs = copy.deepcopy(DEFAULT_TRAIN_KWARGS)
        if train_kwargs is not None:
            self.train_kwargs.update(train_kwargs)

        self.set_params(**params)

    def get_params(self, deep: bool = True):
        return {
            **self.model_kwargs,
            **self.train_kwargs,
        }

    def set_params(self, **params):
        for key, value in params.items():
            if key in self.model_kwargs:
                self.model_kwargs[key] = value
            elif key in self.train_kwargs:
                self.train_kwargs[key] = value
            else:
                raise ValueError(f'Unknown parameter {key}')

        return self

    def fit(self, X, y=None):
        self.model = train_and_validate_model(FitnessEenrgyModel(**self.model_kwargs), X, **self.train_kwargs)[0] # type: ignore
        return self
            
    def __call__(self, *args, **kwargs):
        if self.model is not None:
            return self.model(*args, **kwargs)

        return None
        

def nce_fitness_loss(scores: torch.Tensor):
    if scores.shape[-1] == 1:
        positive_scores = torch.log(scores[:, 0])
        negative_scores = torch.log(1 - scores[:, 1:]).sum(axis=1)  # type: ignore
    else:
        positive_scores = torch.log(scores[:, 0, 0])
        negative_scores = torch.log(1 - scores[:, 1:, 1]).sum(axis=1)  # type: ignore
        
    return -(positive_scores + negative_scores).mean()


def evaluate_fitness(model: typing.Union[nn.Module, SklearnFitnessWrapper], X: torch.Tensor, y=None, return_all=False):
    return_all = return_all and isinstance(model, nn.Module)
    
    if isinstance(model, Pipeline):
        model = model.named_steps['fitness']
    
    if isinstance(model, SklearnFitnessWrapper):
        model = model.model

    model.eval()
    with torch.no_grad():
        scores = model(X, activate=False)
        if scores.shape[-1] == 1:
            positive_scores = scores[:, 0]
            negative_scores = scores[:, 1:]
        else:
            positive_scores = scores[:, 0, 0]
            negative_scores = scores[:, 1:, 1]
        game_average_scores = positive_scores - negative_scores.mean(axis=1)
        if return_all:
            return positive_scores.mean(), negative_scores.mean(), game_average_scores.mean()
        else:
            return game_average_scores.mean().item()


def train_and_validate_model(model: nn.Module, 
    train_data: torch.Tensor, 
    val_data: typing.Optional[torch.Tensor] = None, 
    n_epochs: int = 100, lr: float = 0.01, weight_decay: float = 0.0, 
    should_print: bool = True, print_interval: int = 10,
    patience_epochs: int = 5, patience_threshold: float = 0.01, 
    batch_size: int = 8, k: int = 4, device: str = 'cpu', seed: int = 33):

    optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)

    train_dataset = TensorDataset(train_data)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    validate = val_data is not None
    if validate:
        val_dataset = TensorDataset(val_data)
        val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    torch.manual_seed(seed)

    min_loss = np.Inf
    patience_loss = np.Inf
    patience_update_epoch = 0
    best_model = model
    
    for epoch in range(n_epochs):
        model.train()
        epoch_train_losses = []
        for batch in train_dataloader:
            X = batch[0]
            optimizer.zero_grad()
            negative_indices = torch.randperm(X.shape[1] - 1)[:k] + 1
            indices = torch.cat((torch.tensor([0]), negative_indices))
            X = X[:, indices].to(device)
            scores = model(X)
            loss = nce_fitness_loss(scores)
            epoch_train_losses.append(loss.item())
            loss.backward()
            optimizer.step()

        epoch_val_losses = []

        if validate:
            model.eval()
            with torch.no_grad():
                for batch in val_dataloader:  # type: ignore
                    X = batch[0]
                    negative_indices = torch.randperm(X.shape[1] - 1)[:k] + 1
                    indices = torch.cat((torch.tensor([0]), negative_indices))
                    X = X[:, indices].to(device)

                    scores = model(X)
                    loss = nce_fitness_loss(scores)
                    epoch_val_losses.append(loss.item())

        if should_print and epoch % print_interval == 0:
            if validate:
                print(f'Epoch {epoch}: train loss {np.mean(epoch_train_losses):.4f} | val loss {np.mean(epoch_val_losses):.4f} | weights {model.fc1.weight.data}')  # type: ignore
            else:
                print(f'Epoch {epoch}: train loss {np.mean(epoch_train_losses):.4f} | weights {model.fc1.weight.data}')  # type: ignore

        epoch_loss = np.mean(epoch_val_losses) if validate else np.mean(epoch_train_losses)

        if epoch_loss < min_loss:
            min_loss = epoch_loss
            best_model = copy.deepcopy(model).cpu()

        if epoch_loss < patience_loss - patience_threshold:
            patience_loss = epoch_loss
            patience_update_epoch = epoch

        if epoch - patience_update_epoch >= patience_epochs:
            break

    model = best_model.to(device)

    if validate:    
        return model, evaluate_fitness(model, train_data), evaluate_fitness(model, val_data)
    else:
        return model, evaluate_fitness(model, train_data)




In [154]:
features = ['variables_defined', 'setup_objects_used', 'no_adjacent_once', 'variable_not_repeated']

def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.01)


def repeated_evaluation(feature_columns: typing.List[str], model_kwargs: typing.Optional[typing.Dict[str, typing.Any]] = None,
    n_seeds: int = 10, start_seed: int = 0, weight_threshold: float = 0.1,
    weight_decay: float = 0.0):

    if model_kwargs is None:
        model_kwargs = {}

    train_tensor = df_to_tensor(train_df, feature_columns)
    val_tensor = df_to_tensor(val_df, feature_columns)

    results = []
    for seed in range(start_seed, start_seed + n_seeds):
        torch.manual_seed(seed)

        fitness_model = FitnessEenrgyModel(len(feature_columns), **model_kwargs)
        fitness_model.apply(init_weights)

        fitness_model, train_results, val_results = train_and_validate_model(fitness_model, train_tensor, val_tensor, 
            weight_decay=weight_decay, should_print=False, seed=seed)
        results.append({
            'seed': seed,
            'weight_decay': weight_decay,
            'features': feature_columns,
            'train': train_results[-1],
            'val': val_results[-1],
            'weights': fitness_model.fc1.weight.data.numpy(),
            'weights_over_threshold': (fitness_model.fc1.weight.data.abs() > weight_threshold).sum().item(),
            'weight_std': fitness_model.fc1.weight.data.std().item(),
        })

    print(f'Evaluated {n_seeds} seeds with weight decay {weight_decay} and weight threshold {weight_threshold}')
    print(f'Using features: {feature_columns}')
    print(f'With model kwargs: {model_kwargs}')
    for key in results[0]:
        if key in ('seed', 'weight_decay', 'features', 'weights'):
            continue
        values = [result[key] for result in results]
        m = np.mean(values)
        s = np.std(values)
        print(f'{key}: {m:.4f} +- {s:.4f}: [{m - s:.4f} - {m + s:.4f}]')
    print()

    return results

In [155]:
for n_outputs in (1, 2):
    model_kwargs = dict(n_outputs=n_outputs)
    repeated_evaluation(features, model_kwargs)
    repeated_evaluation(features, model_kwargs, weight_decay=0.01)
    _ = repeated_evaluation(features, model_kwargs, weight_decay=0.1)
    # _ = repeated_evaluation(features, model_kwargs, weight_decay=0.25)



Evaluated 10 seeds with weight decay 0.0 and weight threshold 0.1
Using features: ['variables_defined', 'setup_objects_used', 'no_adjacent_once', 'variable_not_repeated']
With model kwargs: {'n_outputs': 1}
train: -0.0190 +- 0.0135: [-0.0325 - -0.0055]
val: -0.2095 +- 0.1618: [-0.3713 - -0.0476]
weights_over_threshold: 3.4000 +- 0.8000: [2.6000 - 4.2000]
weight_std: 0.5820 +- 0.2176: [0.3644 - 0.7996]

Evaluated 10 seeds with weight decay 0.01 and weight threshold 0.1
Using features: ['variables_defined', 'setup_objects_used', 'no_adjacent_once', 'variable_not_repeated']
With model kwargs: {'n_outputs': 1}
train: -0.0189 +- 0.0134: [-0.0323 - -0.0055]
val: -0.2089 +- 0.1609: [-0.3698 - -0.0480]
weights_over_threshold: 3.3000 +- 0.7810: [2.5190 - 4.0810]
weight_std: 0.5771 +- 0.2177: [0.3594 - 0.7949]

Evaluated 10 seeds with weight decay 0.1 and weight threshold 0.1
Using features: ['variables_defined', 'setup_objects_used', 'no_adjacent_once', 'variable_not_repeated']
With model kwarg

In [149]:
all_features = [
    'variables_defined',
    'all_preferences_used', 'setup_objects_used', 'no_adjacent_once',
    'starts_and_ends_once', 'variable_not_repeated', 'no_nested_logicals',
    'pref_forall_correct']

for n_outputs in (1, 2):
    model_kwargs = dict(n_outputs=n_outputs)
    repeated_evaluation(all_features, model_kwargs)
    repeated_evaluation(all_features, model_kwargs, weight_decay=0.01)
    _ = repeated_evaluation(all_features, model_kwargs, weight_decay=0.1)
    # _ = repeated_evaluation(all_features, model_kwargs, weight_decay=0.25)

Evaluated 10 seeds with weight decay 0.0 and weight threshold 0.1
Using features: ['variables_defined', 'all_preferences_used', 'setup_objects_used', 'no_adjacent_once', 'starts_and_ends_once', 'variable_not_repeated', 'no_nested_logicals', 'pref_forall_correct']
With model kwargs: {'n_outputs': 1}
train: 0.3162 +- 0.0819: [0.2344 - 0.3981]
val: 0.3166 +- 0.0775: [0.2391 - 0.3940]
weights_over_threshold: 4.9000 +- 1.5133: [3.3867 - 6.4133]
weight_std: 0.2151 +- 0.0556: [0.1594 - 0.2707]

Evaluated 10 seeds with weight decay 0.01 and weight threshold 0.1
Using features: ['variables_defined', 'all_preferences_used', 'setup_objects_used', 'no_adjacent_once', 'starts_and_ends_once', 'variable_not_repeated', 'no_nested_logicals', 'pref_forall_correct']
With model kwargs: {'n_outputs': 1}
train: 0.3119 +- 0.0798: [0.2321 - 0.3918]
val: 0.3121 +- 0.0755: [0.2366 - 0.3876]
weights_over_threshold: 4.9000 +- 1.5133: [3.3867 - 6.4133]
weight_std: 0.2110 +- 0.0543: [0.1567 - 0.2653]

Evaluated 10 

# Reworking the above logic to support cross-validation

In [150]:
RANDOM_SEED = 33
TRAINING_PROP = 0.8
TEST_PROP = 0.2

real_game_names = fitness_df[fitness_df.real].game_name.unique()

train_game_names, test_game_names = train_test_split(real_game_names, train_size=TRAINING_PROP, random_state=RANDOM_SEED)
train_df = fitness_df[fitness_df.game_name.isin(train_game_names) | fitness_df.original_game_name.isin(train_game_names)]
test_df = fitness_df[fitness_df.game_name.isin(test_game_names) | fitness_df.original_game_name.isin(test_game_names)]


In [176]:
class CustomSklearnScaler:
    def __init__(self):
        self.mean = None
        self.std = None

    def fit(self, X, y=None):
        if X.ndim != 3:
            raise ValueError('X must be 3D')

        self.mean = X.mean(axis=(0, 1))
        self.std = X.std(axis=(0, 1))   
        return self

    def transform(self, X, y=None):
        if X.ndim != 3:
            raise ValueError('X must be 3D')

        return (X - self.mean) / self.std
    
    def fit_transform(self, X, y=None):
        return self.fit(X).transform(X)
        
    def get_feature_names_out(self, input_features=None):
        return [f'x{i}' for i in range(self.mean.shape[0])]

    def set_params(self, **params):
        return self

    def get_params(self, deep=True):
        return {}

In [187]:

def cross_validate(train: pd.DataFrame, feature_columns: typing.List[str], param_grid: typing.Dict[str, typing.Any],
    train_kwargs: typing.Optional[typing.Dict[str, typing.Any]] = None):
    train_tensor = df_to_tensor(train, feature_columns)
    pipeline = Pipeline(steps=[('scaler', CustomSklearnScaler()), ('fitness', SklearnFitnessWrapper(train_kwargs=train_kwargs))])

    param_grid['fitness__n_features'] = [len(feature_columns)]

    cv = GridSearchCV(pipeline, param_grid, scoring=evaluate_fitness, cv=5, n_jobs=-1, verbose=1)
    cv.fit(train_tensor, None)
    return cv
     
test_param_grid = {
    'fitness__n_outputs': [1], 
    'fitness__weight_decay': [0.0, 0.01, 0.05, 0.1], 
    'fitness__hidden_size': [None, 2, 4]   
}
cv = cross_validate(train_df, all_features, test_param_grid, train_kwargs=dict(should_print=False))

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [193]:
cv.cv_results_.keys()

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_fitness__hidden_size', 'param_fitness__n_features', 'param_fitness__n_outputs', 'param_fitness__weight_decay', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score'])

In [199]:
cv_df = pd.concat([
    pd.DataFrame(cv.cv_results_["params"]), 
    pd.DataFrame(cv.cv_results_["mean_test_score"], columns=['score_mean']), 
    pd.DataFrame(cv.cv_results_["std_test_score"], columns=['score_std']), 
    pd.DataFrame(cv.cv_results_["rank_test_score"], columns=['score_rank']),
],axis=1)

cv_df.sort_values(by='score_rank').head(10)

Unnamed: 0,fitness__hidden_size,fitness__n_features,fitness__n_outputs,fitness__weight_decay,score_mean,score_std,score_rank
0,,8,1,0.0,0.064895,0.00747,1
1,,8,1,0.01,0.063647,0.006956,2
2,,8,1,0.05,0.060849,0.009188,3
3,,8,1,0.1,0.057272,0.005527,4
10,4.0,8,1,0.05,0.012064,0.020287,5
7,2.0,8,1,0.1,0.010181,0.009132,6
6,2.0,8,1,0.05,0.008133,0.007365,7
5,2.0,8,1,0.01,0.005079,0.008298,8
4,2.0,8,1,0.0,0.005025,0.008544,9
8,4.0,8,1,0.0,0.001563,0.013546,10


In [201]:
cv.best_estimator_.named_steps['fitness'].model.fc1.weight.data

tensor([[ 0.2690,  0.4436,  0.0080,  0.0876, -0.0740,  0.4694, -0.1023,  0.1250]])

In [202]:
test_tensor = df_to_tensor(test_df, all_features)

In [203]:
evaluate_fitness(cv.best_estimator_.named_steps['fitness'].model, test_tensor)

0.07964463531970978