In [33]:
%autoreload 2

In [34]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset
import typing
import sklearn
from sklearn.model_selection import train_test_split


In [35]:
# CSV holding one row of fitness feature scores per game.
FITNESS_DATA_FILE = '../data/fitness_scores.csv'
# Columns that identify or label a row rather than describe a fitness feature.
NON_FEATURE_COLUMNS = {'Index', 'src_file', 'game_name', 'domain_name', 'real'}

# Read the scores and add a boolean `real` column that is True exactly for rows
# whose source file is 'interactive-beta.pddl'.
fitness_df = (
    pd.read_csv(FITNESS_DATA_FILE)
    .assign(real=lambda scores: scores.src_file == 'interactive-beta.pddl')
)
fitness_df.head()

Unnamed: 0,Index,src_file,game_name,domain_name,variables_defined,all_preferences_used,setup_objects_used,no_adjacent_once,starts_and_ends_once,variable_not_repeated,no_nested_logicals,pref_forall_correct,real
0,0,interactive-beta.pddl,6172feb1665491d1efbce164-0,medium-objects-room-v1,1.0,1.0,1.0,1.0,0.5,1.0,1.0,1.0,True
1,1,interactive-beta.pddl,5f77754ba932fb2c4ba181d8-2,many-objects-room-v1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,True
2,2,interactive-beta.pddl,614b603d4da88384282967a7-3,many-objects-room-v1,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,True
3,3,interactive-beta.pddl,5bc79f652885710001a0e82a-5,few-objects-room-v1,1.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,True
4,4,interactive-beta.pddl,614dec67f6eb129c3a77defd-6,medium-objects-room-v1,1.0,1.0,0.25,1.0,1.0,1.0,1.0,1.0,True


In [36]:
# Display the column labels of the loaded frame (identifier columns, feature columns and `real`).
fitness_df.columns

Index(['Index', 'src_file', 'game_name', 'domain_name', 'variables_defined',
       'all_preferences_used', 'setup_objects_used', 'no_adjacent_once',
       'starts_and_ends_once', 'variable_not_repeated', 'no_nested_logicals',
       'pref_forall_correct', 'real'],
      dtype='object')

In [38]:
# Per-source-file mean and standard deviation of every numeric/boolean column.
# The string columns (game_name, domain_name) are excluded explicitly: recent
# pandas versions raise instead of silently dropping them. The aggregations are
# named by string because passing np.mean / np.std to `agg` is deprecated.
summary_columns = (
    fitness_df
    .drop(columns='Index')
    .select_dtypes(include=['number', 'bool'])
    .columns
    .tolist()
)
fitness_df.groupby('src_file')[summary_columns].agg(['mean', 'std'])

Unnamed: 0_level_0,variables_defined,variables_defined,all_preferences_used,all_preferences_used,setup_objects_used,setup_objects_used,no_adjacent_once,no_adjacent_once,starts_and_ends_once,starts_and_ends_once,variable_not_repeated,variable_not_repeated,no_nested_logicals,no_nested_logicals,pref_forall_correct,pref_forall_correct,real,real
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
src_file,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
ast-mle-samples.pddl,0.91,0.287623,0.859548,0.272426,0.194491,0.30793,0.364833,0.426874,0.313667,0.402353,0.772989,0.143191,0.98,0.140705,0.292417,0.396267,0.0,0.0
ast-regrwoth-samples.pddl,0.975223,0.130014,0.88374,0.264247,0.391846,0.398341,0.867028,0.333025,0.816273,0.36622,0.969922,0.096532,0.980867,0.137035,0.765967,0.385004,0.0,0.0
interactive-beta.pddl,1.0,0.0,0.988095,0.083762,0.419189,0.407367,0.887755,0.31729,0.847789,0.347843,1.0,0.0,0.979592,0.142119,0.862245,0.312058,1.0,0.0


In [22]:
RANDOM_SEED = 33
# Proportions of the full data set assigned to each split.
TRAINING_PROP = 0.7
VALIDATION_PROP = 0.1
TEST_PROP = 0.2

# Two-stage split, stratified on `real` both times: first carve off the training
# set, then divide the remainder between validation and test.
train_df, val_and_test_df = train_test_split(
    fitness_df,
    train_size=TRAINING_PROP,
    random_state=RANDOM_SEED,
    stratify=fitness_df.real,
)
val_df, test_df = train_test_split(
    val_and_test_df,
    train_size=VALIDATION_PROP / (VALIDATION_PROP + TEST_PROP),
    random_state=RANDOM_SEED,
    stratify=val_and_test_df.real,
)

# Take an explicit copy before writing normalized values into it, so the
# assignments below never target a view of `fitness_df`
# (which would trigger pandas' SettingWithCopyWarning).
train_df = train_df.copy()

# column name -> (mean, std), both computed on the training split only.
normalization_values = {}
for column in train_df.columns:
    if column in NON_FEATURE_COLUMNS:
        continue

    col_mean = train_df[column].mean()
    col_std = train_df[column].std()
    normalization_values[column] = (col_mean, col_std)
    # Standardize the training column with its own statistics.
    train_df[column] = (train_df[column] - col_mean) / col_std

# numeric_only=True skips the string columns (src_file, game_name, domain_name),
# which recent pandas versions refuse to average.
print(train_df.groupby('real').mean(numeric_only=True))

            Index  variables_defined  all_preferences_used  \
real                                                         
False  149.642857          -0.240107             -0.289342   
True    49.338235           0.247169              0.297852   

       setup_objects_used  no_adjacent_once  starts_and_ends_once  \
real                                                                
False           -0.337423         -0.572281             -0.569543   
True             0.347347          0.589113              0.586294   

       variable_not_repeated  no_nested_logicals  pref_forall_correct  
real                                                                   
False              -0.724844            0.001726            -0.633719  
True                0.746163           -0.001777             0.652358  


# Approach
* Sample balanced batches from the training set with a subset of features
* Learn a regressor that predicts the fitness (maybe with a hidden layer?)
* Try different regularization approaches/strengths (L1, L2, both)
* Evaluate on held-out validation set, see that it doesn't collapse


In [29]:
def _df_to_tensor(df: pd.DataFrame):
    return torch.tensor(df.to_numpy())

def df_to_datasets(df: pd.DataFrame, feature_columns: typing.List[str], split_column: str = 'real'):
    """Split a frame into two feature tensors according to a boolean column.

    Args:
        df: Frame containing both the feature columns and the split column.
        feature_columns: Names of the columns to keep in the tensors.
        split_column: Name of the column used as a row mask; it is negated
            with ``~`` for the second tensor.

    Returns:
        A pair ``(selected, rest)``: the tensor built from the rows where
        ``split_column`` holds, followed by the tensor built from the others.
    """
    selected_rows = df.loc[df[split_column], feature_columns]
    remaining_rows = df.loc[~df[split_column], feature_columns]
    return _df_to_tensor(selected_rows), _df_to_tensor(remaining_rows)


# Build the real/fake training tensors from a three-feature subset of the training frame.
train_real, train_fake = df_to_datasets(
    train_df,
    ['variables_defined', 'setup_objects_used', 'no_adjacent_once'],
)

In [28]:
class FitnessEenrgyModel(nn.Module):
    """Linear layer followed by a sigmoid: maps each feature vector to one score in (0, 1).

    NOTE(review): the class name misspells "Energy"; it is left unchanged here
    so that existing references to it keep working.
    """

    def __init__(self, n_features: int):
        """Create the model for inputs whose last dimension has ``n_features`` entries."""
        super().__init__()
        self.n_features = n_features
        # TODO: consider a hidden layer
        self.fc1 = nn.Linear(in_features=n_features, out_features=1)
        # TODO: do we want a sigmoid or something else? Or nothing at all?
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the linear projection of ``x``."""
        return self.sigmoid(self.fc1(x))


def train_model(model: nn.Module, train_real: torch.Tensor, train_fake: torch.Tensor, n_epochs: int = 100, lr: float = 0.001,
                criterion: typing.Optional[nn.Module] = None):
    """Train ``model`` in place to score real examples as 1 and fake examples as 0.

    Every epoch is one full-batch Adam step on the sum of the loss over the
    real examples (target 1) and the loss over the fake examples (target 0).
    The two tensors are passed through the model separately, so they may
    contain different numbers of rows.

    Args:
        model: Module to optimize; its parameters are updated in place.
        train_real: Feature tensor of the real examples.
        train_fake: Feature tensor of the fake examples.
        n_epochs: Number of optimization steps.
        lr: Learning rate passed to Adam.
        criterion: Loss applied to ``(predictions, targets)``. Defaults to
            ``nn.BCELoss()``, which requires predictions in [0, 1] (for
            example the output of a sigmoid).

    Returns:
        None. The loss is printed every 10 epochs.
    """
    if criterion is None:
        criterion = nn.BCELoss()

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(n_epochs):
        optimizer.zero_grad()
        real_preds = model(train_real)
        fake_preds = model(train_fake)
        # Targets are built to match each prediction tensor's shape and dtype.
        loss = criterion(real_preds, torch.ones_like(real_preds)) + criterion(fake_preds, torch.zeros_like(fake_preds))
        loss.backward()
        optimizer.step()
        if epoch % 10 == 0:
            print(f'Epoch {epoch}: {loss.item()}')
    

tensor([[ 0.2472,  1.7900,  0.8116],
        [ 0.2472,  1.1355,  0.8116],
        [ 0.2472,  1.7900,  0.8116],
        [ 0.2472, -0.8282,  0.8116],
        [ 0.2472,  0.9173,  0.8116],
        [ 0.2472, -0.8282,  0.8116],
        [ 0.2472,  0.4809,  0.8116],
        [ 0.2472, -0.8282,  0.8116],
        [ 0.2472,  0.9173,  0.8116],
        [ 0.2472,  1.7900,  0.8116],
        [ 0.2472,  0.0446,  0.8116],
        [ 0.2472,  0.4809,  0.8116],
        [ 0.2472, -0.8282, -1.3499],
        [ 0.2472, -0.8282,  0.8116],
        [ 0.2472, -0.8282,  0.8116],
        [ 0.2472,  0.0446,  0.8116],
        [ 0.2472,  0.4809,  0.8116],
        [ 0.2472,  1.7900,  0.8116],
        [ 0.2472, -0.8282,  0.8116],
        [ 0.2472,  1.7900,  0.8116],
        [ 0.2472, -0.8282,  0.8116],
        [ 0.2472,  1.3537,  0.8116],
        [ 0.2472, -0.8282,  0.8116],
        [ 0.2472, -0.8282,  0.8116],
        [ 0.2472,  1.7900,  0.8116],
        [ 0.2472,  1.7900,  0.8116],
        [ 0.2472, -0.8282,  0.8116],
 