# Additional Example


In [8]:
import autora
import numpy as np
from typing import Iterable, Literal, Optional

from sklearn.metrics import DistanceMetric
from sklearn.preprocessing import StandardScaler
import torch 
from torch.autograd import Variable
from torch import nn

from autora.experimentalist.sampler.falsification import falsification_sampler, falsification_score_sampler, falsification_score_sampler_from_predictions
from autora.experimentalist.sampler.novelty import novelty_sampler, novelty_score_sampler


In [51]:
def adjust_distribution(p, temperature):
        # temperature cannot be 0
        #If the temperature is very low (close to 0), then the sampling will become almost deterministic, picking the event with the highest probability.
        #If the temperature is very high, then the sampling will be closer to uniform, with all events having roughly equal probability.
        
        p = p / np.sum(p)  # Normalizing the initial distribution
        p = np.log(p) / temperature  
        p = np.exp(p)  
        p = p / np.sum(p) # Normalizing the final distribution
        return p


    
def mixture_sampler(condition_pool: np.ndarray, temperature: float, samplers: list, params: dict, num_samples: Optional[int] = None) -> np.ndarray:
    """

    Args:
        condition_pool: pool of experimental conditions to evaluate
        temperature: how random is selection of conditions (cannot be 0; (0:1) - the choices are more deterministic than the choices made wrt
        samplers: tuple containing sampler functions, their names, and weights 
        for sampler functions that return both positive and negative scores, user can provide a list with two weights: the first one will be applied to positive scores, the second one -- to the negative
        params: nested dictionary. keys correspond to the sampler function names (same as provided in samplers),
        values correspond to the dictionaries of function arguments (argument name: its value)
        num_samples: number of experimental conditions to select
        
    Returns:
        Sampled pool of experimental conditions
    """
    
    rankings = []
    scores = []
    
    ## getting rankings and weighted scores from each function
    for (function, name, weight) in samplers:
        sampler_params = params[name]
        cur_ranking, cur_scores = function(condition_pool=condition_pool, **sampler_params)
        cur_indices = np.argsort(cur_ranking, axis=None)
        cur_ranking_sorted = cur_ranking[cur_indices]
        rankings.append(cur_ranking_sorted) # for checking: all elements should be the same & same order
        ## if function scores can be negative, then create a reversed dimension for them
        if np.sum(cur_scores<0)>0:
            
            cur_scores_positive = np.copy(cur_scores)
            cur_scores_positive[cur_scores<0]=0
            cur_scores_negative = -np.copy(cur_scores)
            cur_scores_negative[cur_scores>0]=0
            
            # aligning scores
            cur_scores_positive_sorted = cur_scores_positive[cur_indices]
            cur_scores_negative_sorted = cur_scores_negative[cur_indices]
            
            # if only one weight is provided, use it for both negative and positive dimensions
            if isinstance(weight, int):
                cur_scores_positive_weighted = cur_scores_positive_sorted * weight
                cur_scores_negative_weighted = cur_scores_negative_sorted * weight
            else:
                cur_scores_positive_weighted = cur_scores_positive_sorted * weight[0] # positive dimension gets the first weight
                cur_scores_negative_weighted = cur_scores_negative_sorted * weight[1] # negative dimension gets the second weight
            
            scores.append(cur_scores_positive_weighted)
            scores.append(cur_scores_negative_weighted)
            
        else:
            cur_scores_sorted = cur_scores[cur_indices]
            if isinstance(weight, int):
                cur_scores_weighted = cur_scores_sorted * weight
            else: 
                cur_scores_weighted = cur_scores_sorted * weight[0]
            scores.append(cur_scores_weighted)
    
    weighted_mixture_scores = np.sum(scores, axis = 0)
    
    # adjust mixture scores wrt temperature
    weighted_mixture_scores_adjusted = adjust_distribution(weighted_mixture_scores, temperature)
    
    if num_samples is None:
        num_samples = condition_pool.shape[0]
    
    conditions = np.random.choice(cur_ranking_sorted.T.squeeze(), num_samples,
              p=weighted_mixture_scores_adjusted, replace = False)
    
    return conditions

In [52]:
mixture_sampler(condition_pool = np.array([10,49,59]),temperature = 20, 
    samplers = [[novelty_score_sampler, "novelty", [0.2,0.8]]], params = {"novelty": {"reference_conditions": np.array([12,25,30])}})


Use `novelty_score_sample` instead. `novelty_score_sampler` is deprecated.


[ 1.1460018   0.144641   -1.29064281]
[ 1.1460018   0.144641   -1.29064281]
[1.1460018 0.144641  0.       ]
[0.         0.         1.29064281]
[array([0.        , 0.0289282 , 0.22920036]), array([1.03251424, 0.        , 0.        ])]
[array([[10],
       [49],
       [59]])]


array([49, 59, 10])

In [10]:
def adjust_distribution(p, temperature):
        # temperature cannot be 0
        #If the temperature is very low (close to 0), then the sampling will become almost deterministic, picking the event with the highest probability.
        #If the temperature is very high, then the sampling will be closer to uniform, with all events having roughly equal probability.
        
        p = p / np.sum(p)  # Normalizing the initial distribution
        p = np.log(p) / temperature  
        p = np.exp(p)  
        p = p / np.sum(p) # Normalizing the final distribution
        return p


    
def mixture_sampler(
    condition_pool: np.ndarray, weights: np.ndarray, temperature: int, 
    X_ref: np.ndarray, 
    X_train: np.ndarray,
    Y_train: np.ndarray, Y_predicted, num_samples: Optional[int] = None) -> np.ndarray:
    """
    Add a description of the sampler here.

    Args:
        condition_pool: pool of experimental conditions to evaluate
        num_samples: number of experimental conditions to select
        weights: array containing 4 weights -- importance of the falsification, confirmation, novelty, and familiarity (ideally, each pair of opposites? sums up to 1 or all? sum up to 1)
        temperature: how random is selection of conditions (cannot be 0; (0:1) - the choices are more deterministic than the choices made wrt
        the mixture scores; 1 - choices are made wrt to the mixture scores; (1, inf) - the choices are more random)
        X_ref, X_train, Y_train, Y_predicted: parameters required for falsification and novelty samplers
    
    Returns:
        Sampled pool of experimental conditions
    """
    
    
    falsification_ranking, falsification_scores = get_scored_samples_from_model_prediction(condition_pool, 
                                                                                           Y_predicted, X_train,
                                                                                           Y_train, n=condition_pool.shape[0])
    
    # getting rid of negative scores by introducing confirmation scores 
    confirmation_scores = -falsification_scores
    confirmation_scores[falsification_scores>0]=0
    falsification_scores[falsification_scores<0]=0
    
    # getting rid of negative scores by introducing familiarity scores 
    novelty_ranking, novelty_scores = novelty_score_sample(condition_pool, X_ref, n=condition_pool.shape[0])
    
    familiarity_scores = -novelty_scores
    familiarity_scores[novelty_scores>0]=0
    novelty_scores[novelty_scores<0]=0
    
    # aligning the arrays based on the observations (condition pools)
    novelty_indices = np.argsort(novelty_ranking, axis=None)
    ranking_sorted = novelty_ranking[novelty_indices]
    novelty_scores_sorted = novelty_scores[novelty_indices]
    familiarity_scores_sorted = familiarity_scores[novelty_indices]

    falsification_indices = np.argsort(falsification_ranking, axis=None)
    falsification_scores_sorted = falsification_scores[falsification_indices]    
    confirmation_scores_sorted = confirmation_scores[falsification_indices] 
    
    weighted_mixture_scores = falsification_scores_sorted * weights[0] + confirmation_scores_sorted * weights[1] + novelty_scores_sorted * weights[2] + familiarity_scores_sorted * weights[3] 
    # each score is weighted by the relative importance of these different axes
    
    
    # adjust mixture scores wrt temperature
    weighted_mixture_scores_adjusted = adjust_distribution(weighted_mixture_scores, temperature)
    
    if num_samples is None:
        num_samples = condition_pool.shape[0]
    
    conditions = np.random.choice(ranking_sorted.T.squeeze(), num_samples,
              p=weighted_mixture_scores_adjusted, replace = False)
    
    return conditions





class MixtureExperimentalist:
    def __init__(self, weights: np.ndarray, temperature: int, 
    X_ref: np.ndarray, 
    X_train: np.ndarray,
    Y_train: np.ndarray, Y_predicted, num_samples: Optional[int] = None):
        self.weights = weights
        self.temperature = temperature
        self.X_ref = X_ref
        self.X_train = X_train
        self.Y_train = Y_train
        self.Y_predicted = Y_predicted
        self.num_samples = num_samples

    def __call__(self, condition_pool: np.ndarray, **kwargs):
        params = dict(weights = self.weights, temperature = self.temperature, X_ref = self.X_ref,
                     X_train = self.X_train, Y_train = self.Y_train, Y_predicted = self.Y_predicted,
                     num_samples = self.num_samples)
        params.update(kwargs)
        
        samples = mixture_sampler(condition_pool, **params)
        return samples
        


In [26]:


experimentalist = MixtureExperimentalist(condition_pool = np.array([10,49,59]),temperature = 20, 
    samplers = ([novelty_score_sampler, "novelty", [0.2,0.8]]), params = {"novelty": {"X_ref": np.array([10,25,30])}})
    #X_ref = np.array([10,25,30]),
    #X_train = np.array([2,5,6]),
    #Y_train = np.array([20,25,26]), Y_predicted = np.array([21,22,23]), num_samples = 2)

TypeError: __init__() got an unexpected keyword argument 'condition_pool'

In [21]:
experimentalist(condition_pool = np.array([1,2,3]), num_samples = 3)

TypeError: mixture_sampler() got an unexpected keyword argument 'weights'

In [None]:
mixture_sampler(
    np.array([1,2,3]), np.array([0.2,0.3,0.3,0.7]), 20, 
    np.array([10,25,30]), 
    np.array([2,5,6]),
    np.array([20,25,26]), np.array([21,22,23]), 2)

In [34]:
mixture_sampler(condition_pool = np.array([10,49,59]),temperature = 20, 
    samplers = [[novelty_score_sampler, "novelty", [0.2,0.8]]], params = {"novelty": {"reference_conditions": np.array([12,25,30])}})


Use `novelty_score_sample` instead. `novelty_score_sampler` is deprecated.
  p = np.log(p) / temperature


ValueError: Fewer non-zero entries in p than size