In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pymongo import MongoClient
from modAL.models import ActiveLearner
from modAL.uncertainty import uncertainty_sampling
from sklearn.ensemble import RandomForestClassifier

In [2]:
# Open a client; no host or port is given, so pymongo's defaults apply
client = MongoClient()

# Select the BERTHA database and, inside it, the experiments collection
db = client["BERTHA"]
collection = db["experiments"]

In [3]:
# Column names used as model inputs and as the classification target
params = [
    'Power_Ax1_setpoint_[W]',
    'Power_Ax2_setpoint_[W]',
    'Power_Ax3_setpoint_[W]',
]
target = ['Settled']

# Pull every document of the collection into a DataFrame
df = pd.DataFrame(list(collection.find()))

# Feature matrix and target array; selecting with a list of columns keeps
# the column axis, so y is 2-D with one column
X = df[params].to_numpy()
y = df[target].to_numpy()

# TODO

- ~~What happens when the training data contains only one class?~~ Answer: no issues; it works just fine.
- Test the AL class on some data and make sure that it works reasonably.
- Test MongoDB filtering with queries. Make sure that it works.

In [22]:
class ActiveLearnerBERTHA:
    '''
    The active learning class made specifically for the BERTHA setup.
    This class augments the active learner from ModAL. It learns from
    data taken from the database used by BERTHA, and it stores the data
    that it has learned from to generate new queries in a pool-based
    setting.
    '''

    def __init__(self,
                 mongodb_collection,
                 input_args : list,
                 input_ranges : list,
                 target : list
                ):
        '''
        Params
        ------
        mongodb_collection: the MongoDB collection from the database to train from
        input_args: list of parameters from database to train on, example: [pressure, power1, power2]
        input_ranges: list of exclusive upper bounds for each input parameter,
        example: [50, 70, 120]. Candidate values for parameter i are the integers
        in [0, input_ranges[i]).
        Note that input_args and input_ranges must map 1:1.
        target: list with the classifier target, example: ["Settled"]

        Raises
        ------
        ValueError: if input_args and input_ranges do not have the same length.
        '''

        # Validate before any state is assigned; an explicit exception (unlike
        # assert) is not stripped when Python runs with -O.
        if len(input_args) != len(input_ranges):
            raise ValueError(
                'input_args and input_ranges must map 1:1, got '
                f'{len(input_args)} args and {len(input_ranges)} ranges'
            )

        self.collection = mongodb_collection
        self.input_args = input_args
        self.input_ranges = input_ranges
        self.target = target

        # Empty placeholders so get_process_params can run its duplicate
        # filter before any data has been learned.
        self.X = np.empty((0, len(input_args)), int)
        self.y = np.empty((0, len(target)), int)

        self.learner = ActiveLearner(
            estimator=RandomForestClassifier(),
            query_strategy=uncertainty_sampling
        )


    def learn_from_db(self, mongodb_query : dict = None) -> None:
        '''
        Trains the active learner from the available data in the database.

        Params
        ------
        mongodb_query : filters data according to the MongoDB query language
        (https://www.mongodb.com/docs/manual/tutorial/query-documents/).
        Defaults to no query (all documents).

        Raises
        ------
        ValueError: if the query matches no documents.
        '''

        # None stands in for "no filter" so no mutable default is shared
        # between calls.
        query = {} if mongodb_query is None else mongodb_query

        # Read from the collection given to the constructor, not from a
        # notebook-level global that may or may not exist.
        df = pd.DataFrame(list(self.collection.find(query)))

        if df.empty:
            raise ValueError('The MongoDB query returned no documents to learn from.')

        # Train the active learner on the data from the database
        self.X = df[self.input_args].to_numpy()
        self.y = df[self.target].to_numpy().ravel()

        # NOTE(review): teach() appears to add to the learner's stored training
        # set, so calling this method repeatedly may duplicate samples -
        # confirm against the modAL documentation.
        self.learner.teach(X=self.X, y=self.y)


    def get_process_params(self, pool_size=500) -> np.ndarray:
        '''
        Query the active learner for the next set of most informative
        experiment parameters.

        Params
        ------
        pool_size: amount of randomly generated datapoints to evaluate
        in the pool-based setting. The higher the number, the greater the
        information gain; however, this comes at the cost of higher
        computation.

        Returns
        -------
        A numpy array of the most informative set of process parameters
        from the generated pool for the next experiment.

        Raises
        ------
        ValueError: if every generated candidate is already in the training data.
        '''

        # Generate pool of data: column i holds integers in [0, input_ranges[i])
        pool = np.random.randint(
            low=0,
            high=self.input_ranges,
            size=(pool_size, len(self.input_ranges))
        )

        # Remove elements from pool already in the training data
        # (set difference: pool - self.X). Broadcasting compares every pool row
        # against every training row.
        already_seen = (pool[:, None, :] == self.X).all(-1).any(1)
        pool = pool[~already_seen]

        if len(pool) == 0:
            raise ValueError(
                'Every generated candidate is already in the training data; '
                'increase pool_size or widen input_ranges.'
            )

        # Query the active learner; the index into the pool is not needed
        _, query_parameters = self.learner.query(pool)

        return query_parameters
    

In [23]:
# Each of the three power setpoints gets 80 as its input range
alb = ActiveLearnerBERTHA(
    mongodb_collection=collection,
    input_args=params,
    input_ranges=[80, 80, 80],
    target=target,
)

In [24]:
# Train on the database contents; no query filter is passed
alb.learn_from_db()

In [25]:
# Ask the learner for the next process parameters, using the default pool_size
alb.get_process_params()

array([[22, 71, 36]])

# Experiment corner

In [None]:
class Test:
    '''
    Minimal class whose only method assigns an instance attribute
    outside of __init__.
    '''
    
    def set_smth(self, arg):
        '''Store arg on the instance as the attribute `arg`.'''
        self.arg = arg

In [None]:
# Test defines no __init__, so the new instance has no `arg` attribute yet
t = Test()

In [None]:
# Create the `arg` attribute on the instance through the method
t.set_smth('hi')

In [None]:
# Display the attribute assigned by set_smth
t.arg

## Generate pool

In [None]:
# Number of rows to draw and the exclusive upper bound of each column
n = 10
bounds = [5, 120, 2400]

# Column j holds integers drawn from [0, bounds[j])
np.random.randint(0, bounds, size=(n, len(bounds)))

In [None]:
A = np.array([[1, 4, 3], [2, 5, 1], [7, 8, 9]])
B = np.array([[1, 4, 3], [3, 6, 1], [7, 8, 9]])

# Row-wise set difference A - B: broadcast every row of A against every row
# of B, flag rows of A that equal some row of B, and keep the rest
A = A[~(A[:, np.newaxis, :] == B).all(axis=-1).any(axis=1)]

In [None]:
# Check which type A has after the boolean-mask indexing
type(A)