# Develop Sigma Sweep Experiment with Interactions

Written by Jess Breda

**Goal**: The goal of this notebook is to set up the base code for an experiment that runs a multi-class logistic regression model, using the new design matrix generator code I wrote and the updated experiment classes, which don't require as much copying & pasting.

Specific todos:

[X] import design matrix code from .py and confirm working

[X] write parent experimental design class

[X] write child sigma sweep class

[ ] run for animals

[ ] determine model configs optimization

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import sys

# Make every sub-folder of ../src importable.
# A generator fed to sys.path.extend avoids building a throwaway list of
# None values, and the membership check keeps sys.path free of duplicate
# entries when this cell is re-run in the same kernel.
sys.path.extend(
    str(folder)
    for folder in pathlib.Path("../src/").iterdir()
    if folder.is_dir() and str(folder) not in sys.path
)

from design_matrix_generator_interactions import DesignMatrixGeneratorInteractions
from train_test_splitter import TrainTestSplitter
from get_rat_data import get_rat_viol_data
from fitting_utils import get_taus_df, create_violation_interaction_pairs
from multiclass_logistic_regression import MultiClassLogisticRegression

sns.set_context("talk")
%load_ext autoreload
%autoreload 2

In [5]:
# Single animal used for the manual checks in the next cells.
animal_id = "W075"
taus_df = get_taus_df()
# Take the "tau" value of the first row matching this animal;
# .values[0] raises IndexError if the animal has no row in taus_df.
tau = taus_df.query("animal_id == @animal_id")["tau"].values[0]

In [7]:
# Generate X and Y for the single animal selected above.
# NOTE(review): create_violation_interaction_pairs is called with (tau, columns) here but
# with only the column list in the params cell further down — confirm which signature is current.
X, Y = DesignMatrixGeneratorInteractions(model_type="multi").generate_design_matrix(
    df=get_rat_viol_data([animal_id]),
    tau=tau,
    filter_column="prev_violation",
    interaction_pairs=create_violation_interaction_pairs(tau, ["s_a", "s_b"]),
)

returning viol data for ['W075']


In [21]:
# Manual check of the splitter with test_size=0.2 and a fixed random_state.
tts = TrainTestSplitter(test_size=0.2, random_state=65)

# Called before apply_session_split, the same order used in
# ExperimentSigmaSweep.run_single_animal.
tts.get_sessions_for_split(df=X)

# The four return values are unpacked there as X_train, X_test, Y_train, Y_test.
x1, x2, y1, y2 = tts.apply_session_split(X, Y)

In [94]:
class Experiment:
    """
    Parent class for fitting experiments. On construction it
    loads the violation dataset and the taus table and keeps the
    settings that child experiments read.

    params
    ------
    params : dict
        required keys
            "animals" : animal ids to fit, or None to use every
                animal_id found in the loaded data
            "sigmas" : sigma values stored on self.sigmas
            "model_config" : stored as-is on self.model_config
        optional keys
            "random_state" : default 23
            "test_size" : default 0.2
    """

    def __init__(self, params):
        requested_animals = params["animals"]
        self.sigmas = params["sigmas"]
        self.df = get_rat_viol_data(animal_ids=requested_animals)
        self.taus = get_taus_df()
        self.random_state = params.get("random_state", 23)
        self.test_size = params.get("test_size", 0.2)
        self.null_models = []
        self.model_config = params["model_config"]

        # no explicit list given -> fall back to the animals in the data
        self.animals = (
            self.df.animal_id.unique()
            if requested_animals is None
            else requested_animals
        )
        self.n_animals = len(self.animals)

    @staticmethod
    def store(data, df):
        """
        Append one fit result to df as a new row. df is modified
        in place and nothing is returned.

        params
        ------
        data : dict
            maps column name -> value for the new row. Keys are
            not validated against df.columns
        df : pd.DataFrame
            dataframe of fits, one row per stored fit. The new row
            is written at label len(df)
        """
        row_label = len(df)
        for column in data:
            df.loc[row_label, column] = data[column]

In [96]:
class ExperimentSigmaSweep(Experiment):
    """
    Experiment that runs a sigma sweep for a given
    set of animals, sigmas and parameters.

    For every animal a design matrix is generated once, split into
    train and test sets once, and then one model is fit per sigma.
    Each fit is appended as a row of self.fit_models through the
    store() method inherited from Experiment.

    params
    ------
    params : dict
        same keys as Experiment, plus the optional key
        "training_stage_threshold" (default 2): only rows whose
        training_stage is strictly greater than this value are used
    """

    def __init__(self, params):
        super().__init__(params)
        # previously hard coded as `training_stage > 2` inside run()
        self.training_stage_threshold = params.get("training_stage_threshold", 2)
        self.fit_models = pd.DataFrame(
            columns=[
                "animal_id",
                "model_name",
                "nll",
                "sigma",
                "tau",
                "features",
                "weights",
                "n_train_trials",
                "n_test_trials",
            ]
        )

    def run(self):
        """
        Run the sigma sweep for every animal in self.animals.

        raises
        ------
        IndexError
            if an animal has no row in the taus table
        """
        # DataFrame.query resolves @names from local variables, so the
        # threshold is copied into a local before it is referenced
        stage_threshold = self.training_stage_threshold

        for animal_id in self.animals:
            print(f"\n\n !!!!! evaluating animal {animal_id} !!!!!\n\n")
            animal_df = self.df.query(
                "animal_id == @animal_id and training_stage > @stage_threshold"
            )

            animal_taus = self.taus.query("animal_id == @animal_id")["tau"].values
            if len(animal_taus) == 0:
                # same exception type as the bare .values[0], clearer message
                raise IndexError(f"no tau found for animal {animal_id}")
            tau = animal_taus[0]

            self.run_single_animal(animal_id, animal_df, tau)

    def run_single_animal(self, animal_id, animal_df, tau):
        """
        Generate the design matrix for one animal, split it into
        train and test sets, then fit and evaluate one model per
        sigma in self.sigmas, storing each result in self.fit_models.

        params
        ------
        animal_id : str
            id written to the "animal_id" column of each stored row
        animal_df : pd.DataFrame
            data for this animal, passed to the design matrix generator
        tau : float
            tau for this animal, passed to the design matrix generator
            and written to the "tau" column of each stored row
        """
        # Make design matrix given model configs. The generator may be
        # given as a class, or as the name of a class visible in globals()
        generator_spec = self.model_config["design_matrix_generator"]
        if isinstance(generator_spec, str):
            design_matrix_generator_class = globals()[generator_spec]
        else:
            design_matrix_generator_class = generator_spec

        design_matrix_args = self.model_config.get("design_matrix_generator_args", {})
        design_matrix_generator = design_matrix_generator_class(
            model_type=self.model_config["model_type"]
        )

        X, Y = design_matrix_generator.generate_design_matrix(
            df=animal_df,
            tau=tau,
            **design_matrix_args,
        )

        # Split into train and test.
        tts = TrainTestSplitter(self.test_size, self.random_state)
        tts.get_sessions_for_split(X)
        X_train, X_test, Y_train, Y_test = tts.apply_session_split(X, Y)

        # These do not depend on sigma, so compute them once
        model_class = self.model_config["model_class"]
        model_name = self.model_config["model_type"]
        features = X_test.columns
        n_train_trials = len(X_train)
        n_test_trials = len(X_test)

        # Fit models for each sigma
        for sigma in self.sigmas:
            model = model_class(sigma=sigma)
            W_fit = model.fit(X_train, Y_train)
            nll = model.eval(X_test, Y_test)

            # Store
            data = {
                "animal_id": animal_id,
                "model_name": model_name,
                "nll": nll,
                "sigma": sigma,
                "tau": tau,
                "features": features,
                "weights": W_fit,
                "n_train_trials": n_train_trials,
                "n_test_trials": n_test_trials,
            }
            self.store(data, self.fit_models)

In [5]:
# Experiment configuration. The per-key notes describe how the Experiment /
# ExperimentSigmaSweep classes defined in this notebook read each entry.
params = {
    "animals": None,  # None -> every animal_id in the loaded data
    "sigmas": [1, 2],  # one model is fit per sigma for each animal
    "model_config": {
        "model_class": MultiClassLogisticRegression,  # instantiated as model_class(sigma=sigma)
        "model_type": "multi",  # passed to the design matrix generator; stored as model_name
        "design_matrix_generator": "DesignMatrixGeneratorInteractions",  # class name looked up in globals()
        "design_matrix_generator_args": {  # forwarded as keyword args to generate_design_matrix
            "filter_column": "prev_violation",
            "interaction_pairs": create_violation_interaction_pairs(["s_a", "s_b"]),
        },
    },
}

# Disabled run of the sweep; the same params are run as `exp` further down.
# es = ExperimentSigmaSweep(params)

# es.run()

In [2]:
# NOTE: this rebinds the name ExperimentSigmaSweep, shadowing the class of the
# same name defined earlier in this notebook; code run after this cell uses
# the version from the experiment_sigma_sweep module.
from experiment_sigma_sweep import ExperimentSigmaSweep

In [9]:
# Build the sweep from the params dict defined above and run it.
# NOTE(review): which ExperimentSigmaSweep is used depends on cell execution
# order (notebook class vs. the experiment_sigma_sweep import) — confirm.
exp = ExperimentSigmaSweep(params)

exp.run()

returning viol dataset for all animals


 !!!!! evaluating animal W051 !!!!!


         Current function value: 40966.882523
         Iterations: 28
         Function evaluations: 76
         Gradient evaluations: 64
Optimization terminated successfully.
         Current function value: 40961.912757
         Iterations: 29
         Function evaluations: 41
         Gradient evaluations: 41


 !!!!! evaluating animal W060 !!!!!


         Current function value: 29047.404409
         Iterations: 28
         Function evaluations: 95
         Gradient evaluations: 83
         Current function value: 29042.341032
         Iterations: 28
         Function evaluations: 56
         Gradient evaluations: 45


 !!!!! evaluating animal W061 !!!!!


Optimization terminated successfully.
         Current function value: 24402.560879
         Iterations: 30
         Function evaluations: 42
         Gradient evaluations: 42
         Current function value: 24400.019581
         Iterations: 29
     