## **A**utomated **L**earning for **I**nsightful **C**omparison and **E**valuation - (ALICE)

In [7]:
import numpy as np
import pandas as pd
import os
cur_dir = os.getcwd()

In [8]:
# Demo dataset: California housing (all-numeric features, continuous target)
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing()

# Features and target as separate frames, labelled with the names shipped with the dataset
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.DataFrame(data.target, columns=data.target_names)

# Side-by-side: predictors first, target as the last column
df = pd.concat((X, y), axis='columns')

In [9]:
df

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422
...,...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,0.781
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,0.771
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,0.923
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32,0.847


In [10]:
df.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
count,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0
mean,3.870671,28.639486,5.429,1.096675,1425.476744,3.070655,35.631861,-119.569704,2.068558
std,1.899822,12.585558,2.474173,0.473911,1132.462122,10.38605,2.135952,2.003532,1.153956
min,0.4999,1.0,0.846154,0.333333,3.0,0.692308,32.54,-124.35,0.14999
25%,2.5634,18.0,4.440716,1.006079,787.0,2.429741,33.93,-121.8,1.196
50%,3.5348,29.0,5.229129,1.04878,1166.0,2.818116,34.26,-118.49,1.797
75%,4.74325,37.0,6.052381,1.099526,1725.0,3.282261,37.71,-118.01,2.64725
max,15.0001,52.0,141.909091,34.066667,35682.0,1243.333333,41.95,-114.31,5.00001


Since every feature here is numerical, I will generate two fake categorical features and encode them into dummies to demonstrate that the function can handle groups of dummies together.

In [11]:
# Seeded generator so the synthetic categorical columns (and everything computed
# from them further down) come out identical on every Restart & Run All.
rng = np.random.default_rng(42)

# Labels are drawn uniformly at random, one per row of df
fake_labels = ['nice', 'not_nice', 'mid']
df['HouseEval'] = rng.choice(fake_labels, size=len(df))

fake_labels_2 = ['white', 'black', 'gray']
df['WallColors'] = rng.choice(fake_labels_2, size=len(df))

In [12]:
# One-hot encode the two synthetic categoricals.
# drop_first=True omits the first level of each variable, which then acts as the baseline.
df = pd.get_dummies(data=df, drop_first=True, columns=['HouseEval', 'WallColors'])

In [13]:
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal,HouseEval_nice,HouseEval_not_nice,WallColors_gray,WallColors_white
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526,1,0,0,1
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585,1,0,0,1
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521,1,0,0,1
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413,0,0,0,0
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422,0,0,0,1


In [14]:
# Classification variant of the data: replace the continuous target with a
# binary flag that is 1 when the house value is at or above the mean, else 0.

mean_target = df['MedHouseVal'].mean()

# Drop the continuous target first, then append the flag as the last column
df_discrete = df.drop(columns='MedHouseVal')
df_discrete['AboveMean'] = (df['MedHouseVal'] >= mean_target).astype(int)

In [9]:
# Can just import entire module
import alice

In [6]:
# Import regression metrics
from alice.metrics.regress import mse, rmse, mae
# Import classification metrics
from alice.metrics.classify import accuracy, precision, recall, f1
# Import regression agreeability metric
from alice.agreeability.regress import pearson
# Import classification agreeability metric
from alice.agreeability.classify import cohen_kappa


In [11]:
# Import our demo search algorithm 
from alice.search_and_compare.sequential import BackEliminator

In [132]:
from alice.metrics.regress import mse, rmse, mae
from alice.metrics.classify import accuracy, precision, recall, f1
from alice.agreeability.regress import pearson
from alice.agreeability.classify import cohen_kappa
from alice.utils.feature_lists import dummy_grouper
from alice.utils.feature_lists import feature_fixer
from alice.utils.feature_lists import feature_list_flatten
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import numpy as np

class BackEliminator():
    '''
    Backward feature elimination run on two models side by side.

    Starting from the full feature list, each iteration refits a model once per
    remaining feature (with that feature left out), scores every fit with
    ``criterion`` and permanently drops the feature whose removal gave the best
    score. At every step the agreement between the two models' predictions is
    measured with ``agreeability``.

    Parameters
    ----------
    X, y :
        Training features and target. ``X`` is indexed with lists of column
        names and must expose ``.columns``.
    validation_data : tuple, optional
        ``(X_val, y_val)``. When given, scores and agreeability are computed on
        it; otherwise the training data is reused for evaluation.
    task_type : {'regression', 'classification'}
        Decides the direction of "best": lowest score for regression metrics,
        highest score for classification metrics.
    criterion : str
        Key of ``criterion_registry`` ('mse', 'rmse', 'mae', 'accuracy',
        'precision', 'recall', 'f1').
    agreeability : str
        Key of ``agreeability_registry`` ('pearson', 'cohen_kappa').
    dummy_list : list of lists, optional
        Groups of one-hot columns; passed to ``dummy_grouper`` so that each
        group is handled as a single entry of the feature list.
    features_to_fix : list, optional
        Passed to ``feature_fixer`` together with the feature list.
        NOTE(review): only ``initial_feature_list`` is ever used to select the
        columns that are fitted, so whatever ``feature_fixer`` removes from it
        is not part of any fit -- confirm this is the intended meaning of
        "fixing" a feature.

    Raises
    ------
    ValueError
        If ``task_type`` is neither 'regression' nor 'classification'.
    '''

    # Separator lines used by the progress printouts
    _THIN_RULE = '-' * 150
    _THICK_RULE = '=' * 150
    # Raised by the result accessors when no comparison has been run yet
    _NO_RESULTS_MESSAGE = (
        "There are no results available. "
        "Make sure to run compare_best_models or compare_all_models first."
    )

    def __init__(self,
                 X=None,
                 y=None, 
                 validation_data=None,
                 task_type=None,
                 criterion=None,
                 agreeability=None,
                 dummy_list=None,
                 features_to_fix=None
                 ):

        self.X = X
        self.y = y
        # The validation attributes are always defined: the methods below test
        # self.validation_data, which must not raise AttributeError when the
        # caller wants to evaluate on the training data.
        if validation_data:
            self.validation_data = validation_data
            self.X_val = validation_data[0]
            self.y_val = validation_data[1]
        else:
            self.validation_data = None
            self.X_val = None
            self.y_val = None
        self.criterion_registry = {
            'mse': mse,
            'rmse': rmse,
            'mae': mae,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1
            }
        self.criterion = criterion
        self.agreeability_registry = {
            'pearson': pearson,
            'cohen_kappa': cohen_kappa
        }
        self.agreeability = agreeability
        # Per-iteration (scores, predictions) of every candidate fit; filled by compare_all_models
        self.scores_n_preds_m1 = []
        self.scores_n_preds_m2 = []
        # Rows of the last comparison; None until a compare_* method has run
        self.results = None

        # Direction of "best" depends on the task:
        #   classification metrics - higher score is better, so the feature to drop
        #   is the one whose removal produced the highest score;
        #   regression metrics - lower score is better, so it is the one whose
        #   removal produced the lowest score.
        # Both callables take a list of (feature, score, preds) tuples.
        if task_type == 'classification':
            # Entry with the highest score - used in compare_best_models
            self.find_worst_feature = lambda scores: max(scores, key=lambda x: x[1])
            # Best (highest) score first - used in compare_all_models
            self.sort_scores = lambda scores: sorted(scores, key=lambda x: x[1], reverse=True)
        elif task_type == 'regression':
            # Entry with the lowest score
            self.find_worst_feature = lambda scores: min(scores, key=lambda x: x[1])
            # Best (lowest) score first
            self.sort_scores = lambda scores: sorted(scores, key=lambda x: x[1])
        else:
            raise ValueError("Invalid task type specified. Choose 'regression' or 'classification'.")

        # Handle feature lists (both default to None if not provided)
        self.dummy_list = dummy_list
        self.features_to_fix = features_to_fix
        # Group columns obtained from a one-hot-encoded variable together
        if self.dummy_list:
            self.initial_feature_list = dummy_grouper(feature_list=list(self.X.columns), dummy_list=self.dummy_list)
        else:
            self.initial_feature_list = list(self.X.columns)
        # Remove features we want to fix from the feature list
        if self.features_to_fix:
            self.initial_feature_list = feature_fixer(self.initial_feature_list, self.features_to_fix)

    def _fit_and_score(self, model, flat_features):
        '''
        Fit ``model`` on the training data restricted to ``flat_features`` and
        evaluate it on the validation data if available, else on the training data.

        Returns a ``(score, predictions)`` tuple.
        '''
        model.fit(self.X[flat_features], self.y)
        if self.validation_data:
            X_eval, y_eval = self.X_val, self.y_val
        else:
            X_eval, y_eval = self.X, self.y
        y_preds = model.predict(X_eval[flat_features])
        score = self.criterion_registry[self.criterion](y_eval, y_preds)
        return score, y_preds

    def _agreement(self, preds_m1, preds_m2):
        '''Agreeability coefficient between two prediction arrays.'''
        return self.agreeability_registry[self.agreeability](preds_m1, preds_m2)

    def _best_row(self, features_m1, score_m1, features_m2, score_m2, agreeability_coeff, criterion_label):
        '''
        Build the five "Best: ..." result entries shared by both compare methods.
        The key order matters: plot_from_results addresses these columns by position.
        '''
        return {
            'Best: M1 Included Features': list(features_m1),
            f'Best: M1 {criterion_label}': score_m1,
            'Best: M2 Included Features': list(features_m2),
            f'Best: M2 {criterion_label}': score_m2,
            f'Best: Agreeability ({self.agreeability})': agreeability_coeff,
        }

    # Method to be called in the main methods of back elimination
    def _deselect_feature(self,
                          feature_list,
                          model):
        '''
        Refit ``model`` once per entry of ``feature_list`` with that entry left out.

        Returns a list of ``(feature, score, preds)`` tuples in the order of
        ``feature_list``: the left-out entry (a column name or a group of dummy
        columns), the score obtained without it and the matching predictions.
        '''
        score_per_dropped_feature = []
        for feature in feature_list:
            # Work on a copy so the caller's list is left untouched
            remaining = feature_list.copy()
            remaining.remove(feature)
            # Dummy groups are nested lists; the model needs plain column names
            flat_remaining = feature_list_flatten(remaining)
            score, y_preds = self._fit_and_score(model, flat_remaining)
            score_per_dropped_feature.append((feature, score, y_preds))
        # TODO: optionally keep the fitted models (memory cost may make this unfeasible)
        return score_per_dropped_feature

    def compare_best_models(
            self,
            m1,
            m2
        ): 
        '''
        Run backward elimination on ``m1`` and ``m2`` independently and compare,
        at every step, the best fit of each model.

        The first row of the results comes from fitting both models on the full
        feature list; every following row is recorded after each model dropped
        one more feature. All rows share the same keys. The loop stops when
        either model is down to a single entry in its feature list.

        Returns the list of result dicts, which is also stored in ``self.results``.
        '''
        # Each model eliminates from its own copy of the feature list
        new_feature_list_m1 = self.initial_feature_list.copy()
        new_feature_list_m2 = self.initial_feature_list.copy()
        criterion_label = self.criterion.upper()

        # First fit both models w/o any removed features
        flat_feature_list_m1 = feature_list_flatten(new_feature_list_m1)
        flat_feature_list_m2 = feature_list_flatten(new_feature_list_m2)
        m1_score, m1_preds = self._fit_and_score(m1, flat_feature_list_m1)
        m2_score, m2_preds = self._fit_and_score(m2, flat_feature_list_m2)
        agreeability_coeff = self._agreement(m1_preds, m2_preds)

        results = [
            self._best_row(flat_feature_list_m1, m1_score,
                           flat_feature_list_m2, m2_score,
                           agreeability_coeff, criterion_label)
        ]

        ### DEBUG PRINTS
        print('Initial run: fitted both models with full feature set.')
        print(self._THIN_RULE)
        print(f'Model 1 included: {new_feature_list_m1}. {criterion_label}: {m1_score}')
        print(f'Model 2 included: {new_feature_list_m2}. {criterion_label}: {m2_score}')
        print(self._THIN_RULE)
        print(f'Agreeability Coefficient ({self.agreeability}): {agreeability_coeff}')
        print(self._THICK_RULE)
        ### DEBUG PRINTS

        counter = 0

        # Begin loop to deselect and evaluate
        while len(new_feature_list_m1) > 1 and len(new_feature_list_m2) > 1:
            counter += 1

            # (removed feature, corresponding score, corresponding preds) per candidate
            score_per_dropped_feature_m1 = self._deselect_feature(new_feature_list_m1, m1)
            score_per_dropped_feature_m2 = self._deselect_feature(new_feature_list_m2, m2)

            # The feature to drop is the one whose removal gave the best score
            worst_feature_m1, m1_score, m1_preds = self.find_worst_feature(score_per_dropped_feature_m1)
            worst_feature_m2, m2_score, m2_preds = self.find_worst_feature(score_per_dropped_feature_m2)

            # Update included feature lists
            new_feature_list_m1.remove(worst_feature_m1)
            new_feature_list_m2.remove(worst_feature_m2)
            flat_feature_list_m1 = feature_list_flatten(new_feature_list_m1)
            flat_feature_list_m2 = feature_list_flatten(new_feature_list_m2)

            agreeability_coeff = self._agreement(m1_preds, m2_preds)
            # Same keys as the initial row so the results form one clean table
            results.append(
                self._best_row(flat_feature_list_m1, m1_score,
                               flat_feature_list_m2, m2_score,
                               agreeability_coeff, criterion_label)
            )

            ### DEBUG PRINTS
            print(f'Iteration {counter}:')
            print(self._THIN_RULE)
            print(f'Model 1 included: {new_feature_list_m1}. {criterion_label}: {m1_score}')
            print(f'Model 2 included: {new_feature_list_m2}. {criterion_label}: {m2_score}')
            print(self._THIN_RULE)
            print(f'Agreeability Coefficient ({self.agreeability}): {agreeability_coeff}')
            print(self._THICK_RULE)
            ### DEBUG PRINTS

        # Save and return results
        self.results = results
        return results

    def compare_all_models(
            self,
            m1,
            m2
        ):
        '''
        Note: feature elimination strategy same as compare_best_models().
        At higher computing costs, also evaluates agreeability between the sub-par
        models of each iteration and computes the mean and standard deviation of
        the scores and of the agreeability coefficients.

        Within an iteration the candidate fits of each model are ordered from
        best to worst score, and agreeability is computed between fits of equal
        rank. After sorting, a pair therefore does not necessarily correspond to
        the same left-out feature in both models.

        ``scores_n_preds_m1`` / ``scores_n_preds_m2`` are reset at the start of
        every call and then receive one ``(scores, preds)`` tuple per iteration.

        Returns the list of result dicts, which is also stored in ``self.results``.
        '''
        # Each model eliminates from its own copy of the feature list
        new_feature_list_m1 = self.initial_feature_list.copy()
        new_feature_list_m2 = self.initial_feature_list.copy()
        criterion_label = self.criterion.upper()
        # Start from empty containers so repeated runs do not mix their scores
        self.scores_n_preds_m1 = []
        self.scores_n_preds_m2 = []

        # First fit both models w/o any removed features
        flat_feature_list_m1 = feature_list_flatten(new_feature_list_m1)
        flat_feature_list_m2 = feature_list_flatten(new_feature_list_m2)
        best_score_m1, m1_preds = self._fit_and_score(m1, flat_feature_list_m1)
        best_score_m2, m2_preds = self._fit_and_score(m2, flat_feature_list_m2)
        agreeability_coeff = self._agreement(m1_preds, m2_preds)

        # Only one fit per model in the initial run: mean == best and st. dev. == 0
        first_row = self._best_row(flat_feature_list_m1, best_score_m1,
                                   flat_feature_list_m2, best_score_m2,
                                   agreeability_coeff, self.criterion)
        first_row.update({
            f'All: M1 Mean {self.criterion}': best_score_m1,
            f'All: M1 STD {self.criterion}': 0,
            f'All: M2 Mean {self.criterion}': best_score_m2,
            f'All: M2 STD {self.criterion}': 0,
            f'All: Mean Agreeability ({self.agreeability})': agreeability_coeff,
            'All: Agreeability St. Dev.': 0
        })
        results = [first_row]

        ### DEBUG PRINTS
        print('Initial run: fitted both models with full feature set.')
        print(self._THIN_RULE)
        print(f'Model 1 included: {new_feature_list_m1}. {criterion_label}: {best_score_m1:.4f}')
        print(f'Model 2 included: {new_feature_list_m2}. {criterion_label}: {best_score_m2:.4f}')
        print(self._THIN_RULE)
        print(f'Agreeability Coefficient ({self.agreeability}): {agreeability_coeff:.4f}')
        print(self._THICK_RULE)
        ### DEBUG PRINTS

        counter = 0

        # Begin loop to deselect and evaluate
        while len(new_feature_list_m1) > 1 and len(new_feature_list_m2) > 1:
            counter += 1

            # (removed feature, score, preds) per candidate
            score_per_dropped_feature_m1 = self._deselect_feature(new_feature_list_m1, m1)
            score_per_dropped_feature_m2 = self._deselect_feature(new_feature_list_m2, m2)

            # Best candidate first. After sorting, row i of m1 and row i of m2 are
            # matched by rank, not by the feature that was left out.
            score_per_dropped_feature_m1 = self.sort_scores(score_per_dropped_feature_m1)
            score_per_dropped_feature_m2 = self.sort_scores(score_per_dropped_feature_m2)

            # ---- Scores ----
            all_scores_m1 = [row[1] for row in score_per_dropped_feature_m1]
            all_scores_m2 = [row[1] for row in score_per_dropped_feature_m2]
            all_preds_m1 = [row[2] for row in score_per_dropped_feature_m1]
            all_preds_m2 = [row[2] for row in score_per_dropped_feature_m2]
            # Kept for later inspection / testing
            self.scores_n_preds_m1.append((all_scores_m1, all_preds_m1))
            self.scores_n_preds_m2.append((all_scores_m2, all_preds_m2))

            best_score_m1 = all_scores_m1[0]
            best_score_m2 = all_scores_m2[0]
            mean_score_m1 = np.mean(all_scores_m1)
            mean_score_m2 = np.mean(all_scores_m2)
            # Population st. dev., reusing the means computed above
            std_score_m1 = np.sqrt(np.mean((np.asarray(all_scores_m1) - mean_score_m1) ** 2))
            std_score_m2 = np.sqrt(np.mean((np.asarray(all_scores_m2) - mean_score_m2) ** 2))

            # ---- Agreeability ----
            # Rank by rank: first entry is between the two best fits, last between the two worst
            all_agreeabilities = [
                self._agreement(preds_m1, preds_m2)
                for preds_m1, preds_m2 in zip(all_preds_m1, all_preds_m2)
            ]
            agreeability_coeff = all_agreeabilities[0]
            mean_agreeability = np.mean(all_agreeabilities)
            std_agreeability = np.std(all_agreeabilities)

            # ---- Features ----
            worst_feature_m1 = score_per_dropped_feature_m1[0][0]
            worst_feature_m2 = score_per_dropped_feature_m2[0][0]
            new_feature_list_m1.remove(worst_feature_m1)
            new_feature_list_m2.remove(worst_feature_m2)
            flat_feature_list_m1 = feature_list_flatten(new_feature_list_m1)
            flat_feature_list_m2 = feature_list_flatten(new_feature_list_m2)

            row = self._best_row(flat_feature_list_m1, best_score_m1,
                                 flat_feature_list_m2, best_score_m2,
                                 agreeability_coeff, self.criterion)
            row.update({
                f'All: M1 Mean {self.criterion}': mean_score_m1,
                f'All: M1 STD {self.criterion}': std_score_m1,
                f'All: M2 Mean {self.criterion}': mean_score_m2,
                f'All: M2 STD {self.criterion}': std_score_m2,
                f'All: Mean Agreeability ({self.agreeability})': mean_agreeability,
                'All: Agreeability St. Dev.': std_agreeability
            })
            results.append(row)

            ### DEBUG PRINTS
            print(f'Iteration {counter}:')
            print(self._THIN_RULE)
            print('Results from best models:')
            print(f'Best Model 1 included: {new_feature_list_m1}. {criterion_label}: {best_score_m1:.4f}')
            print(f'Best Model 2 included: {new_feature_list_m2}. {criterion_label}: {best_score_m2:.4f}')
            print(f'Agreeability Coefficient ({self.agreeability}) between best models: {agreeability_coeff}')
            print(self._THIN_RULE)
            print('Results from all models:')
            print(f'M1 mean score: {mean_score_m1:.4f}. Standard deviation: {std_score_m1:.4f}')
            print(f'M2 mean score: {mean_score_m2:.4f}. Standard deviation: {std_score_m2:.4f}')
            print(f'Mean agreeability coefficient ({self.agreeability}): {mean_agreeability:.4f}. Standard deviation: {std_agreeability:.4f}')
            print(self._THICK_RULE)
            ### DEBUG PRINTS

        # Save and return results
        self.results = results
        return results

    # Method to turn results into a df
    def dataframe_from_results(self):
        '''
        Return the results of the last comparison as a dataframe (one row per step).

        Raises ValueError if no comparison has been run yet.
        '''
        if not self.results:
            raise ValueError(self._NO_RESULTS_MESSAGE)
        return pd.DataFrame(self.results)

    # Method to turn results into an interactive plot
    def plot_from_results(self):
        '''
        Makes an interactive plot from the results.

        Agreeability between the best models is drawn on the primary y-axis and
        the two models' best scores on the secondary y-axis, against the row
        number of the results (1 = initial fit on the full feature list).
        Hover texts list the included features and, when the results come from
        compare_all_models, the mean / st. dev. statistics as well.

        Raises ValueError if no comparison has been run yet.
        '''
        if not self.results:
            raise ValueError(self._NO_RESULTS_MESSAGE)
        df = pd.DataFrame(self.results)
        columns = list(df.columns)
        # Columns 0-4 are the "Best: ..." entries of every result row;
        # columns 5-10 ("All: ...") only exist for compare_all_models results.
        has_all_stats = len(columns) >= 11

        def hover_text(row, positions):
            # One "<br> name: <br> value" segment per requested column
            parts = []
            for pos in positions:
                value = row.iloc[pos]
                if isinstance(value, (list, tuple)):
                    rendered = ', '.join(value)
                else:
                    rendered = f'{value:.4f}'
                parts.append(f'<br> {columns[pos]}: <br> {rendered}')
            return ' '.join(parts)

        # (column plotted, columns shown on hover, drawn on secondary axis?)
        traces = [
            (4, [4, 9, 10] if has_all_stats else [4], False),     # agreeability
            (1, [1, 0, 5, 6] if has_all_stats else [1, 0], True),  # model 1 score
            (3, [3, 2, 7, 8] if has_all_stats else [3, 2], True),  # model 2 score
        ]

        fig = make_subplots(
            specs=[[{'secondary_y': True}]]
        )

        for value_pos, hover_positions, on_secondary in traces:
            hover = [hover_text(row, hover_positions) for _, row in df.iterrows()]
            fig.add_trace(
                go.Scatter(
                    x=df.index + 1,
                    y=df.iloc[:, value_pos],
                    name=f'{columns[value_pos]}',
                    mode='lines+markers',
                    hovertext=hover,
                    hoverinfo='text'
                ),
                secondary_y=on_secondary
            )

        fig.update_layout(
            title='Agreeability Coefficients and Model Scores Over Algorithm Iterations',
            xaxis_title='Iteration',
            yaxis_title='Agreeability',
            yaxis2_title='Model Scores',
            hovermode='closest'
        )

        fig.update_xaxes(type='category')
        fig.show()

In [133]:
df.columns

Index(['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup',
       'Latitude', 'Longitude', 'MedHouseVal', 'HouseEval_nice',
       'HouseEval_not_nice', 'WallColors_gray', 'WallColors_white'],
      dtype='object')

In [134]:
# Dummy columns that came from the same categorical variable, one group per variable
house_eval_dummies = ['HouseEval_nice', 'HouseEval_not_nice']
wall_color_dummies = ['WallColors_gray', 'WallColors_white']

dummy_list = [house_eval_dummies, wall_color_dummies]
dummy_list

[['HouseEval_nice', 'HouseEval_not_nice'],
 ['WallColors_gray', 'WallColors_white']]

In [135]:
# Two plain columns plus the WallColors dummy group (nested list).
# Presumably meant for BackEliminator's `features_to_fix` argument -- verify where it is used.
ftofix = ['Latitude', 'Longitude', ['WallColors_gray', 'WallColors_white']]
ftofix

['Latitude', 'Longitude', ['WallColors_gray', 'WallColors_white']]

### Check functionality on a regression task

In [136]:
from sklearn.model_selection import train_test_split

In [137]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   MedInc              20640 non-null  float64
 1   HouseAge            20640 non-null  float64
 2   AveRooms            20640 non-null  float64
 3   AveBedrms           20640 non-null  float64
 4   Population          20640 non-null  float64
 5   AveOccup            20640 non-null  float64
 6   Latitude            20640 non-null  float64
 7   Longitude           20640 non-null  float64
 8   MedHouseVal         20640 non-null  float64
 9   HouseEval_nice      20640 non-null  uint8  
 10  HouseEval_not_nice  20640 non-null  uint8  
 11  WallColors_gray     20640 non-null  uint8  
 12  WallColors_white    20640 non-null  uint8  
dtypes: float64(9), uint8(4)
memory usage: 1.5 MB


In [138]:
# Separate the regression target (MedHouseVal) from the predictor columns.
X, y = df.drop(columns='MedHouseVal'), df['MedHouseVal']


In [139]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split stable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=66,
)

In [140]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

In [141]:
# Model pair to compare on the regression task: a linear model and a tree.
m1 = LinearRegression()
# Tree fitting is randomized (features are permuted at each split), so the
# seed is pinned to make the scores reproducible on a fresh kernel.
# 66 matches the seed used for the train/validation split.
m2 = DecisionTreeRegressor(random_state=66)

In [142]:
# Configure the comparison for the regression task: models are scored with
# RMSE on the held-out validation data and their agreement is measured with
# the Pearson coefficient. Dummy columns are declared through dummy_list.
seeker = BackEliminator(
    X=X_train, y=y_train,
    validation_data=(X_val, y_val),
    task_type='regression', criterion='rmse', agreeability='pearson',
    dummy_list=dummy_list,
)

In [143]:
# Run the comparison for both models; progress is printed while it runs.
results = seeker.compare_all_models(m1=m1, m2=m2)

Initial run: fitted both models with full feature set.
------------------------------------------------------------------------------------------------------------------------------------------------------
Model 1 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude', ['HouseEval_nice', 'HouseEval_not_nice'], ['WallColors_gray', 'WallColors_white']]. RMSE: 0.7312
Model 2 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude', ['HouseEval_nice', 'HouseEval_not_nice'], ['WallColors_gray', 'WallColors_white']]. RMSE: 0.7198
------------------------------------------------------------------------------------------------------------------------------------------------------
Agreeability Coefficient (pearson): 0.7651
Iteration 1:
------------------------------------------------------------------------------------------------------------------------------------------------------
Results 

In [145]:
# Print every result dictionary on its own line (nothing when there are none).
if results:
    print(*results, sep='\n')

{'Best: M1 Included Features': ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude', 'HouseEval_nice', 'HouseEval_not_nice', 'WallColors_gray', 'WallColors_white'], 'Best: M1 rmse': 0.7311818094816698, 'Best: M2 Included Features': ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude', 'HouseEval_nice', 'HouseEval_not_nice', 'WallColors_gray', 'WallColors_white'], 'Best: M2 rmse': 0.7198003255099564, 'Best: Agreeability (pearson)': 0.7650956459381608, 'All: M1 Mean rmse': 0.7311818094816698, 'All: M1 STD rmse': 0, 'All: M2 Mean rmse': 0.7198003255099564, 'All: M2 STD rmse': 0, 'All: Mean Agreeability (pearson)': 0.7650956459381608, 'All: Agreeability St. Dev.': 0}
{'Best: M1 Included Features': ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'Latitude', 'Longitude', 'HouseEval_nice', 'HouseEval_not_nice', 'WallColors_gray', 'WallColors_white'], 'Best: M1 rmse': 0.7240502408441594, 'Be

### INPROGRESS SECTION

In [146]:
# scores and preds now callable
all_m1 = seeker.scores_n_preds_m1
all_m2 = seeker.scores_n_preds_m2


In [160]:
# Best score from first iter
print(all_m1[0][0][0])
# Best (corresponding) preds from first iter
print(all_m1[0][1][0])

0.7240502408441594
[2.45287863 1.12014776 2.44103847 ... 3.27266263 2.21264488 2.61508088]


In [161]:
from scipy import stats

In [175]:
# Best M1 predictions from iterations 1, 2 and 5 (zero-based positions 0, 1, 4)
best_preds_iter_1, best_preds_iter_2, best_preds_iter_5 = (
    all_m1[step][1][0] for step in (0, 1, 4)
)

Comparing first best model with second best - LINREG

In [177]:
# Simple pipeline for doing a paired t test and determining equal variance
# I will use levene test here 

# Two-sample comparison of the iteration-1 and iteration-2 predictions.
# Levene's test (H0: equal variances) decides which t-test variant is used.
# NOTE(review): stats.ttest_ind is an independent-samples test, not a paired
# one; if both vectors are predictions for the same validation rows a paired
# test may be what was intended -- confirm.
levene_results = stats.levene(best_preds_iter_1, best_preds_iter_2)

if levene_results.pvalue > 0.05:
    # H0 not rejected (p > 0.05): standard t-test with pooled variance
    t_test = stats.ttest_ind(best_preds_iter_1, best_preds_iter_2, equal_var=True)
    headline, outcome = 'Results t-test:', t_test
else:
    # Variances differ: fall back to Welch's t-test
    welch_t_test = stats.ttest_ind(best_preds_iter_1, best_preds_iter_2, equal_var=False)
    headline, outcome = "Results Welch's t-test:", welch_t_test

print(f'{headline}\nTest statistic: {outcome[0]}\nP-value: {outcome[1]}')

Results t-test:
Test statistic: -0.0034260032284453134
P-value: 0.997266533060361


Comparing first best model with fifth best - LINREG

In [181]:
# Simple pipeline for doing a paired t test and determining equal variance
# I will use levene test here 

# Compare the iteration-1 predictions with the iteration-5 predictions.
# Levene's test must be run on the SAME pair that the t-test compares,
# otherwise the equal-variance decision belongs to a different comparison.
levene_results = stats.levene(best_preds_iter_1, best_preds_iter_5)

# Levene test H0 is that variances are equal
# if it cannot be rejected (pvalue > 0.05)
# go for the standard (pooled-variance) t-test
if levene_results.pvalue > 0.05:
    t_test = stats.ttest_ind(best_preds_iter_1, best_preds_iter_5, equal_var=True)
    print(
        f'Results t-test:\n'
        f'Test statistic: {t_test[0]}\n'
        f'P-value: {t_test[1]}'
    )
else:
    # else use Welch's t-test, which does not assume equal variances
    welch_t_test = stats.ttest_ind(best_preds_iter_1, best_preds_iter_5, equal_var=False)
    print(
        f'Results Welch\'s t-test:\n'
        f'Test statistic: {welch_t_test[0]}\n'
        f'P-value: {welch_t_test[1]}'
    )

Results t-test:
Test statistic: 0.028315486960823682
P-value: 0.9774112132969359


Comparing first best model with second best - TREE

In [182]:
# Best M2 predictions from iterations 1, 2 and 5 (zero-based positions 0, 1, 4).
# These reuse the names that previously held the M1 predictions.
best_preds_iter_1, best_preds_iter_2, best_preds_iter_5 = (
    all_m2[step][1][0] for step in (0, 1, 4)
)

In [183]:
# Simple pipeline for doing a paired t test and determining equal variance
# I will use levene test here 

# Two-sample comparison of the iteration-1 and iteration-2 predictions.
# Levene's test (H0: equal variances) decides which t-test variant is used.
# NOTE(review): stats.ttest_ind is an independent-samples test, not a paired
# one; if both vectors are predictions for the same validation rows a paired
# test may be what was intended -- confirm.
levene_results = stats.levene(best_preds_iter_1, best_preds_iter_2)

if levene_results.pvalue > 0.05:
    # H0 not rejected (p > 0.05): standard t-test with pooled variance
    t_test = stats.ttest_ind(best_preds_iter_1, best_preds_iter_2, equal_var=True)
    headline, outcome = 'Results t-test:', t_test
else:
    # Variances differ: fall back to Welch's t-test
    welch_t_test = stats.ttest_ind(best_preds_iter_1, best_preds_iter_2, equal_var=False)
    headline, outcome = "Results Welch's t-test:", welch_t_test

print(f'{headline}\nTest statistic: {outcome[0]}\nP-value: {outcome[1]}')

Results t-test:
Test statistic: -0.020542423181810505
P-value: 0.9836111669396426


Comparing first best model with fifth best - TREE

In [184]:
# Simple pipeline for doing a paired t test and determining equal variance
# I will use levene test here 

# Compare the iteration-1 predictions with the iteration-5 predictions.
# Levene's test must be run on the SAME pair that the t-test compares,
# otherwise the equal-variance decision belongs to a different comparison.
levene_results = stats.levene(best_preds_iter_1, best_preds_iter_5)

# Levene test H0 is that variances are equal
# if it cannot be rejected (pvalue > 0.05)
# go for the standard (pooled-variance) t-test
if levene_results.pvalue > 0.05:
    t_test = stats.ttest_ind(best_preds_iter_1, best_preds_iter_5, equal_var=True)
    print(
        f'Results t-test:\n'
        f'Test statistic: {t_test[0]}\n'
        f'P-value: {t_test[1]}'
    )
else:
    # else use Welch's t-test, which does not assume equal variances
    welch_t_test = stats.ttest_ind(best_preds_iter_1, best_preds_iter_5, equal_var=False)
    print(
        f'Results Welch\'s t-test:\n'
        f'Test statistic: {welch_t_test[0]}\n'
        f'P-value: {welch_t_test[1]}'
    )

Results t-test:
Test statistic: 0.3992291190826404
P-value: 0.689734717869275


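Observation: the tree looks less robust than the linear regression — its predictions shift more between the first and fifth best feature sets (larger t-statistic, smaller p-value), although neither difference is statistically significant.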

### INPROGRESS SECTION END

In [185]:
# Show full cell contents (no truncation) so the feature lists stay readable.
pd.options.display.max_colwidth = None

In [186]:
results_df = seeker.dataframe_from_results()

In [187]:
results_df

Unnamed: 0,Best: M1 Included Features,Best: M1 rmse,Best: M2 Included Features,Best: M2 rmse,Best: Agreeability (pearson),All: M1 Mean rmse,All: M1 STD rmse,All: M2 Mean rmse,All: M2 STD rmse,All: Mean Agreeability (pearson),All: Agreeability St. Dev.
0,"[MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.731182,"[MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.7198,0.765096,0.731182,0.0,0.7198,0.0,0.765096,0.0
1,"[MedInc, HouseAge, AveRooms, AveBedrms, Population, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.72405,"[MedInc, HouseAge, AveRooms, AveBedrms, Population, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.682069,0.774602,0.764188,0.055004,0.739456,0.050585,0.725306,0.067005
2,"[MedInc, HouseAge, AveRooms, AveBedrms, Population, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice]",0.724048,"[HouseAge, AveRooms, AveBedrms, Population, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.677952,0.733733,0.75952,0.054831,0.740193,0.095293,0.728979,0.071836
3,"[MedInc, HouseAge, AveRooms, AveBedrms, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice]",0.724049,"[HouseAge, AveRooms, Population, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.657985,0.720452,0.763943,0.056619,0.797329,0.193982,0.652501,0.103673
4,"[MedInc, HouseAge, AveRooms, AveBedrms, Latitude, Longitude]",0.724321,"[HouseAge, AveRooms, Population, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice]",0.652074,0.722448,0.769845,0.058296,0.815149,0.222007,0.627361,0.120696
5,"[MedInc, HouseAge, AveBedrms, Latitude, Longitude]",0.729908,"[HouseAge, AveRooms, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice]",0.650389,0.730253,0.777709,0.05971,0.838225,0.231158,0.613402,0.123719
6,"[MedInc, HouseAge, Latitude, Longitude]",0.734025,"[HouseAge, AveRooms, Latitude, Longitude]",0.651974,0.724751,0.822217,0.105224,0.858977,0.230923,0.563626,0.170499
7,"[MedInc, Latitude, Longitude]",0.741958,"[AveRooms, Latitude, Longitude]",0.639305,0.723538,0.845597,0.106522,0.897079,0.235023,0.524587,0.172521
8,"[MedInc, Latitude]",0.830187,"[Latitude, Longitude]",0.631228,0.598072,0.897744,0.089312,1.017906,0.277393,0.44149,0.148701
9,[MedInc],0.839073,[Longitude],1.00587,0.326271,1.0013,0.162228,1.025579,0.01971,0.307457,0.018813


In [188]:
seeker.plot_from_results()

### Check functionality on a classification task

In [105]:
df_discrete.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   MedInc              20640 non-null  float64
 1   HouseAge            20640 non-null  float64
 2   AveRooms            20640 non-null  float64
 3   AveBedrms           20640 non-null  float64
 4   Population          20640 non-null  float64
 5   AveOccup            20640 non-null  float64
 6   Latitude            20640 non-null  float64
 7   Longitude           20640 non-null  float64
 8   HouseEval_nice      20640 non-null  uint8  
 9   HouseEval_not_nice  20640 non-null  uint8  
 10  WallColors_gray     20640 non-null  uint8  
 11  WallColors_white    20640 non-null  uint8  
 12  AboveMean           20640 non-null  int64  
dtypes: float64(8), int64(1), uint8(4)
memory usage: 1.5 MB


In [106]:
# Separate the binary target (AboveMean) from the predictor columns.
X, y = df_discrete.drop(columns='AboveMean'), df_discrete['AboveMean']


In [107]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split stable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=66,
)

In [108]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [109]:
# Model pair to compare on the classification task.
# Both estimators use randomness while fitting (liblinear shuffles the data,
# the tree permutes features at each split), so the seed is pinned to make the
# scores reproducible. 66 matches the seed used for the train/validation split.
m1 = LogisticRegression(solver='liblinear', random_state=66)
m2 = DecisionTreeClassifier(random_state=66)

In [111]:
# Configure the comparison for the classification task: models are scored
# with F1 on the held-out validation data and their agreement is measured
# with Cohen's kappa. Dummy columns are declared through dummy_list.
seeker_2 = BackEliminator(
    X=X_train, y=y_train,
    validation_data=(X_val, y_val),
    task_type='classification', criterion='f1', agreeability='cohen_kappa',
    dummy_list=dummy_list,
)

In [112]:
# Run the comparison for both classifiers; progress is printed while it runs.
results_2 = seeker_2.compare_all_models(m1=m1, m2=m2)

Initial run: fitted both models with full feature set.
------------------------------------------------------------------------------------------------------------------------------------------------------
Model 1 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude', ['HouseEval_nice', 'HouseEval_not_nice'], ['WallColors_gray', 'WallColors_white']]. F1: 0.7803
Model 2 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude', ['HouseEval_nice', 'HouseEval_not_nice'], ['WallColors_gray', 'WallColors_white']]. F1: 0.7888
------------------------------------------------------------------------------------------------------------------------------------------------------
Agreeability Coefficient (cohen_kappa): 0.6389
Iteration 1:
------------------------------------------------------------------------------------------------------------------------------------------------------
Results 

In [113]:
results_2

[{'Best: M1 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'Population',
   'AveOccup',
   'Latitude',
   'Longitude',
   'HouseEval_nice',
   'HouseEval_not_nice',
   'WallColors_gray',
   'WallColors_white'],
  'Best: M1 f1': 0.780279975654291,
  'Best: M2 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'Population',
   'AveOccup',
   'Latitude',
   'Longitude',
   'HouseEval_nice',
   'HouseEval_not_nice',
   'WallColors_gray',
   'WallColors_white'],
  'Best: M2 f1': 0.7887740029542099,
  'Best: Agreeability (cohen_kappa)': 0.6388752806145795,
  'All: M1 Mean f1': 0.780279975654291,
  'All: M1 STD f1': 0,
  'All: M2 Mean f1': 0.7887740029542099,
  'All: M2 STD f1': 0,
  'All: Mean Agreeability (cohen_kappa)': 0.6388752806145795,
  'All: Agreeability St. Dev.': 0},
 {'Best: M1 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'AveOccup',
   'Latitude',
   'Longitude',
   'HouseEval_nice',


In [114]:
results_df_2 = seeker_2.dataframe_from_results()

In [115]:
results_df_2

Unnamed: 0,Best: M1 Included Features,Best: M1 f1,Best: M2 Included Features,Best: M2 f1,Best: Agreeability (cohen_kappa),All: M1 Mean f1,All: M1 STD f1,All: M2 Mean f1,All: M2 STD f1,All: Mean Agreeability (cohen_kappa),All: Agreeability St. Dev.
0,"[MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.78028,"[MedInc, HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.788774,0.638875,0.78028,0.0,0.788774,0.0,0.638875,0.0
1,"[MedInc, HouseAge, AveRooms, AveBedrms, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.783136,"[HouseAge, AveRooms, AveBedrms, Population, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.810504,0.569479,0.762433,0.029422,0.786247,0.017792,0.596191,0.04958
2,"[MedInc, HouseAge, AveRooms, AveBedrms, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice]",0.784242,"[HouseAge, AveRooms, AveBedrms, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.819701,0.579687,0.766375,0.024602,0.789357,0.042359,0.532935,0.055377
3,"[MedInc, HouseAge, AveRooms, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice]",0.782136,"[HouseAge, AveRooms, AveOccup, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.823107,0.573282,0.762959,0.026715,0.786729,0.043275,0.528608,0.055331
4,"[MedInc, HouseAge, AveRooms, AveOccup, Latitude, Longitude]",0.780797,"[HouseAge, AveRooms, Latitude, Longitude, HouseEval_nice, HouseEval_not_nice, WallColors_gray, WallColors_white]",0.826048,0.587308,0.749637,0.044447,0.783358,0.05082,0.509634,0.078489
5,"[MedInc, HouseAge, AveOccup, Latitude, Longitude]",0.776399,"[HouseAge, AveRooms, Latitude, Longitude, WallColors_gray, WallColors_white]",0.825007,0.576532,0.743861,0.045557,0.763341,0.081809,0.470728,0.133257
6,"[MedInc, AveOccup, Latitude, Longitude]",0.767023,"[HouseAge, AveRooms, Latitude, Longitude]",0.826204,0.562033,0.71762,0.073182,0.755594,0.082546,0.421665,0.164988
7,"[MedInc, AveOccup, Latitude]",0.745134,"[HouseAge, Latitude, Longitude]",0.831278,0.497976,0.68527,0.076862,0.742161,0.087329,0.370983,0.162715
8,"[MedInc, AveOccup]",0.738155,"[Latitude, Longitude]",0.840624,0.510645,0.615847,0.137868,0.666243,0.124146,0.25775,0.186621
9,[MedInc],0.691214,[Longitude],0.615832,0.220298,0.527757,0.163457,0.608412,0.00742,0.141172,0.079127


In [116]:
seeker_2.plot_from_results()

### Working Code for 3D Plot

```python

# Create a 3D scatter plot
# NOTE(review): this snippet defines neither `df` nor `go`. `df` is presumably
# the results frame extended with the 'Summary_M1' / 'Summary_M2' hover-text
# columns and `go` is presumably plotly.graph_objects -- confirm before running.
fig = go.Figure(data=[
    # Trace for column 1 (named after it): x = 1-based row number,
    # y = column 4, z = column 1; hover shows the 'Summary_M1' text
    go.Scatter3d(
        x=df.index + 1,
        y=df.iloc[:, 4],
        z=df.iloc[:, 1], 
        mode='lines+markers',
        name=f'{df.columns[1]}',
        text=df['Summary_M1'],
        hoverinfo='text'
    ),
    # Trace for column 3: same x and y, z = column 3; hover shows 'Summary_M2'
    go.Scatter3d(
        x=df.index + 1,
        y=df.iloc[:, 4],
        z=df.iloc[:, 3], 
        mode='lines+markers',
        name=f'{df.columns[3]}',
        text=df['Summary_M2'],
        hoverinfo='text'
    )
])

# Update layout
# Axis titles: x = iteration, y = agreeability, z = model scores
fig.update_layout(
    title='Agreeability Coefficients and Model Scores Over Algorithm Iterations',
    scene=dict(
        xaxis_title='Iteration',
        yaxis_title='Agreeability',
        zaxis_title='Model Scores'
    ),
    hovermode='closest'
)

# Show the plot
fig.show()

```

### Dependencies

In [30]:
import numpy as np
import pandas as pd
import statsmodels
import sklearn
import scipy
import plotly
import matplotlib
import seaborn as sns
import tensorflow as tf


In [32]:
# Report the installed version of every dependency, one "name: version" line each.
for package_name, package in (
    ('pandas', pd),
    ('numpy', np),
    ('statsmodels', statsmodels),
    ('sklearn', sklearn),
    ('scipy', scipy),
    ('plotly', plotly),
    ('matplotlib', matplotlib),
    ('seaborn', sns),
    ('tensorflow', tf),
):
    print(f'{package_name}: {package.__version__}')


pandas: 1.5.3
numpy: 1.20.3
statsmodels: 0.13.5
sklearn: 1.2.2
scipy: 1.10.0
plotly: 5.18.0
matplotlib: 3.3.4
seaborn: 0.11.1
tensorflow: 2.10.1
