## **A**utomated **L**earning for **I**nsightful **C**omparison and **E**valuation - (ALICE)

In [1]:
import numpy as np
import pandas as pd
import os
cur_dir = os.getcwd()

In [2]:
# Smoke-test dataset: California housing (features plus the MedHouseVal target)
from sklearn.datasets import fetch_california_housing

data = fetch_california_housing()

# Wrap the raw arrays in labelled frames
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.DataFrame(data.target, columns=data.target_names)

# Single frame holding the feature columns followed by the target column
df = pd.concat((X, y), axis="columns")



In [3]:
df

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422
...,...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,0.781
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,0.771
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,0.923
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32,0.847


In [4]:
df.describe()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
count,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0,20640.0
mean,3.870671,28.639486,5.429,1.096675,1425.476744,3.070655,35.631861,-119.569704,2.068558
std,1.899822,12.585558,2.474173,0.473911,1132.462122,10.38605,2.135952,2.003532,1.153956
min,0.4999,1.0,0.846154,0.333333,3.0,0.692308,32.54,-124.35,0.14999
25%,2.5634,18.0,4.440716,1.006079,787.0,2.429741,33.93,-121.8,1.196
50%,3.5348,29.0,5.229129,1.04878,1166.0,2.818116,34.26,-118.49,1.797
75%,4.74325,37.0,6.052381,1.099526,1725.0,3.282261,37.71,-118.01,2.64725
max,15.0001,52.0,141.909091,34.066667,35682.0,1243.333333,41.95,-114.31,5.00001


In [5]:
# Build a binary-target copy of df for the classification demo:
# AboveMean is 1 where MedHouseVal is at or above its mean, else 0,
# and the continuous MedHouseVal column is removed.

mean_target = df['MedHouseVal'].mean()

df_discrete = (
    df
    .assign(AboveMean=(df['MedHouseVal'] >= mean_target).astype(int))
    .drop(columns='MedHouseVal')
)

In [6]:
# Can just import entire module
import alice

In [7]:
# Import regression metrics
from alice.metrics.regress import mse, rmse, mae
# Import classification metrics
from alice.metrics.classify import accuracy, precision, recall, f1
# Import regression agreeability metric
from alice.agreeability.regress import pearson
# Import classification agreeability metric
from alice.agreeability.classify import cohen_kappa


In [8]:
# Import our demo search algorithm 
from alice.search_and_compare.sequential import BackEliminator

In [23]:
from alice.metrics.regress import mse, rmse, mae
from alice.metrics.classify import accuracy, precision, recall, f1
from alice.agreeability.regress import pearson
from alice.agreeability.classify import cohen_kappa
import pandas as pd
import plotly.express as px


class BackEliminator():
    '''
    Backward feature elimination applied to two models in lockstep.

    Starting from every column of ``X``, each round drops (independently for
    each model) the one feature whose removal yields the best criterion score,
    then records both scores together with an agreeability coefficient
    computed between the two models' predictions.

    Parameters
    ----------
    X : pandas.DataFrame
        Training features; its columns form the initial feature list.
    y : array-like
        Training target passed to ``model.fit``.
    validation_data : tuple, optional
        ``(X_val, y_val)``. When given, predictions and scores are computed on
        it; otherwise they are computed on the training data.
    task_type : str, optional
        ``'classification'`` ranks candidate scores highest-first; any other
        value ranks them lowest-first.
    criterion : str
        Key into ``criterion_registry`` (e.g. ``'rmse'``, ``'f1'``).
    agreeability : str
        Key into ``agreeability_registry`` (``'pearson'`` or ``'cohen_kappa'``).

    Raises
    ------
    ValueError
        If ``X`` is not provided.
    '''

    def __init__(self,
                 X=None,
                 y=None,
                 validation_data=None,
                 task_type=None,
                 criterion=None,
                 agreeability=None
                 ):
        if X is None:
            raise ValueError("X must be provided as a DataFrame of training features.")

        self.X = X
        self.y = y
        # Always define the validation attributes so the training-set
        # fallback can test `self.validation_data` without an AttributeError.
        if validation_data:
            self.validation_data = validation_data
            self.X_val = self.validation_data[0]
            self.y_val = self.validation_data[1]
        else:
            self.validation_data = None
            self.X_val = None
            self.y_val = None
        self.criterion_registry = {
            'mse': mse,
            'rmse': rmse,
            'mae': mae,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1': f1
            }
        self.criterion = criterion
        self.agreeability_registry = {
            'pearson': pearson,
            'cohen_kappa': cohen_kappa
        }
        self.agreeability = agreeability
        self.initial_feature_list = list(self.X.columns)
        # Empty until compare_models has run; the result accessors rely on it.
        self.results = []

        # Classification criteria are "higher is better", everything else is
        # treated as an error measure ("lower is better"). Either way the
        # best candidate ends up at index 0 of the sorted list.
        if task_type == 'classification':
            self.sort_scores = lambda scores: sorted(scores, key=lambda x: x[1], reverse=True)
        else:
            self.sort_scores = lambda scores: sorted(scores, key=lambda x: x[1])

    @staticmethod
    def _lookup(registry, key, kind):
        '''Return registry[key], or raise a ValueError listing the valid keys.'''
        try:
            return registry[key]
        except (KeyError, TypeError):
            raise ValueError(
                f"Unknown {kind} {key!r}. Available options: {sorted(registry)}."
            ) from None

    def _predict_and_score(self, model, feature_list):
        '''
        Predict with an already fitted model and score the predictions.

        Uses the validation split when one was supplied, otherwise the
        training data. Returns ``(predictions, score)``.
        '''
        metric = self._lookup(self.criterion_registry, self.criterion, 'criterion')
        if self.validation_data:
            X_eval, y_eval = self.X_val, self.y_val
        else:
            X_eval, y_eval = self.X, self.y
        y_preds = model.predict(X_eval[feature_list])
        return y_preds, metric(y_eval, y_preds)

    def _result_row(self, features_m1, m1_score, features_m2, m2_score, agreeability_coeff):
        '''Build one results record; feature lists are copied to freeze their state.'''
        return {
            'Model 1 Included Features': features_m1.copy(),
            f'Model 1 {self.criterion.upper()}': m1_score,
            'Model 2 Included Features': features_m2.copy(),
            f'Model 2 {self.criterion.upper()}': m2_score,
            f'Agreeability Coefficient ({self.agreeability})': agreeability_coeff
        }

    def _print_progress(self, header, features_m1, m1_score, features_m2, m2_score, agreeability_coeff):
        '''Print the progress report for one step of the comparison.'''
        print(header)
        print('-' * 150)
        print(f'Model 1 included: {features_m1}. {self.criterion.upper()}: {m1_score}')
        print(f'Model 2 included: {features_m2}. {self.criterion.upper()}: {m2_score}')
        print('-' * 150)
        print(f'Agreeability Coefficient ({self.agreeability}): {agreeability_coeff}')
        print('=' * 150)

    # Method to be called in the main method of back elimination
    def _deselect_feature(self,
                          feature_list,
                          model):
        '''
        Find the feature whose removal gives the best criterion score.

        For every feature in ``feature_list`` the model is refitted on the
        training data without that feature and then scored.

        Returns
        -------
        tuple
            ``(feature_to_drop, best_score, predictions_for_best_score)``.

        Raises
        ------
        ValueError
            If ``feature_list`` is empty or the criterion is unknown.

        Notes
        -----
        ``model`` is left fitted on the last candidate subset that was tried,
        which is not necessarily the winning one.
        '''
        if not feature_list:
            raise ValueError("feature_list must contain at least one feature.")

        score_per_dropped_feature = []
        for feature in feature_list:
            # Candidate subset: everything except the current feature
            temporary_set = feature_list.copy()
            temporary_set.remove(feature)
            model.fit(self.X[temporary_set], self.y)
            y_preds, score = self._predict_and_score(model, temporary_set)
            score_per_dropped_feature.append((feature, score, y_preds))

        # Best score first (direction depends on task_type, see __init__)
        ranked = self.sort_scores(score_per_dropped_feature)
        worst_feature, best_score, best_preds = ranked[0]
        return worst_feature, best_score, best_preds
        ### TO DO ###
        # Add functionality to possibly save trained models
        # Will take up large memory, may be unfeasible
        ### TO DO ###

    def compare_models(self,
                           m1,
                           m2,):
        '''
        Run backward elimination on both models and track their agreement.

        Both models are first fitted on the full feature set. Then, while
        each model still has more than one feature, one feature is dropped
        per model per iteration and the scores plus the agreeability
        coefficient are recorded. Progress is printed at every step.

        Parameters
        ----------
        m1, m2 : estimators exposing ``fit(X, y)`` and ``predict(X)``.

        Returns
        -------
        list of dict
            One record per step (also stored on ``self.results``).

        Raises
        ------
        ValueError
            If ``criterion`` or ``agreeability`` is not a registered key.
        '''
        # Fail fast on a bad configuration, before any model is fitted
        self._lookup(self.criterion_registry, self.criterion, 'criterion')
        agreeability_fn = self._lookup(
            self.agreeability_registry, self.agreeability, 'agreeability'
        )

        # Each model starts from the full feature set
        new_feature_list_m1 = self.initial_feature_list.copy()
        new_feature_list_m2 = self.initial_feature_list.copy()
        results = []

        # Baseline: fit and score both models without removing anything
        m1.fit(self.X[new_feature_list_m1], self.y)
        m2.fit(self.X[new_feature_list_m2], self.y)
        m1_preds, m1_score = self._predict_and_score(m1, new_feature_list_m1)
        m2_preds, m2_score = self._predict_and_score(m2, new_feature_list_m2)
        agreeability_coeff = agreeability_fn(m1_preds, m2_preds)
        results.append(self._result_row(
            new_feature_list_m1, m1_score, new_feature_list_m2, m2_score, agreeability_coeff
        ))
        self._print_progress(
            'Initial run: fitted both models with full feature set.',
            new_feature_list_m1, m1_score, new_feature_list_m2, m2_score, agreeability_coeff
        )

        counter = 0
        # Deselect one feature per model per iteration until one feature is left
        while len(new_feature_list_m1) > 1 and len(new_feature_list_m2) > 1:
            counter += 1

            worst_feature_m1, m1_score, m1_preds = self._deselect_feature(new_feature_list_m1, m1)
            worst_feature_m2, m2_score, m2_preds = self._deselect_feature(new_feature_list_m2, m2)
            new_feature_list_m1.remove(worst_feature_m1)
            new_feature_list_m2.remove(worst_feature_m2)

            agreeability_coeff = agreeability_fn(m1_preds, m2_preds)
            results.append(self._result_row(
                new_feature_list_m1, m1_score, new_feature_list_m2, m2_score, agreeability_coeff
            ))
            self._print_progress(
                f'Iteration {counter}:',
                new_feature_list_m1, m1_score, new_feature_list_m2, m2_score, agreeability_coeff
            )

        self.results = results
        return results

    # Method to turn results into a df
    def dataframe_from_results(self):
        '''
        Return results as a dataframe.

        Raises ValueError if compare_models has not produced results yet.
        '''
        if not self.results:
            raise ValueError("There are no results available. Make sure to run compare_models first.")
        return pd.DataFrame(self.results)

    # Method to turn results into an interactive plot
    def plot_from_results(self):
        '''
        Makes an interactive plot from the results.

        Plots the agreeability coefficient per step; hovering a point shows
        both models' included features and scores. Raises ValueError if
        compare_models has not produced results yet.
        '''
        if not self.results:
            raise ValueError("There are no results available. Make sure to run compare_models first.")
        df = pd.DataFrame(self.results)
        # Column order is fixed by _result_row:
        # [m1 features, m1 score, m2 features, m2 score, agreeability]
        cols = list(df.columns)

        def _summary(row):
            # str() each feature so non-string column labels do not break join
            return (
                f"{cols[0]}: <br>{', '.join(map(str, row.iloc[0]))}"
                f"<br>{cols[1]}: {row.iloc[1]}"
                f"<br>{cols[2]}: <br>{', '.join(map(str, row.iloc[2]))}"
                f"<br>{cols[3]}: {row.iloc[3]}"
            )

        # Hover text with HTML line breaks
        df['Summary'] = df.apply(_summary, axis=1)

        # Plot
        fig = px.line(
            df,
            x=df.index+1,
            y=df.iloc[:, 4],
            hover_data=['Summary'],
            labels={'y': f'{df.columns[4]}', 'x': 'Iteration'},
            title='Agreeability Coefficients Over Algorithm Iterations',
            markers=True
        )

        fig.update_xaxes(type='category')
        fig.update_layout(hovermode='closest')

        fig.show()

### Check functionality on a regression task

In [9]:
from sklearn.model_selection import train_test_split

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   MedInc       20640 non-null  float64
 1   HouseAge     20640 non-null  float64
 2   AveRooms     20640 non-null  float64
 3   AveBedrms    20640 non-null  float64
 4   Population   20640 non-null  float64
 5   AveOccup     20640 non-null  float64
 6   Latitude     20640 non-null  float64
 7   Longitude    20640 non-null  float64
 8   MedHouseVal  20640 non-null  float64
dtypes: float64(9)
memory usage: 1.4 MB


In [11]:
# Feature matrix is everything except the target; target is the MedHouseVal series
X = df.drop(columns='MedHouseVal')
y = df['MedHouseVal']


In [12]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=66)

In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor

In [14]:
# Models to compare. The tree is seeded (same seed as the train/validation
# split) so repeated runs eliminate the same features and give the same scores.
m1 = LinearRegression()
m2 = DecisionTreeRegressor(random_state=66)

In [15]:
# Regression run: candidates are scored with RMSE on the held-out split and,
# because task_type is not 'classification', ranked lowest-score-first.
# Agreement between the two models' predictions is measured with 'pearson'.
seeker = BackEliminator(
    X=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    task_type='regression',
    criterion='rmse',
    agreeability='pearson'
)

In [16]:
results = seeker.compare_models(
    m1=m1,
    m2=m2
)

Initial run: fitted both models with full feature set.
------------------------------------------------------------------------------------------------------------------------------------------------------
Model 1 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']. RMSE: 0.7315223258924328
Model 2 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']. RMSE: 0.727676842497427
------------------------------------------------------------------------------------------------------------------------------------------------------
Agreeability Coefficient (pearson): 0.7615588209425082
Iteration 1:
------------------------------------------------------------------------------------------------------------------------------------------------------
Model 1 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'Latitude', 'Longitude']. RMSE: 0.7243210543253327
Model 2 

In [17]:
results

[{'Model 1 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'Population',
   'AveOccup',
   'Latitude',
   'Longitude'],
  'Model 1 RMSE': 0.7315223258924328,
  'Model 2 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'Population',
   'AveOccup',
   'Latitude',
   'Longitude'],
  'Model 2 RMSE': 0.727676842497427,
  'Agreeability Coefficient (pearson)': 0.7615588209425082},
 {'Model 1 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'Population',
   'Latitude',
   'Longitude'],
  'Model 1 RMSE': 0.7243210543253327,
  'Model 2 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'Population',
   'Latitude',
   'Longitude'],
  'Model 2 RMSE': 0.6797659162787423,
  'Agreeability Coefficient (pearson)': 0.7717165584399881},
 {'Model 1 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'Latitude',
   'Longitude'],
  'Model 1 RMSE': 0.724320665

In [18]:
results_df = seeker.dataframe_from_results()

In [19]:
results_df

Unnamed: 0,Model 1 Included Features,Model 1 RMSE,Model 2 Included Features,Model 2 RMSE,Agreeability Coefficient (pearson)
0,"[MedInc, HouseAge, AveRooms, AveBedrms, Popula...",0.731522,"[MedInc, HouseAge, AveRooms, AveBedrms, Popula...",0.727677,0.761559
1,"[MedInc, HouseAge, AveRooms, AveBedrms, Popula...",0.724321,"[MedInc, HouseAge, AveRooms, AveBedrms, Popula...",0.679766,0.771717
2,"[MedInc, HouseAge, AveRooms, AveBedrms, Latitu...",0.724321,"[MedInc, AveRooms, AveBedrms, Population, Lati...",0.667478,0.783259
3,"[MedInc, HouseAge, AveBedrms, Latitude, Longit...",0.729908,"[MedInc, AveRooms, AveBedrms, Latitude, Longit...",0.646049,0.781554
4,"[MedInc, HouseAge, Latitude, Longitude]",0.734025,"[MedInc, AveRooms, Latitude, Longitude]",0.63506,0.779637
5,"[MedInc, Latitude, Longitude]",0.741958,"[AveRooms, Latitude, Longitude]",0.638131,0.724497
6,"[MedInc, Latitude]",0.830187,"[Latitude, Longitude]",0.632351,0.599097
7,[MedInc],0.839073,[Longitude],1.00587,0.326271


In [20]:
seeker.plot_from_results()

### Check functionality on a classification task

In [21]:
df_discrete.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   MedInc      20640 non-null  float64
 1   HouseAge    20640 non-null  float64
 2   AveRooms    20640 non-null  float64
 3   AveBedrms   20640 non-null  float64
 4   Population  20640 non-null  float64
 5   AveOccup    20640 non-null  float64
 6   Latitude    20640 non-null  float64
 7   Longitude   20640 non-null  float64
 8   AboveMean   20640 non-null  int64  
dtypes: float64(8), int64(1)
memory usage: 1.4 MB


In [22]:
# Feature matrix is everything except the label; label is the binary AboveMean series
X = df_discrete.drop(columns='AboveMean')
y = df_discrete['AboveMean']


In [23]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=66)

In [24]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [25]:
# Models to compare. Both are seeded (same seed as the train/validation split)
# so repeated runs eliminate the same features and give the same scores.
m1 = LogisticRegression(solver='liblinear', random_state=66)
m2 = DecisionTreeClassifier(random_state=66)

In [26]:
# Classification run: candidates are scored with F1 on the held-out split and,
# because task_type is 'classification', ranked highest-score-first.
# Agreement between the two models' predictions is measured with 'cohen_kappa'.
seeker_2 = BackEliminator(
    X=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    task_type='classification',
    criterion='f1',
    agreeability='cohen_kappa'
)

In [27]:
results_2 = seeker_2.compare_models(
    m1=m1,
    m2=m2
)

Initial run: fitted both models with full feature set.
------------------------------------------------------------------------------------------------------------------------------------------------------
Model 1 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']. F1: 0.7787234042553192
Model 2 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']. F1: 0.7909604519774012
------------------------------------------------------------------------------------------------------------------------------------------------------
Agreeability Coefficient (cohen_kappa): 0.6370880883296353
Iteration 1:
------------------------------------------------------------------------------------------------------------------------------------------------------
Model 1 included: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'AveOccup', 'Latitude', 'Longitude']. F1: 0.7815533980582524
Model 2 inc

In [28]:
results_2

[{'Model 1 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'Population',
   'AveOccup',
   'Latitude',
   'Longitude'],
  'Model 1 F1': 0.7787234042553192,
  'Model 2 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'Population',
   'AveOccup',
   'Latitude',
   'Longitude'],
  'Model 2 F1': 0.7909604519774012,
  'Agreeability Coefficient (cohen_kappa)': 0.6370880883296353},
 {'Model 1 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveBedrms',
   'AveOccup',
   'Latitude',
   'Longitude'],
  'Model 1 F1': 0.7815533980582524,
  'Model 2 Included Features': ['HouseAge',
   'AveRooms',
   'AveBedrms',
   'Population',
   'AveOccup',
   'Latitude',
   'Longitude'],
  'Model 2 F1': 0.8131736526946108,
  'Agreeability Coefficient (cohen_kappa)': 0.5760027186873634},
 {'Model 1 Included Features': ['MedInc',
   'HouseAge',
   'AveRooms',
   'AveOccup',
   'Latitude',
   'Longitude'],
  'Model 1 F1': 0.78079710144

In [29]:
results_df_2 = seeker_2.dataframe_from_results()

In [30]:
results_df_2

Unnamed: 0,Model 1 Included Features,Model 1 F1,Model 2 Included Features,Model 2 F1,Agreeability Coefficient (cohen_kappa)
0,"[MedInc, HouseAge, AveRooms, AveBedrms, Popula...",0.778723,"[MedInc, HouseAge, AveRooms, AveBedrms, Popula...",0.79096,0.637088
1,"[MedInc, HouseAge, AveRooms, AveBedrms, AveOcc...",0.781553,"[HouseAge, AveRooms, AveBedrms, Population, Av...",0.813174,0.576003
2,"[MedInc, HouseAge, AveRooms, AveOccup, Latitud...",0.780797,"[HouseAge, AveRooms, Population, AveOccup, Lat...",0.82413,0.581393
3,"[MedInc, HouseAge, AveOccup, Latitude, Longitude]",0.776399,"[HouseAge, AveRooms, Population, Latitude, Lon...",0.828417,0.574024
4,"[MedInc, AveOccup, Latitude, Longitude]",0.767023,"[HouseAge, AveRooms, Latitude, Longitude]",0.830279,0.561308
5,"[MedInc, AveOccup, Latitude]",0.745134,"[HouseAge, Latitude, Longitude]",0.827295,0.490481
6,"[MedInc, AveOccup]",0.738155,"[Latitude, Longitude]",0.841493,0.514196
7,[MedInc],0.691214,[Longitude],0.615832,0.220298


In [31]:
seeker_2.plot_from_results()