In [None]:
import datarobot as dr
from datarobot import Project, Deployment
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np

import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import matplotlib.ticker as mtick
from matplotlib.ticker import FormatStrFormatter

import datetime as dt
from datetime import datetime
import dateutil.parser
import os
import re 
from importlib import reload
import random
import math
from sklearn.metrics import mean_squared_error


%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Set Pandas configuration to show all columns
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', -1)

import warnings
warnings.filterwarnings('ignore')

In [None]:
dr.Client(config_path='../drconfig.yaml');

In [None]:
# Load the modeling dataset; replace the placeholder with the path to the CSV file.
# NOTE(review): months() below computes a date difference on df['Date'], so that
# column presumably needs to hold datetimes (e.g. via parse_dates) — confirm.
df = pd.read_csv(""" Enter Code """)

# Preview the first rows to sanity-check the load
df.head(5)

## Months

In [None]:
# Module-level record of the dataset span in months (as a string); filled in by months()
MONTHS = ''

def months(df):
    """Print the date range of ``df['Date']`` and record its length in months.

    The span is approximated as ``days / 30`` truncated to an integer and is
    stored, as a string, in the module-level ``MONTHS`` (later cells concatenate
    it into project names and the dataset filename).

    Args:
        df: DataFrame with a ``'Date'`` column holding datetimes or
            date-like strings.

    Returns:
        The same string that is written to ``MONTHS``.
    """
    global MONTHS
    # A CSV load leaves dates as plain strings unless they are parsed explicitly,
    # and subtracting two strings raises TypeError, so coerce first.
    # Already-datetime columns pass through unchanged.
    dates = pd.to_datetime(df['Date'])
    MIN_DATE = dates.min()
    MAX_DATE = dates.max()
    MONTHS = str(int((MAX_DATE - MIN_DATE).days / 30))

    print('Min Date: ', MIN_DATE)
    print('Max Date: ', MAX_DATE)
    print('Months:   ', MONTHS)
    return MONTHS

In [None]:
months(df)

## Create TS Settings

In [None]:
# --- Column names (fill in) ---
# DATE is passed as the datetime partition column and TARGET as the modeling
# target in create_dr_project().
DATE      = """ Enter Code """
TARGET    = """ Enter Code """

# NOTE(review): PROJECT_NAME is not referenced by any other cell visible here.
PROJECT_NAME = 'Lab_4'

# VERSION and MODE are embedded in every generated project name.
VERSION = '1'
MODE    = 'Q'

# Feature derivation windows: model_factory() reads each entry as [start, end].
FDWS = """ Enter Code """ 

# Forecast distances: model_factory() reads each entry as [start, end].
FDS  = """ Enter Code """ 

# BASE is the shared project-name prefix; it is also the search key used later
# to list the projects again.
BASE   = 'L4_1_V:'
# NOTE(review): model_factory() rebuilds its own local PREFIX / DATASET_FILENAME,
# so these two capture MONTHS only as it was when this cell ran.
PREFIX = BASE + VERSION + '_Mnths:' + str(MONTHS) + '_Mode:' + MODE
DATASET_FILENAME = 'Months_' + str(MONTHS)
# Passed to Project.create / set_target as max_wait and read_timeout
# (presumably seconds — confirm against the DataRobot client docs).
MAX_WAIT = 14400
READ_TIMEOUT = 14400

# Partitioning overrides forwarded to DatetimePartitioningSpecification;
# None is passed through unchanged.
HOLDOUT_START_DATE  = None 
VALIDATION_DURATION = None 
HOLDOUT_DURATION    = None 
NUMBER_BACKTESTS    = None
# NOTE(review): GAP_DURATION is defined but not passed anywhere in the visible code.
GAP_DURATION        = None 

# Forwarded as feature_settings to the partitioning specification
FEATURE_SETTINGS = []

# NOTE(review): CAL_ID is not used by any cell visible here.
CAL_ID = None

print(FEATURE_SETTINGS)
print(CAL_ID)


## Create Project

In [None]:
def create_dr_project(df, project_name, fw_start=None, fw_end=None, fdw_start=None, fdw_end=None, dataset_filename=DATASET_FILENAME):
    """Create a DataRobot time series project from ``df`` and start Autopilot.

    Partitioning, target and timeout settings come from the module-level
    constants (DATE, TARGET, HOLDOUT_START_DATE, VALIDATION_DURATION,
    HOLDOUT_DURATION, NUMBER_BACKTESTS, FEATURE_SETTINGS, MAX_WAIT,
    READ_TIMEOUT).

    Args:
        df: Data uploaded as the project's source data.
        project_name: Name given to the new project.
        fw_start: Forecast window start.
        fw_end: Forecast window end.
        fdw_start: Feature derivation window start.
        fdw_end: Feature derivation window end.
        dataset_filename: Filename recorded for the uploaded dataset. The
            default is the value DATASET_FILENAME had when this function was
            defined.

    Returns:
        The created ``dr.Project`` after ``set_target`` has been called.
    """
    ###############################
    # Create Datetime Specification
    ###############################
    # SERIES_COL = [SERIES]
    time_partition = dr.DatetimePartitioningSpecification(
        datetime_partition_column = DATE,
        forecast_window_start     = fw_start,
        forecast_window_end       = fw_end,
        feature_derivation_window_start = fdw_start,
        feature_derivation_window_end   = fdw_end,
        holdout_start_date        = HOLDOUT_START_DATE,
        validation_duration       = VALIDATION_DURATION,
        holdout_duration          = HOLDOUT_DURATION,
        number_of_backtests       = NUMBER_BACKTESTS,
        feature_settings          = FEATURE_SETTINGS,
        use_time_series           = True
    )

    ################
    # Create Project
    ################
    project = dr.Project.create(
        project_name = project_name,
        sourcedata   = df,
        max_wait     = MAX_WAIT,
        read_timeout = READ_TIMEOUT,
        # Use the caller's filename; the global was previously used here,
        # which silently discarded the argument.
        dataset_filename = dataset_filename
    )
    print("Post-Project MB: ", (df.memory_usage(index=True).sum()/1024/1024).round(2))
    print("Post-Project Records: {:,}".format(len(df)))
    print(f'Project {project_name} Created...')
    print(" ")

    #################
    # Start Autopilot
    #################
    project.set_target(
        target = TARGET,
        metric = None,
        mode   = dr.AUTOPILOT_MODE.QUICK,  # dr.AUTOPILOT_MODE.FULL_AUTO,
        #advanced_options = opts,
        worker_count = -1,
        partitioning_method = time_partition,
        max_wait = MAX_WAIT
    )
    return project


## Model Factory

In [None]:
projects = []  # Accumulates every project object created by model_factory()

In [None]:
def model_factory(df, FDWS, FDS):
    """Create one DataRobot project per (FDW, FD) combination.

    Every entry of ``FDWS`` is paired with every entry of ``FDS``; index 0 of
    an entry is read as the window start and index 1 as the end. Each created
    project is appended to the module-level ``projects`` list.

    Args:
        df: Source data; a fresh copy is handed to each project.
        FDWS: Sequence of feature derivation windows, each ``[start, end]``.
        FDS: Sequence of forecast distances, each ``[start, end]``.
    """
    # Rebuilt on every call so the current VERSION / MONTHS / MODE are used
    name_prefix = ''.join([BASE, str(VERSION), '_Mnths:', MONTHS, '_Mode:', MODE])
    upload_filename = 'Months_' + MONTHS
    total_projects = len(FDWS) * len(FDS)
    print(f"Kicking off {total_projects} projects!")
    print(" ")

    window_pairs = [(fdw, fd) for fdw in FDWS for fd in FDS]
    for proj_num, (fdw, fd) in enumerate(window_pairs, start=1):
        fd_start, fd_end = fd[0], fd[1]
        fdw_start, fdw_end = fdw[0], fdw[1]

        print(f"Project {proj_num}")

        # The name encodes the windows so they can be parsed back out later
        project_name = f"{name_prefix}_FDW:{fdw_start}-{fdw_end}_FD:{fd_start}-{fd_end}"
        print(project_name)
        print(" ")

        new_project = create_dr_project(
            df.copy(),
            project_name,
            fw_start=fd_start,
            fw_end=fd_end,
            fdw_start=fdw_start,
            fdw_end=fdw_end,
            dataset_filename=upload_filename,
        )
        projects.append(new_project)

## Default Model

In [None]:
model_factory(df, FDWS, FDS)

## Iterate over various FDWs

In [None]:
VERSION = 2

In [None]:
FDWS = """ Enter Code """  

FDS  = """ Enter Code """

In [None]:
model_factory(df, FDWS, FDS)

## Get Project Names in a List

In [None]:
# Re-query the server for projects by name using BASE, replacing the in-memory
# list built by model_factory().
# NOTE(review): presumably a substring search on the project name — confirm the
# matching semantics of search_params in the DataRobot client docs.
projects = dr.Project.list(search_params={'project_name': BASE}) 
projects

## Get Project Names and PIDs in a List

In [None]:
# Pair every project object with its id; later cells re-fetch each project by
# the id stored at index 1.
lst = [(proj, proj.id) for proj in projects]
lst

## Unlock Holdouts

In [None]:
# Unlock the holdout partition of every project collected in `lst`.
# `project` is left bound to the last project; a later cell reads project.metric.
for _, project_id in lst:
    project = Project.get(project_id)
    project.unlock_holdout()

## Compute Backtests for Blenders

In [None]:
# Submit backtest scoring for every Blender model in every project.
for i in lst:
    project = Project.get(i[1])
    lb = project.get_datetime_models()
    for model in lb:
        if 'Blender' not in model.model_type:
            continue

        print(project.project_name, model)  # , model.id
        try:
            dr.DatetimeModel.score_backtests(model)
        except dr.errors.ClientError as err:
            # Best effort: report why the request was rejected instead of
            # hiding it, then move on to the next model.
            print(f'Backtests not submitted for model {model.id} in Project {project.project_name}: {err}')
        else:
            print(f'Computing backtests for model {model.id} in Project {project.project_name}')
        print(' ')

    print(f'All available backtests have been submitted for scoring for project {project.project_name}')
    print(' ')

## Compute All Backtests for Top Models in Backtest 1 and Holdout groups

In [None]:
# Partition keys (looked up inside model.metrics[<metric>]) used to rank models below
OPTIMIZE_GROUP = ['validation', 'holdout']

In [None]:
# NOTE: `project` here is whatever project an earlier loop left bound last;
# this assumes all projects share the same optimization metric — TODO confirm.
PROJECT_METRIC = project.metric
# De-duplicated via set(), so the order of METRICS is not guaranteed
METRICS = list(set([PROJECT_METRIC, 'MASE', 'RMSE']))

In [None]:
# For every project, metric and partition: take the 3 best-scoring models and
# submit backtest scoring for those that have no all-backtests score yet.
for p in lst:
    # The project does not change across metrics/partitions, so fetch it once
    project = Project.get(p[1])
    for met in METRICS:
        for o in OPTIMIZE_GROUP:
            # Re-read the leaderboard each pass so freshly computed scores are seen
            lb = project.get_datetime_models()

            # Keep models that actually have a score for this metric/partition.
            # Compare against None so a legitimate score of 0 is not dropped,
            # and tolerate models that do not report the metric at all.
            scored = [
                model for model in lb
                if (model.metrics.get(met) or {}).get(o) is not None
            ]
            best_models = sorted(scored, key=lambda m: m.metrics[met][o])[0:3]

            for mod in best_models:
                if mod.metrics[met].get("backtesting") is None:
                    print(project.project_name, mod)
                    try:
                        dr.DatetimeModel.score_backtests(mod)
                    except dr.errors.ClientError as err:
                        # Best effort: report the rejection instead of hiding it
                        print(f'Backtests not submitted for model {mod.model_type} in Project {project.project_name}: {err}')
                    else:
                        print(f'Computing backtests for model {mod.model_type} in Project {project.project_name}')
                    print(f'All available backtests have been submitted for scoring for project {project.project_name}')
                    print(' ')
                else:
                    print(project.project_name)
                    print(f'{mod.model_type} All Backtests Already Computed')
                    print(' ')

## Get Project and Model Scores

In [None]:
# Enter correct partition period
OPTIMIZATION_PERIOD = """ Enter Code """  # BackTest 1: validation  All Backtest: backtesting  Holdout: holdout 

In [None]:
models = []
score_records = []  # one dict per project; turned into a DataFrame once, below

for p in lst:
    project = Project.get(p[1])
    lb = project.get_datetime_models()
    # Rank models that have a score for the chosen partition; compare against
    # None so that a score of exactly 0 is not filtered out.
    ranked = sorted(
        [model for model in lb
         if model.metrics[project.metric][OPTIMIZATION_PERIOD] is not None],
        key=lambda m: m.metrics[project.metric][OPTIMIZATION_PERIOD],
    )
    if not ranked:
        # Nothing scored yet for this partition: skip rather than fail on [0]
        print(f'No model in project {project.project_name} has a {project.metric} '
              f'{OPTIMIZATION_PERIOD} score; skipping')
        continue
    best_model = ranked[0]

    score_records.append(
        {
            'Project_Name': project.project_name,
            'Project_ID': project.id,
            'Model_ID': best_model.id,
            'Model_Type': best_model.model_type,
            'Featurelist': best_model.featurelist_name,
            'Optimization_Metric': project.metric,
            'Scores': best_model.metrics,
        }
    )

# Build the frame in one go; DataFrame.append is gone from recent pandas and
# growing a frame row by row is quadratic anyway.
scores = pd.DataFrame(score_records)

print(f'Scores for all {len(score_records)} projects have been computed')
print('')

# Flatten the nested metrics dict into "<metric>.<partition>" columns
scores = scores.join(json_normalize(scores["Scores"].tolist())).drop(labels=['Scores'], axis=1)

# Drop Empty Columns
scores = scores[scores.columns.drop(list(scores.filter(regex='crossValidation$')))]

# Rename Columns (literal match: with regex matching the leading "." would match any character)
scores.columns = scores.columns.str.replace(".backtesting", "_All_BT", regex=False)
scores.columns = scores.columns.str.replace(".holdout", "_Holdout", regex=False)
scores.columns = scores.columns.str.replace(".validation", "_BT_1", regex=False)
scores.columns = scores.columns.str.replace(' ', '_', regex=False)

scores = scores[scores.columns.drop(list(scores.filter(regex='_All_BTScores$')))]

scores.head(2)

In [None]:
# NOTE: METRICS is rebound here from metric names to the score *column* names
# containing MASE or RMSE; a later cell rebinds it back to metric names.
METRICS = scores.filter(regex='MASE|RMSE').columns.to_list()
# Identifying columns shown alongside the metric columns
PROJECT = ['Project_Name', 'Project_ID', 'Model_ID', 'Model_Type', 'Featurelist']
COLS = PROJECT + METRICS

In [None]:
scores[COLS]

In [None]:
# Recover the window settings that model_factory() encoded in each project name
# ("..._Mnths:<m>_Mode:<q>_FDW:<start>-<end>_FD:<start>-<end>").
# The patterns accept any number of digits and an optional sign on the FDW
# bounds, so values such as -100 or a negative FDW end are parsed in full.
# expand=False makes each extract return a Series for the new column.
project_names = scores['Project_Name']
scores['FDW_Start'] = project_names.str.extract(r'FDW:(-?\d+)', expand=False)
scores['FDW_End']   = project_names.str.extract(r'FDW:-?\d+-(-?\d+)_', expand=False)
scores['FD_Start']  = project_names.str.extract(r'FD:(\d+)', expand=False)
scores['FD_End']    = project_names.str.extract(r'FD:\d+-(\d+)', expand=False)
scores['Months']    = project_names.str.extract(r'_Mnths:(\d+)_', expand=False)

# The earlier rename of '... Poisson Deviance' / '... Tweedie Deviance' columns
# is gone: spaces in column names were already replaced by '_' when `scores`
# was built, so none of its keys could match (and one key was listed twice).

META = ['FDW_Start', 'FDW_End', 'FD_Start', 'FD_End', 'Months']
MORE = PROJECT + META + METRICS

scores[MORE].sort_values(by=['MASE_All_BT'], ascending=True)

In [None]:
scores[MORE].sort_values(by=['MASE_All_BT'], ascending=True).round(4)

## Get Best Model

In [None]:
# Enter column with correct scoring metric and partition
hrmse = scores.loc[scores[""" Enter Code """].notnull()]

# Take the Single Best model
hrmse_best = pd.DataFrame(hrmse.loc[hrmse.MASE_All_BT.idxmin()]).transpose()

# Take the Best model by Project Name
# hrmse_best = hrmse.loc[hrmse.groupby('Project_Name').MASE_All_BT.idxmin()]

best_models = pd.DataFrame(hrmse_best) 
best_models

In [None]:
best_models

# Feature Lists

## Pull Feature Impact from Top Model

In [None]:
RECORD = """ Enter Code """

In [None]:
# Verify correct model
PID = best_models['Project_ID'].values[RECORD]
MID = best_models['Model_ID'].values[RECORD]

project = dr.Project.get(PID)
model   = dr.Model.get(PID, MID)
print(project)
print(model)

In [None]:
# Look up the selected project/model pair and fetch (or request) its feature impact.
PID, MID = (best_models[column].values[RECORD] for column in ('Project_ID', 'Model_ID'))

project = dr.Project.get(PID)
print(project, " ", sep="\n")

model = dr.Model.get(PID, MID)
print(model, " ", sep="\n")

feature_impacts = model.get_or_request_feature_impact()

In [None]:
# Hex colours shared by the plotting cells below
dr_dark_blue = '#08233F'  # axes background
dr_blue      = '#1F77B4'  # bars
dr_orange    = '#FF7F0E'  # target-average line in the histograms
dr_red       = '#BE3C28'  # bars with negative impact

In [None]:
# Horizontal bar chart of normalized feature impact, smallest at the bottom.
percent_tick_fmt = mtick.PercentFormatter(xmax=1.0)

impact_df = pd.DataFrame(feature_impacts).sort_values(by='impactNormalized', ascending=True)

# Positive values are blue, negative are red
bar_colors = impact_df['impactNormalized'].map(
    lambda value: dr_red if value < 0 else dr_blue
)

ax = impact_df.plot.barh(
    x='featureName',
    y='impactNormalized',
    legend=False,
    color=bar_colors,
    figsize=(12, 14),
)
ax.set_facecolor(dr_dark_blue)
ax.xaxis.set_major_formatter(percent_tick_fmt)
ax.xaxis.set_tick_params(labeltop=True)
ax.xaxis.grid(True, alpha=0.2)

ax.set_ylabel('')
ax.set_xlabel('Normalized Impact')
ax.set_xlim((None, 1))  # Allow for negative impact
ax.set_title('Feature Impact', y=1.04);

## Histogram

In [None]:
def _format_tick_label(label):
    """Return ``label`` rounded to two decimals when it is numeric, else as plain text."""
    try:
        return str(round(float(label), 2))
    except (TypeError, ValueError):
        # Non-numeric (e.g. categorical) labels are shown as they are
        return str(label)


def matplotlib_pair_histogram(labels, counts, target_avgs,
                              bin_count, ax1, feature):
    """Draw a count histogram with the target average overlaid on a twin axis.

    Args:
        labels: Bin labels used as x positions for the bars and the line.
        counts: Bar heights, one per label.
        target_avgs: Target values plotted per label on the secondary y-axis.
        bin_count: Number of bins; used only in the title.
        ax1: Axes to draw on; a twin y-axis is created from it.
        feature: Object exposing ``feature_type`` and ``name``.
    """
    # Rotate tick labels so long category names or many bins stay readable
    if feature.feature_type in ['Categorical', 'Text', 'Numeric']:
        ax1.tick_params(axis='x', rotation=45)

    ax1.set_ylabel(feature.name, color=dr_blue)
    ax1.bar(labels, counts, color=dr_blue)
    # Numeric labels are rounded; non-numeric ones used to raise ValueError here
    ax1.set_xticklabels([_format_tick_label(label) for label in labels])

    # Instantiate a second axes that shares the same x-axis
    ax2 = ax1.twinx()
    ax2.set_ylabel(TARGET, color=dr_orange)
    ax2.plot(labels, target_avgs, marker='o', lw=1, color=dr_orange)
    ax1.set_facecolor(dr_dark_blue)
    title = 'Histogram for {} ({} bins)'.format(feature.name, bin_count)
    ax1.set_title(title)
    
def draw_feature_histogram(feature_name, bin_count):
    """Fetch a feature's histogram from the current ``project`` and plot it.

    Args:
        feature_name: Name of the feature to look up in the project.
        bin_count: Number of bins requested for the downsampled histogram.
    """
    feature = dr.Feature.get(project.id, feature_name)

    # Downsampled histogram for the requested bin count, as a small table
    bins = pd.DataFrame(
        feature.get_histogram(bin_count).plot,
        columns=['label', 'count', 'target'],
    )
    # Labels are truncated to whole numbers and kept as text for the x-axis
    bins['label'] = bins['label'].astype(float).astype(int).astype(str)

    labels, counts, target_averages = [], [], []
    for record in bins.to_dict(orient='records'):
        labels.append(record['label'])
        counts.append(record['count'])
        target_averages.append(record['target'])

    fig, axes = plt.subplots()
    fig.set_size_inches((10, 4))
    matplotlib_pair_histogram(labels, counts, target_averages,
                              bin_count, axes, feature)

## List Features

In [None]:
project.get_features()

## Feature Histogram

In [None]:
draw_feature_histogram('retail_sales', 10)

# Reduced Feature Lists
<font color=lightblue>
Three Methods to Choose from:</font> <br>
&nbsp;&nbsp; &nbsp; &nbsp; 1. Percent of Top Features <br> 
&nbsp;&nbsp; &nbsp; &nbsp; 2. Number of Top Features <br>
&nbsp;&nbsp; &nbsp; &nbsp; 3. Manually Specifying Features <br>




### Create DataFrame to store Feature List Names and IDs

In [None]:
# Running table of the custom feature lists created below (columns: Name, ID).
# Re-running this cell resets the table.
feature_lists_df = pd.DataFrame()

## Top X Percent of Features

In [None]:
# Enter Threshold Percentage
THRESHOLD = """ Enter Code """

In [None]:
print(f'Collecting Feature Impact for M{model.model_number} in project "{project.project_name}"')

# Rank features from most to least impactful so the running share below really
# is "the top features"; mergesort keeps ties in their incoming order.
impact = (
    pd.DataFrame.from_records(model.get_or_request_feature_impact())
    .sort_values(by='impactUnnormalized', ascending=False, kind='mergesort')
    .reset_index(drop=True)
)
impact['cumulative_impact'] = impact['impactUnnormalized'].cumsum() / impact['impactUnnormalized'].sum()

# Positions of the features whose cumulative share stays within THRESHOLD
to_keep = np.where(impact['cumulative_impact'] <= THRESHOLD)[0]
if len(to_keep) < 1:
    # Stop here with a clear message; continuing would fail inside np.max()
    raise ValueError('Applying this threshold would result in a featurelist with no features')

idx = np.max(to_keep)

# .loc slicing is label based and inclusive, so this keeps rows 0..idx
selected_features = impact.loc[0:idx, 'featureName'].to_list()
feature_list = project.create_modeling_featurelist(f'Top {len(selected_features)} features M{model.model_number}',
                                                   selected_features)

# Record the new list's name and id
cust_feat_list = pd.DataFrame(
    [
        {
            'Name': feature_list.name,
            'ID': feature_list.id
        }
    ]
)
# pd.concat replaces DataFrame.append, which recent pandas no longer provides
feature_lists_df = pd.concat([feature_lists_df, cust_feat_list], sort=False).reset_index(drop=True)

feature_lists_df

In [None]:
THRESHOLD = """ Enter Code """

In [None]:
print(f'Collecting Feature Impact for M{model.model_number} in project "{project.project_name}"')

# Rank features from most to least impactful so the running share below really
# is "the top features"; mergesort keeps ties in their incoming order.
impact = (
    pd.DataFrame.from_records(model.get_or_request_feature_impact())
    .sort_values(by='impactUnnormalized', ascending=False, kind='mergesort')
    .reset_index(drop=True)
)
impact['cumulative_impact'] = impact['impactUnnormalized'].cumsum() / impact['impactUnnormalized'].sum()

# Positions of the features whose cumulative share stays within THRESHOLD
to_keep = np.where(impact['cumulative_impact'] <= THRESHOLD)[0]
if len(to_keep) < 1:
    # Stop here with a clear message; continuing would fail inside np.max()
    raise ValueError('Applying this threshold would result in a featurelist with no features')

idx = np.max(to_keep)

# .loc slicing is label based and inclusive, so this keeps rows 0..idx
selected_features = impact.loc[0:idx, 'featureName'].to_list()
feature_list = project.create_modeling_featurelist(f'Top {len(selected_features)} features M{model.model_number}',
                                                   selected_features)

# Record the new list's name and id
cust_feat_list = pd.DataFrame(
    [
        {
            'Name': feature_list.name,
            'ID': feature_list.id
        }
    ]
)
# pd.concat replaces DataFrame.append, which recent pandas no longer provides
feature_lists_df = pd.concat([feature_lists_df, cust_feat_list], sort=False).reset_index(drop=True)

feature_lists_df

## Top X Features

In [None]:
# Enter Number of features to include
MAX_FEATURES = """ Enter Code """

In [None]:
print(f'Collecting Feature Impact for M{model.model_number} in project "{project.project_name}"')

# Feature impact records, ordered from highest to lowest normalized impact
impact = sorted(
    model.get_or_request_feature_impact(),
    key=lambda x: x['impactNormalized'],
    reverse=True,
)
# Slicing tolerates MAX_FEATURES larger than the number of features
feature_list_items = [f['featureName'] for f in impact[:MAX_FEATURES]]

feature_list = project.create_modeling_featurelist(f'Top {MAX_FEATURES} features M{model.model_number}',
                                                   feature_list_items)

# Record the new list's name and id
cust_feat_list = pd.DataFrame(
    [
        {
            'Name': feature_list.name,
            'ID': feature_list.id
        }
    ]
)

# pd.concat replaces DataFrame.append, which recent pandas no longer provides
feature_lists_df = pd.concat([feature_lists_df, cust_feat_list], sort=False).reset_index(drop=True)

feature_lists_df

## Manually Select Features

In [None]:
project.get_modeling_features()

## List Features by Normalized Impact

In [None]:
# Feature impact records for the current model, highest normalized impact first
feature_impacts = sorted(
    model.get_or_request_feature_impact(),
    key=lambda record: record['impactNormalized'],
    reverse=True,
)
feature_impacts

### Custom Feature List 1

In [None]:
# Select features from list above
FEATURES = [""" Enter Code """]

print(len(FEATURES))

In [None]:
# Create a feature list from the hand-picked FEATURES and record its name and id
feature_list = project.create_modeling_featurelist(f'Manual Feature Selection {len(FEATURES)}', FEATURES)


cust_feat_list = pd.DataFrame(
    [
        {
            'Name': feature_list.name,
            'ID': feature_list.id
        }
    ]
)

# pd.concat replaces DataFrame.append, which recent pandas no longer provides
feature_lists_df = pd.concat([feature_lists_df, cust_feat_list], sort=False).reset_index(drop=True)

feature_lists_df

### Custom Feature List 2

In [None]:
# Replace the placeholder with the feature names for the second custom list
FEATURES = [""" Enter Code """]

# Quick check of how many features were selected
print(len(FEATURES))

In [None]:
# Create a feature list from the hand-picked FEATURES and record its name and id
feature_list = project.create_modeling_featurelist(f'Manual Feature Selection {len(FEATURES)}', FEATURES)

cust_feat_list = pd.DataFrame(
    [
        {
            'Name': feature_list.name,
            'ID': feature_list.id
        }
    ]
)

# pd.concat replaces DataFrame.append, which recent pandas no longer provides
feature_lists_df = pd.concat([feature_lists_df, cust_feat_list], sort=False).reset_index(drop=True)

feature_lists_df

## List all Feature Lists

In [None]:
project.get_modeling_featurelists()

## Run Blueprints on new Feature Lists

In [None]:
# feature_lists_df

### Get Models/Blueprints to run new Feature Lists on

In [None]:
# Leaderboard of the current project: one row per model that has a score for
# OPTIMIZATION_PERIOD, ordered by the project metric.
lb = project.get_datetime_models()
# A dedicated name is used so the `best_models` frame built earlier is not
# overwritten. Compare against None so a score of exactly 0 is kept.
ranked_models = sorted(
    [model for model in lb
     if model.metrics[project.metric][OPTIMIZATION_PERIOD] is not None],
    key=lambda m: m.metrics[project.metric][OPTIMIZATION_PERIOD],
)

# Build the frame in one go; DataFrame.append is gone from recent pandas
scores_lb = pd.DataFrame(
    [
        {
#             'Project_Name': project.project_name,
            'Project_ID': project.id,
            'Model_ID': m.id,
            'Model_Type': m.model_type,
            'Featurelist': m.featurelist_name,
            'Optimization_Metric': project.metric,
            'Scores': m.metrics,
        }
        for m in ranked_models
    ]
)

# Flatten the nested metrics dict into "<metric>.<partition>" columns
scores_lb = scores_lb.join(json_normalize(scores_lb["Scores"].tolist())).drop(labels=['Scores'], axis=1)

# Drop Empty Columns (filter scores_lb itself; filtering `scores` dropped nothing here)
scores_lb = scores_lb[scores_lb.columns.drop(list(scores_lb.filter(regex='crossValidation$')))]

# Rename Columns (literal match: with regex matching the leading "." would match any character)
scores_lb.columns = scores_lb.columns.str.replace(".backtesting", "_All_BT", regex=False)
scores_lb.columns = scores_lb.columns.str.replace(".holdout", "_Holdout", regex=False)
scores_lb.columns = scores_lb.columns.str.replace(".validation", "_BT_1", regex=False)
scores_lb.columns = scores_lb.columns.str.replace(' ', '_', regex=False)

scores_lb = scores_lb[scores_lb.columns.drop(list(scores_lb.filter(regex='_All_BTScores$')))]

scores_lb.sort_values(by=[""" Enter Code """], ascending=True)

In [None]:
# Enter Model ID's to run
MODEL_LIST = [""" Enter Code """, """ Enter Code """]

## Run New Feature Lists against selected Blueprints

In [None]:
# Training duration string handed to train_datetime() in the next cell.
# NOTE(review): the 17 years / 5 months / 1 day span is hard-coded — presumably
# chosen to cover this lab's dataset; confirm before reusing on other data.
DURATION = dr.helpers.partitioning_methods.construct_duration_string(years=17, months=5, days=1)

In [None]:
# Retrain each selected model on every custom feature list.
PID = scores_lb['Project_ID'][0]

for m in MODEL_LIST:
    model = dr.Model.get(PID, m)

    # Read Name/ID by column name instead of relying on column position
    for fl in feature_lists_df.to_dict(orient='records'):
        try:
            model.train_datetime(featurelist_id=fl['ID'],
                                 training_duration=DURATION)
        except dr.errors.ClientError as err:
            # Reported only when the request was rejected; this message used to
            # print after every attempt, including successful ones.
            print(f"Feature List {fl['Name']} already run on Model {model.model_type} ({err})")
        else:
            print(f"Running Feature List {fl['Name']} on Model {model.model_type}")
        print(' ')

## Compute Backtests for Blueprints run on new Feature Lists

In [None]:
# Partition keys (looked up inside model.metrics[<metric>]) used to rank models below
OPTIMIZE_GROUP = ['validation', 'holdout']

In [None]:
# Uses the `project` selected in the feature-list section above
PROJECT_METRIC = project.metric
# Back to metric names (an earlier cell rebound METRICS to column names);
# de-duplicated via set(), so the order is not guaranteed
METRICS = list(set([PROJECT_METRIC, 'MASE', 'RMSE']))

In [None]:
# For each metric and partition: take the 5 best-scoring models of project PID
# and submit backtest scoring for those without an all-backtests score.
project = Project.get(PID)  # same project for every pass, so fetch it once
for met in METRICS:
    for o in OPTIMIZE_GROUP:
        # Re-read the leaderboard each pass so freshly computed scores are seen
        lb = project.get_datetime_models()

        # Keep models that actually have a score for this metric/partition.
        # Compare against None so a legitimate score of 0 is not dropped,
        # and tolerate models that do not report the metric at all.
        scored = [
            model for model in lb
            if (model.metrics.get(met) or {}).get(o) is not None
        ]
        best_models = sorted(scored, key=lambda m: m.metrics[met][o])[0:5]

        for mod in best_models:
            if mod.metrics[met].get("backtesting") is None:
                print(project.project_name, mod)
                try:
                    dr.DatetimeModel.score_backtests(mod)
                except dr.errors.ClientError as err:
                    # Best effort: report the rejection instead of hiding it
                    print(f'Backtests not submitted for model {mod.model_type} in Project {project.project_name}: {err}')
                else:
                    print(f'Computing backtests for model {mod.model_type} in Project {project.project_name}')
                print(f'All available backtests have been submitted for scoring for project {project.project_name}')
                print(' ')
            else:
                print(project.project_name)
                print(f'{mod.model_type} All Backtests Already Computed')
                print(' ')

## Get Project and Model Scores

In [None]:
# Enter correct optimization period
OPTIMIZATION_PERIOD = """ Enter Code """  # BackTest 1: validation  All Backtest: backtesting  Holdout: holdout 

In [None]:
models = []

# Scores of every model in project PID that has a value for OPTIMIZATION_PERIOD,
# ordered by the project metric.
project = Project.get(PID)
lb = project.get_datetime_models()
# Compare against None so a score of exactly 0 is kept
ranked_models = sorted(
    [model for model in lb
     if model.metrics[project.metric][OPTIMIZATION_PERIOD] is not None],
    key=lambda m: m.metrics[project.metric][OPTIMIZATION_PERIOD],
)

score_records = [
    {
        'Project_Name': project.project_name,
        'Project_ID': project.id,
        'Model_ID': m.id,
        'Model_Type': m.model_type,
        'Featurelist': m.featurelist_name,
        'Optimization_Metric': project.metric,
        'Scores': m.metrics,
    }
    for m in ranked_models
]
# Build the frame in one go; DataFrame.append is gone from recent pandas
scores = pd.DataFrame(score_records)

# This cell covers a single project, so report the number of models collected
print(f'Scores for all {len(score_records)} models in project {project.project_name} have been computed')
print('')

# Flatten the nested metrics dict into "<metric>.<partition>" columns
scores = scores.join(json_normalize(scores["Scores"].tolist())).drop(labels=['Scores'], axis=1)

# Drop Empty Columns
scores = scores[scores.columns.drop(list(scores.filter(regex='crossValidation$')))]

# Rename Columns (literal match: with regex matching the leading "." would match any character)
scores.columns = scores.columns.str.replace(".backtesting", "_All_BT", regex=False)
scores.columns = scores.columns.str.replace(".holdout", "_Holdout", regex=False)
scores.columns = scores.columns.str.replace(".validation", "_BT_1", regex=False)
scores.columns = scores.columns.str.replace(' ', '_', regex=False)

scores = scores[scores.columns.drop(list(scores.filter(regex='_All_BTScores$')))]

scores.sort_values(by=['MASE_All_BT'])

In [None]:
scores.sort_values(by=['MASE_All_BT'])

## Change Numeric to Categorical

In [None]:
# project.create_type_transform_feature(
#         "retail_sales(Cat)",  # new feature name
#         "retail_sales",       # parent name
#         dr.enums.VARIABLE_TYPE_TRANSFORM.CATEGORICAL_INT
# )

## Change Categorical to Text

In [None]:
# project.create_type_transform_feature(
#     "addr_state(Text)",  # new feature name
#     "addr_state",        # parent name
#     dr.enums.VARIABLE_TYPE_TRANSFORM.TEXT
# )