# Abid - Phase 2
#### 12/9/2022

#### Relevant Libraries

In [1]:
# !pip install scikit-learn

In [7]:
#### Add Relevant Libraries
#################################################
from datetime import datetime, time, date, timedelta
import pandas as pd
import numpy as np
import sys
import os
# Derive the project root from the current working directory: keep everything
# before the first 'Main' in the path and re-append 'Main/'.
CWD = os.getcwd()
WD = f"{CWD.split('Main')[0]}Main/"
# Put the 'Functions' folder on the import path (position 1, right after the
# first entry) ahead of the `Scripts.*` imports that follow.
DFNC = f"{WD}Functions"
sys.path.insert(1, DFNC)

#### Add External Functions
#################################################
from Scripts.FN_Support import Drct, Periods
from Scripts.CREATE_MODEL_CLASSIFICATION import create_model 
from Scripts.RUN_PRODUCTION_CLASSIFICATION import run_production_predict 

### Updated Patterns
- Below is an updated summary of patterns
- Several of these patterns are new from what I sent you last time
- All of them have a different column naming structure (more on that below...)

In [8]:
### Aggregation period type (also the folder name under Sources/aggs)
#######################################
AGG='5T'

### Load the pattern summaries and collect every distinct pattern code
#######################################
SUMMERY_FILE = WD + f'Sources/aggs/{AGG}/Results/Pattern_Summeries/All.csv'
summeries = Drct.csv_from(SUMMERY_FILE)
# Distinct values of the 'pcode' column, in order of first appearance.
ALL_CODES = list(summeries["pcode"].unique())
print(summeries.loc[:, ['pcode', 'filled']])


   pcode  filled
0   3BPD   17777
1   3BPU   18429
2   BFLD   13953
3   BFLU   14734
4    BOD   42491
5    BOU   73240
6   BTRD    2965
7   BTRU    2990
8    CCB   26940
9    CCS   26629
10    DB   69421
11   DBB   22744
12    DT   28497
13   DTS   11382
14   PBB     880
15   PBS     882
16  SBRT   16388
17   SSD   87128
18  SSRT    6777
19   SSU   86691
20   TAG   14475
21   TAR   14117


### Load Lookback Periods
- This LOOKBACK_DF is what I use to re-test the previous 12-month period once every month
  1. So every month I look at the previous 12-months starting on the last day of the previous month
  2. I want to create new models from the previous 12-month period
  3. I will backtest/validate the models by running the current month data through the model

In [9]:
### Build the lookback table, then keep the first 12 periods up to 2022_11
#######################################
LOOKBACK_DF = Periods.lookback_df(LB_MONTHS=12,NEXT=False,MIN_DATE=None,MAX=None,LB_MIN_DATE=None)
LOOKBACK_DF = (
    LOOKBACK_DF
    # month_id strings are compared lexicographically against '2022_11'
    .loc[LOOKBACK_DF["month_id"] <= '2022_11', ['month_id', 'lookback_start', 'lookback_end']]
    .head(12)
    .reset_index(drop=True)
)
print(LOOKBACK_DF)

   month_id lookback_start lookback_end
0   2022_11     2021-11-01   2022-10-31
1   2022_10     2021-10-01   2022-09-30
2   2022_09     2021-09-01   2022-08-31
3   2022_08     2021-08-01   2022-07-31
4   2022_07     2021-07-01   2022-06-30
5   2022_06     2021-06-01   2022-05-31
6   2022_05     2021-05-01   2022-04-30
7   2022_04     2021-04-01   2022-03-31
8   2022_03     2021-03-01   2022-02-28
9   2022_02     2021-02-01   2022-01-31
10  2022_01     2021-01-01   2021-12-31
11  2021_12     2020-12-01   2021-11-30


### Loop through Lookback Periods
- Below is an example of what I would like to be able to do in creating & testing models for each lookback period


In [10]:
def treat_timestamp(ID):
    """Split the timestamp embedded in an identifier into integer parts.

    The identifier is split on '_' and its second field is read as a
    fixed-width digit string: 4 digits, then four 2-digit groups.

    Parameters
    ----------
    ID : str
        Identifier whose second '_'-separated field holds the timestamp digits.

    Returns
    -------
    dict
        Keys 'year', 'month', 'date' (the two digits after the month),
        'hour' and 'minutes', in that order, each mapped to an int.
    """
    digits = ID.split('_')[1]
    # (key, start, stop) character offsets of every field inside `digits`.
    layout = (
        ('year', 0, 4),
        ('month', 4, 6),
        ('date', 6, 8),
        ('hour', 8, 10),
        ('minutes', 10, 12),
    )
    return {key: int(digits[start:stop]) for key, start, stop in layout}

In [11]:
# Collects one `stats` result per (lookback period, pattern code) pair.
lst = []

# Parts returned by treat_timestamp(); each becomes an '_ftN_<part>' column,
# added in exactly this order.
_TIMESTAMP_PARTS = ('year', 'month', 'date', 'hour', 'minutes')


def _prepare_model_frame(frame):
    """Add timestamp feature columns and a WIN/LOSS target to `frame` in place.

    Parameters
    ----------
    frame : DataFrame
        Must contain the '_UTILITY_puid' and '_TARGET' columns.

    Returns
    -------
    DataFrame
        The same object, with '_ftN_year', '_ftN_month', '_ftN_date',
        '_ftN_hour' and '_ftN_minutes' set from the parsed '_UTILITY_puid',
        and '_TARGET' replaced by 'WIN' where the value is > 1, else 'LOSS'.
    """
    # Parse each identifier once and reuse the result for all five columns.
    parsed = frame['_UTILITY_puid'].apply(treat_timestamp)
    for part in _TIMESTAMP_PARTS:
        frame[f'_ftN_{part}'] = parsed.apply(lambda parts, key=part: parts[key])
    # Note: a NaN target compares False against 1 and is therefore labelled 'LOSS'.
    frame['_TARGET'] = frame['_TARGET'].apply(lambda x: 'WIN' if x > 1 else 'LOSS')
    return frame


for _row_idx, row in LOOKBACK_DF.iterrows():

    MONTH_ID = row["month_id"]
    lookback_end_ts = pd.to_datetime(row["lookback_end"])
    lookback_start = pd.to_datetime(row["lookback_start"]).strftime('%Y-%m-%d')
    lookback_end = lookback_end_ts.strftime('%Y-%m-%d')
    # Month id of the last month inside the lookback window.
    LAST_MO_ID = lookback_end_ts.strftime('%Y_%m')
    print(f"\n{'='*100}\nRun for {MONTH_ID} \t Lookback Range {lookback_start} to {lookback_end}\n{'='*100}\n")

    for PCODE in ALL_CODES:
        ### Load Production Data (For Backtesting)
        #######################################
        model_production_data = _prepare_model_frame(
            Drct.load_pcode(AGG=AGG,PCODE=PCODE,PP=True,LAST_MO_ID=MONTH_ID,MONTHS_BACK=1,PRINT=True)
        )

        ### Model Creation Data
        #######################################
        model_creation_data = _prepare_model_frame(
            Drct.load_pcode(AGG=AGG,PCODE=PCODE,PP=True,LAST_MO_ID=LAST_MO_ID,MONTHS_BACK=12,PRINT=True)
        )

        ### Define Column Types
        #######################################
        # Columns are grouped by the marker substring contained in their name.
        cols = model_creation_data.columns.tolist()
        cols_attributes_basic = [name for name in cols if '_ftN_' in name]
        cols_attributes_category = [name for name in cols if '_ftC_' in name]
        cols_utility = [name for name in cols if '_UTILITY_' in name]
        column_target = '_TARGET'

        columns_dist = {'cols_attributes_basic': cols_attributes_basic,
                        'cols_attributes_category': cols_attributes_category,
                        'column_target': column_target}

        ## RUN :: Model Creation
        ######################################
        # Left disabled as in the original run; prediction below is invoked
        # without (re)creating the model in this cell.
#         create_model(model_creation_data,AGG,PCODE,MONTH_ID, columns_dist)

        ### RUN :: Model Production Predict
        #######################################
        model_production_data, stats = run_production_predict(model_production_data,AGG,PCODE,MONTH_ID, columns_dist)
        stats['n_training'] = model_creation_data.shape[0]
        # Count classes once; .get() avoids a KeyError when a training window
        # happens to contain only one of the two classes.
        target_counts = model_creation_data[column_target].value_counts()
        stats['WIN:LOSS'] = f"{target_counts.get('WIN', 0)}:{target_counts.get('LOSS', 0)}"
        lst.append(stats)



Run for 2022_11 	 Lookback Range 2021-11-01 to 2022-10-31

3BPD 	 Pre-Processed 		 1 Months 	 2022-11-01 to 2022-11-30 	 Len: 673 	 Cols: 212
3BPD 	 Pre-Processed 		 12 Months 	 2021-11-01 to 2022-10-31 	 Len: 6,066 	 Cols: 212

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR '3BPD' | 2022_11
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

3BPU 	 Pre-Processed 		 1 Months 	 2022-11-01 to 2022-11-30 	 Len: 663 	 Cols: 212
3BPU 	 Pre-Processed 		 12 Months 	 2021-11-01 to 2022-10-31 	 Len: 6,073 	 Cols: 212

	---------------------------------------------

DBB 	 Pre-Processed 		 12 Months 	 2021-11-01 to 2022-10-31 	 Len: 8,310 	 Cols: 206

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'DBB' | 2022_11
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

DT 	 Pre-Processed 		 1 Months 	 2022-11-01 to 2022-11-30 	 Len: 898 	 Cols: 210
DT 	 Pre-Processed 		 12 Months 	 2021-11-01 to 2022-10-31 	 Len: 9,283 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'DT' | 2022_11
	------------------------------------------------------------
	1. Load Saved Model Files c

3BPD 	 Pre-Processed 		 12 Months 	 2021-10-01 to 2022-09-30 	 Len: 5,718 	 Cols: 212

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR '3BPD' | 2022_10
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

3BPU 	 Pre-Processed 		 1 Months 	 2022-10-03 to 2022-10-31 	 Len: 779 	 Cols: 212
3BPU 	 Pre-Processed 		 12 Months 	 2021-10-01 to 2022-09-30 	 Len: 5,532 	 Cols: 212

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR '3BPU' | 2022_10
	------------------------------------------------------------
	1. Load Saved Model

	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

DT 	 Pre-Processed 		 1 Months 	 2022-10-03 to 2022-10-31 	 Len: 990 	 Cols: 210
DT 	 Pre-Processed 		 12 Months 	 2021-10-01 to 2022-09-30 	 Len: 8,633 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'DT' | 2022_10
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	---------------------

3BPU 	 Pre-Processed 		 12 Months 	 2021-09-01 to 2022-08-31 	 Len: 5,293 	 Cols: 212

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR '3BPU' | 2022_09
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

BFLD 	 Pre-Processed 		 1 Months 	 2022-09-01 to 2022-09-30 	 Len: 375 	 Cols: 211
BFLD 	 Pre-Processed 		 12 Months 	 2021-09-02 to 2022-08-31 	 Len: 4,068 	 Cols: 211

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BFLD' | 2022_09
	------------------------------------------------------------
	1. Load Saved Model

	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

DTS 	 Pre-Processed 		 1 Months 	 2022-09-01 to 2022-09-30 	 Len: 247 	 Cols: 206
DTS 	 Pre-Processed 		 12 Months 	 2021-09-01 to 2022-08-31 	 Len: 3,424 	 Cols: 206

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'DTS' | 2022_09
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------

BFLD 	 Pre-Processed 		 12 Months 	 2021-08-02 to 2022-07-29 	 Len: 3,909 	 Cols: 211

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BFLD' | 2022_08
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

BFLU 	 Pre-Processed 		 1 Months 	 2022-08-01 to 2022-08-31 	 Len: 296 	 Cols: 211
BFLU 	 Pre-Processed 		 12 Months 	 2021-08-02 to 2022-07-29 	 Len: 3,682 	 Cols: 211

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BFLU' | 2022_08
	------------------------------------------------------------
	1. Load Saved Model

	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

PBB 	 Pre-Processed 		 1 Months 	 2022-08-01 to 2022-08-26 	 Len: 17 	 Cols: 210
PBB 	 Pre-Processed 		 12 Months 	 2021-08-04 to 2022-07-26 	 Len: 289 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'PBB' | 2022_08
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	---------------------

	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

BFLU 	 Pre-Processed 		 1 Months 	 2022-07-01 to 2022-07-29 	 Len: 467 	 Cols: 211
BFLU 	 Pre-Processed 		 12 Months 	 2021-07-01 to 2022-06-30 	 Len: 3,396 	 Cols: 211

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BFLU' | 2022_07
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	---------------

PBB 	 Pre-Processed 		 12 Months 	 2021-07-06 to 2022-06-30 	 Len: 279 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'PBB' | 2022_07
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

PBS 	 Pre-Processed 		 1 Months 	 2022-07-07 to 2022-07-28 	 Len: 30 	 Cols: 210
PBS 	 Pre-Processed 		 12 Months 	 2021-07-01 to 2022-06-24 	 Len: 247 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'PBS' | 2022_07
	------------------------------------------------------------
	1. Load Saved Model Files cre

BFLU 	 Pre-Processed 		 12 Months 	 2021-06-01 to 2022-05-31 	 Len: 3,233 	 Cols: 211

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BFLU' | 2022_06
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

BOD 	 Pre-Processed 		 1 Months 	 2022-06-01 to 2022-06-30 	 Len: 1,765 	 Cols: 210
BOD 	 Pre-Processed 		 12 Months 	 2021-06-01 to 2022-05-31 	 Len: 9,492 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BOD' | 2022_06
	------------------------------------------------------------
	1. Load Saved Model 

	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

SBRT 	 Pre-Processed 		 1 Months 	 2022-06-01 to 2022-06-30 	 Len: 962 	 Cols: 210
SBRT 	 Pre-Processed 		 12 Months 	 2021-06-01 to 2022-05-31 	 Len: 4,435 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'SBRT' | 2022_06
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	---------------

	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

BOU 	 Pre-Processed 		 1 Months 	 2022-05-02 to 2022-05-31 	 Len: 1,460 	 Cols: 210
BOU 	 Pre-Processed 		 12 Months 	 2021-05-03 to 2022-04-29 	 Len: 15,445 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BOU' | 2022_05
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	---------------

SBRT 	 Pre-Processed 		 12 Months 	 2021-05-03 to 2022-04-29 	 Len: 4,140 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'SBRT' | 2022_05
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

SSD 	 Pre-Processed 		 1 Months 	 2022-05-02 to 2022-05-31 	 Len: 1,849 	 Cols: 206
SSD 	 Pre-Processed 		 12 Months 	 2021-05-03 to 2022-04-29 	 Len: 20,673 	 Cols: 206

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'SSD' | 2022_05
	------------------------------------------------------------
	1. Load Saved Model

BOU 	 Pre-Processed 		 12 Months 	 2021-04-01 to 2022-03-31 	 Len: 15,524 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BOU' | 2022_04
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

BTRD 	 Pre-Processed 		 1 Months 	 2022-04-01 to 2022-04-29 	 Len: 82 	 Cols: 213
BTRD 	 Pre-Processed 		 12 Months 	 2021-04-01 to 2022-03-30 	 Len: 635 	 Cols: 213

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BTRD' | 2022_04
	------------------------------------------------------------
	1. Load Saved Model Fil

	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

SSD 	 Pre-Processed 		 1 Months 	 2022-04-01 to 2022-04-29 	 Len: 1,534 	 Cols: 206
SSD 	 Pre-Processed 		 12 Months 	 2021-04-01 to 2022-03-31 	 Len: 20,863 	 Cols: 206

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'SSD' | 2022_04
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	---------------

	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

BTRD 	 Pre-Processed 		 1 Months 	 2022-03-02 to 2022-03-30 	 Len: 80 	 Cols: 213
BTRD 	 Pre-Processed 		 12 Months 	 2021-03-01 to 2022-02-28 	 Len: 607 	 Cols: 213

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BTRD' | 2022_03
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

BTRU 	 Pre-P

SSD 	 Pre-Processed 		 1 Months 	 2022-03-01 to 2022-03-31 	 Len: 2,269 	 Cols: 206
SSD 	 Pre-Processed 		 12 Months 	 2021-03-01 to 2022-02-28 	 Len: 20,913 	 Cols: 206

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'SSD' | 2022_03
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

SSRT 	 Pre-Processed 		 1 Months 	 2022-03-01 to 2022-03-31 	 Len: 268 	 Cols: 210
SSRT 	 Pre-Processed 		 12 Months 	 2021-03-01 to 2022-02-28 	 Len: 1,571 	 Cols: 210

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'SSRT' | 2022_03

	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

BTRU 	 Pre-Processed 		 1 Months 	 2022-02-01 to 2022-02-28 	 Len: 66 	 Cols: 213
BTRU 	 Pre-Processed 		 12 Months 	 2021-02-01 to 2022-01-31 	 Len: 735 	 Cols: 213

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'BTRU' | 2022_02
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

CCB 	 Pre-Pr

	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

SSU 	 Pre-Processed 		 1 Months 	 2022-02-01 to 2022-02-28 	 Len: 1,879 	 Cols: 206
SSU 	 Pre-Processed 		 12 Months 	 2021-02-01 to 2022-01-31 	 Len: 21,840 	 Cols: 206

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'SSU' | 2022_02
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

TAG 	 Pre

	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

CCB 	 Pre-Processed 		 1 Months 	 2022-01-03 to 2022-01-31 	 Len: 523 	 Cols: 207
CCB 	 Pre-Processed 		 12 Months 	 2021-01-04 to 2021-12-30 	 Len: 5,854 	 Cols: 207

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'CCB' | 2022_01
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

CCS 	 Pre-Processed 		 1 Months 	 2022-01-03 to 2022-01-31 	 Len: 571 	 Cols: 207
CCS 	 Pre-Processed 		 12 Months 	 2021-01-04 to 2021-12-30 	

	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

TAG 	 Pre-Processed 		 1 Months 	 2022-01-03 to 2022-01-31 	 Len: 405 	 Cols: 211
TAG 	 Pre-Processed 		 12 Months 	 2021-01-04 to 2021-12-30 	 Len: 3,571 	 Cols: 211

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'TAG' | 2022_01
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

TAR 	 Pre-Pr

	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

CCS 	 Pre-Processed 		 1 Months 	 2021-12-01 to 2021-12-30 	 Len: 475 	 Cols: 207
CCS 	 Pre-Processed 		 12 Months 	 2020-12-01 to 2021-11-30 	 Len: 6,199 	 Cols: 207

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'CCS' | 2021_12
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------

DB 	 Pre-Processed 		 1 Months 	 2021-12-01 to 2021-12-31 	 Len: 1,620 	 Cols: 210
DB 	 Pre-Processed 		 12 Months 	 2020-12-01 to 2021-11-30 	

TAR 	 Pre-Processed 		 12 Months 	 2020-12-01 to 2021-11-30 	 Len: 3,215 	 Cols: 211

	------------------------------------------------------------
	RUN PRODUCTION PREDICT FOR 'TAR' | 2021_12
	------------------------------------------------------------
	1. Load Saved Model Files created from 'model_creation_data'
	2. Create Prediction Column in 'model_production_data'
	3. Create Stats KPI CSV File with 'model_production_data' results
	4. Save the updated 'model_production_data' and 'stats' to CSV
	5. Return the updated 'model_production_data' and 'stats' from function
	------------------------------------------------------------



In [16]:
# Raise the display row cap so long tables are not truncated.
pd.set_option('display.max_rows', 1000)

# Stack every collected stats result, then show only the rows for pattern '3BPU'.
df_stats = pd.concat(lst, axis=0)
df_stats.loc[df_stats['pcode'].eq('3BPU')]

Unnamed: 0,pcode,month_id,prediction_accuracy,prediction_precision,prediction_recall,prediction_F1_score,prediction_seconds,n_training,WIN:LOSS
0,3BPU,2022_11,0.639517,0.648062,0.972093,0.777674,0.08721,6073,3995:2078
0,3BPU,2022_10,0.709884,0.711514,0.994575,0.829563,0.074559,5532,3598:1934
0,3BPU,2022_09,0.668103,0.690531,0.937304,0.795213,0.07336,5293,3416:1877
0,3BPU,2022_08,0.643059,0.644699,0.991189,0.78125,0.103237,5154,3322:1832
0,3BPU,2022_07,0.656667,0.673611,0.955665,0.790224,0.106881,4865,3153:1712
0,3BPU,2022_06,0.649007,0.651413,0.987755,0.785077,0.133047,4305,2790:1515
0,3BPU,2022_05,0.598333,0.613309,0.929155,0.738895,0.088472,4066,2647:1419
0,3BPU,2022_04,0.628289,0.637931,0.958549,0.766046,0.095911,4040,2648:1392
0,3BPU,2022_03,0.668301,0.677258,0.975904,0.799605,0.114149,4060,2701:1359
0,3BPU,2022_02,0.659176,0.660305,0.988571,0.791762,0.089605,3966,2634:1332


### NOTES About Column Naming Structure
- There are 4 primary types of columns:
    1. The target column (y) for creating/validating predictions. This column is named '_TARGET'
    2. Utility columns that are only here to help me inspect the data; these should not be used in creating predictions (start with '_UTILITY_')
    3. Basic Attribute columns (X) that are numerical (but not necessarily all linear) used for creating predictions (start with '_ftN_')
    4. Categorical Attribute columns (X) that have already been converted from different strings to integers used for creating predictions (start with '_ftC_')


In [None]:
# Show the first five names of each column group, plus the target column name.
print(
    f"\nUtility Column Examples \t {cols_utility[0:5]}",
    f"Basic Attributes Examples \t {cols_attributes_basic[0:5]}",
    f"Category Attributes Examples \t {cols_attributes_category[0:5]}",
    f"Target Column Is \t\t {column_target}\n",
    sep="\n",
)


### NOTES About model_production_data & model_creation_data
- The function Drct.load_pcode() Loads the model data for a specific date range.
- Notice the date range for the 'model_production_data' is a 1-month period
- Notice the date range for the 'model_creation_data' is the previous 12-month period


## Model Creation Master Function
- This will obviously be a much longer and more involved function, but the main takeaways are:
   - Even though we need to re-create these models, I think it's safe to build off of your analysis that Random Forest & All Features seems to work best, so let's limit the model creation to that general scope for now.
   - It should save the model information for the specific lookback period into that period's folder
   - It should save whatever information is necessary (e.g. a pickle file) so that in production mode it can make as fast of a prediction as possible.

In [None]:
### RUN :: Model Creation
#######################################
# create_model(model_creation_data,AGG,PCODE,MONTH_ID, columns_dist, Scaling)

## Model Production Master Function
- This function is meant to be a "mock version" of what I will end up using with live real-time data.
- In live production mode, I will take real-time data (just like 'model_production_data' except only a few rows at a time and not a whole month) and send it to this function in order to get a new 'predict' column in return.
- For backtesting purposes I would also like to have a 'stats' dataframe returned as well with run-time and accuracy scores.


In [None]:
### RUN :: Model Production Predict
#######################################
# NOTE(review): this call reuses model_production_data, PCODE, MONTH_ID and
# columns_dist as left behind by the last iteration of the lookback loop in an
# earlier cell, so it only works after that cell has been run.
# NOTE(review): model_production_data was already reassigned from the return
# value of run_production_predict inside that loop — confirm the function
# tolerates a frame that has been through prediction once.
model_production_data, stats = run_production_predict(model_production_data,AGG,PCODE,MONTH_ID, columns_dist)

In [None]:
# Display the frame returned by run_production_predict.
model_production_data

In [None]:
# Display the stats object returned by run_production_predict.
stats