In [1]:
import json
import logging
import views

logging.basicConfig(
    level=logging.DEBUG,
    #level=logging.INFO, # uncomment this and comment debug above for less yelling in red
    format=views.config.LOGFMT,
)

In [2]:
# DATASETS is a dictionary of Dataset objects.
from views import DATASETS
# These are the building blocks of the modelling interface
from views import Ensemble, Model, Downsampling, Period
# These are model specifications from the specfiles
from views.specs.models import cm as model_specs_cm, pgm as model_specs_pgm
from views.specs.periods import get_periods, get_periods_by_name
# Utils
from views.utils import db, io, data as datautils
from views.utils.data import assign_into_df

In [3]:
# These are the core models defined in the ViEWS pipeline
# These are defined in 
from views.apps.pipeline.models_cm import all_cm_models_by_name
from views.apps.pipeline.models_pgm import all_pgm_models_by_name

[2020-11-16 09:40:00,353] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-16 09:40:00,409] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml


In [4]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [5]:
dataset = views.DATASETS["cm_africa_imp_0"]

In [6]:
df = dataset.df

[2020-11-16 09:40:03,141] - views.utils.io:65 - DEBUG - Reading parquet at /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet with cols None
[2020-11-16 09:40:03,616] - views.utils.io:72 - DEBUG - Finished reading parquet from /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet.


In [7]:
run_id = "r_2020_10_01"

In [9]:
periods = get_periods(run_id) # as a list
periods_by_name = get_periods_by_name(run_id) # as a dict
period_b = periods_by_name["B"]
period_c = periods_by_name["C"]
period_c

[2020-11-16 09:40:17,893] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-16 09:40:17,914] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml


Period(name='C', train_start=121, train_end=480, predict_start=489, predict_end=526)

In [11]:
model_from_pipeline_spec = all_cm_models_by_name["cm_sb_reign_drought"]
models=[model_from_pipeline_spec]

In [12]:
for model in models:
    model.fit_estimators(df, populate_extras = False)

[2020-11-16 09:41:35,063] - views.apps.model.api:441 - INFO - Fitting estimators for cm_sb_reign_drought
[2020-11-16 09:41:35,064] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_reign_drought for period A step 1
[2020-11-16 09:41:35,071] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_reign_drought
[2020-11-16 09:41:35,071] - views.apps.model.api:422 - DEBUG - cm_sb_reign_drought downsampled away 0
[2020-11-16 09:41:35,072] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_reign_drought on 14850 rows
[2020-11-16 09:41:35,072] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_reign_drought
[2020-11-16 09:42:35,393] - views.apps.model.api:116 - DEBUG - Saving cm_sb_reign_drought A 1 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_A_1.joblib
[2020-11-16 09:42:56,919] - views.apps.model.api:119 - DEBUG - cm_sb_reign_drought saved to /home/kyle/code/Views2/Ope

[2020-11-16 09:51:24,894] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_reign_drought
[2020-11-16 09:52:16,216] - views.apps.model.api:116 - DEBUG - Saving cm_sb_reign_drought A 36 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_A_36.joblib
[2020-11-16 09:52:38,960] - views.apps.model.api:119 - DEBUG - cm_sb_reign_drought saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_A_36.joblib
[2020-11-16 09:52:38,972] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_reign_drought for period A step 38
[2020-11-16 09:52:38,976] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_reign_drought
[2020-11-16 09:52:38,977] - views.apps.model.api:422 - DEBUG - cm_sb_reign_drought downsampled away 0
[2020-11-16 09:52:38,977] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_reign_drought on 12852 rows
[2020-11-16 09:52:38,978] - views.apps.model.ap

[2020-11-16 10:03:22,052] - views.apps.model.api:422 - DEBUG - cm_sb_reign_drought downsampled away 0
[2020-11-16 10:03:22,052] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_reign_drought on 15228 rows
[2020-11-16 10:03:22,052] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_reign_drought
[2020-11-16 10:04:18,943] - views.apps.model.api:116 - DEBUG - Saving cm_sb_reign_drought B 30 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_B_30.joblib
[2020-11-16 10:04:44,522] - views.apps.model.api:119 - DEBUG - cm_sb_reign_drought saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_B_30.joblib
[2020-11-16 10:04:44,545] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_reign_drought for period B step 36
[2020-11-16 10:04:44,552] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_reign_drought
[2020-11-16 10:04:44,553] - views.apps.model.ap

[2020-11-16 10:17:08,463] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_reign_drought for period C step 24
[2020-11-16 10:17:08,469] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_reign_drought
[2020-11-16 10:17:08,469] - views.apps.model.api:422 - DEBUG - cm_sb_reign_drought downsampled away 0
[2020-11-16 10:17:08,470] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_reign_drought on 18144 rows
[2020-11-16 10:17:08,470] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_reign_drought
[2020-11-16 10:18:10,896] - views.apps.model.api:116 - DEBUG - Saving cm_sb_reign_drought C 24 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_C_24.joblib
[2020-11-16 10:18:38,773] - views.apps.model.api:119 - DEBUG - cm_sb_reign_drought saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_C_24.joblib
[2020-11-16 10:18:38,795] - views.apps.model.ap

[2020-11-16 10:25:41,613] - views.apps.model.api:1181 - DEBUG - Getting feature importances for cm_sb_reign_droughtfor period B step 18
[2020-11-16 10:25:41,615] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_B_18.joblib
[2020-11-16 10:25:51,553] - views.apps.model.api:1181 - DEBUG - Getting feature importances for cm_sb_reign_droughtfor period B step 24
[2020-11-16 10:25:51,554] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_B_24.joblib
[2020-11-16 10:26:01,715] - views.apps.model.api:1181 - DEBUG - Getting feature importances for cm_sb_reign_droughtfor period B step 30
[2020-11-16 10:26:01,716] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_B_30.joblib
[2020-11-16 10:26:11,194] - views.apps.model.api:1181 - DEBUG - Getting feature importances for cm_sb_reign_droughtfor period B step 36


[2020-11-16 10:54:20,462] - views.apps.model.api:1124 - DEBUG - Building permutation importances for cm_sb_reign_droughtfor period B step 9
[2020-11-16 10:54:20,463] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_B_9.joblib
[2020-11-16 10:56:17,845] - views.apps.model.api:1124 - DEBUG - Building permutation importances for cm_sb_reign_droughtfor period B step 12
[2020-11-16 10:56:17,848] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_B_12.joblib
[2020-11-16 10:58:26,369] - views.apps.model.api:1124 - DEBUG - Building permutation importances for cm_sb_reign_droughtfor period B step 18
[2020-11-16 10:58:26,382] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_B_18.joblib
[2020-11-16 11:01:25,101] - views.apps.model.api:1124 - DEBUG - Building permutation importances for cm_sb_reign_droughtfor

In [13]:
for model in models:
    
    # Uncalibrated predictions
    df_pred = model.predict(df)
    # assign_into_df takes care to only overwrite rows with actual values
    # This way we can keep all periods in the same df
    # It's also idempotent, no joining, so run as many times as you like. 
    df = assign_into_df(df_to=df, df_from=df_pred)
    
    # Calibrated predictions

    df_pred = model.predict_calibrated(
        df=df, 
        period_calib=period_b,
        period_test=period_c,
    )
    df = assign_into_df(df_to=df, df_from=df_pred)

[2020-11-16 12:03:29,583] - views.apps.model.api:552 - INFO - Predicting for cm_sb_reign_drought
[2020-11-16 12:03:29,587] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_reign_drought periods: [Period(name='A', train_start=121, train_end=396, predict_start=397, predict_end=432), Period(name='B', train_start=121, train_end=432, predict_start=433, predict_end=468), Period(name='C', train_start=121, train_end=480, predict_start=483, predict_end=520)]
[2020-11-16 12:03:30,585] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_A_1.joblib
[2020-11-16 12:03:57,204] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_A_3.joblib
[2020-11-16 12:04:12,520] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_A_6.joblib
[2020-11-16 12:04:22,581] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/co

[2020-11-16 12:11:34,722] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_C_1.joblib
[2020-11-16 12:11:46,667] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_C_3.joblib
[2020-11-16 12:11:58,392] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_C_6.joblib
[2020-11-16 12:12:09,684] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_C_9.joblib
[2020-11-16 12:12:24,367] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_C_12.joblib
[2020-11-16 12:12:35,265] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_reign_drought_C_18.joblib
[2020-11-16 12:12:45,775] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views

In [14]:
for model in models:
    model.evaluate(df)

[2020-11-16 12:13:48,549] - views.apps.model.api:966 - INFO - Evaluating cm_sb_reign_drought
[2020-11-16 12:13:48,552] - views.apps.model.api:970 - DEBUG - Evaluating uncalibrated predictions for cm_sb_reign_drought period A step-combined
[2020-11-16 12:13:48,606] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_reign_drought period A step 1
[2020-11-16 12:13:48,641] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_reign_drought period A step 3
[2020-11-16 12:13:48,674] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_reign_drought period A step 6
[2020-11-16 12:13:48,705] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_reign_drought period A step 9
[2020-11-16 12:13:48,736] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_reign_drought period A step 12
[2020-11-16 12:13:48,767] - views.apps.model.api:1009 - DEBU

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [15]:
for model in models:
    print(model.name)
    #print(model.scores)
    print("EVAL SCORES:")
    print(json.dumps(model.scores, indent=2))
    print("FEATURE_IMPORTANCES")
    print(json.dumps(model.extras.feature_importances, indent=2))
    print("#"*80)
    

cm_sb_reign_drought
EVAL SCORES:
{
  "A": {
    "1": {
      "uncalibrated": {
        "average_precision": 0.10505309797077135,
        "area_under_roc": 0.5469519934014896,
        "brier": 0.11947332397470352
      },
      "calibrated": {}
    },
    "3": {
      "uncalibrated": {
        "average_precision": 0.10287006981729244,
        "area_under_roc": 0.5286949984629853,
        "brier": 0.11872906991955227
      },
      "calibrated": {}
    },
    "6": {
      "uncalibrated": {
        "average_precision": 0.10573290939749364,
        "area_under_roc": 0.5473020066411207,
        "brier": 0.11912792900260195
      },
      "calibrated": {}
    },
    "9": {
      "uncalibrated": {
        "average_precision": 0.10912196525680684,
        "area_under_roc": 0.5461773988842186,
        "brier": 0.11674739948918389
      },
      "calibrated": {}
    },
    "12": {
      "uncalibrated": {
        "average_precision": 0.0990472867144071,
        "area_under_roc": 0.513277676155576

In [16]:
cols_predict = [model_from_pipeline_spec.col_sc_calibrated for model in models]
df_results=df.loc[period_c.times_predict, cols_predict]
df_results.to_csv("sb_reighn_drought_results.csv")