In [1]:
#cm_sb_demog
import json
import logging
import views

# Root logger setup using the log format string provided by views.config.
# DEBUG is noisy in the notebook (lots of red log output); replace
# logging.DEBUG with logging.INFO here for a quieter run.
logging.basicConfig(format=views.config.LOGFMT, level=logging.DEBUG)
# DATASETS is a dictionary of Dataset objects.
from views import DATASETS
# These are the building blocks of the modelling interface
from views import Ensemble, Model, Downsampling, Period
# These are model specifications from the specfiles
from views.specs.models import cm as model_specs_cm, pgm as model_specs_pgm
from views.specs.periods import get_periods, get_periods_by_name
# Utils
from views.utils import db, io, data as datautils
from views.utils.data import assign_into_df
from views.apps.pipeline.models_cm import all_cm_models_by_name
from views.apps.pipeline.models_pgm import all_pgm_models_by_name
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
# Identifier passed to the period lookups in the next cell.
run_id = "d_2020_04_01"

# DATASETS maps dataset names to Dataset objects; take the one this notebook
# works on and keep a handle on its dataframe.
dataset = DATASETS["cm_africa_imp_0"]
df = dataset.df

[2020-11-20 09:59:52,126] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-20 09:59:52,183] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-20 09:59:52,227] - views.utils.io:65 - DEBUG - Reading parquet at /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet with cols None
[2020-11-20 09:59:53,215] - views.utils.io:72 - DEBUG - Finished reading parquet from /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet.


In [2]:
# Periods for this run in two shapes: a list (assigned to the models below)
# and a dict keyed by period name (used to pick calibration/test periods).
periods = get_periods(run_id)
periods_by_name = get_periods_by_name(run_id)

# Pull out the three named periods in one go.
period_a, period_b, period_c = (periods_by_name[key] for key in ("A", "B", "C"))

# Display the last period as the cell output.
period_c

[2020-11-20 09:59:59,655] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-20 09:59:59,747] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml


Period(name='C', train_start=121, train_end=480, predict_start=483, predict_end=520)

In [3]:
# Work on a single model taken from the cm pipeline's name -> model mapping.
# It is kept in a list so the cells below can loop over `models` unchanged
# if more models are added.
models = [all_cm_models_by_name["cm_sb_demog"]]
model_from_pipeline_spec = models[0]

In [4]:
# Attach this run's list of periods (from get_periods(run_id)) to every model,
# replacing whatever periods the pipeline spec came with.
for model in models:
    model.periods = periods

In [6]:
# Display the model spec as the cell output; the rendered spec lists the
# name, outcome column, feature columns, steps, periods and estimators.
# NOTE(review): this cell's execution count (6) is higher than the fit cell's (5)
# that follows it, i.e. it was run out of order — re-run top to bottom before sharing.
model_from_pipeline_spec

{
  "name": "cm_sb_demog",
  "col_outcome": "greq_25_ged_best_sb",
  "cols_features": [
    "fvp_grpop200",
    "fvp_population200",
    "fvp_ssp2_edu_sec_15_24_prop",
    "fvp_ssp2_urban_share_iiasa",
    "wdi_sp_dyn_imrt_in",
    "wdi_sp_dyn_tfrt_in"
  ],
  "steps": [
    1,
    3,
    6,
    9,
    12,
    18,
    24,
    30,
    36,
    38
  ],
  "periods": [
    {
      "name": "A",
      "train_start": 121,
      "train_end": 396,
      "predict_start": 397,
      "predict_end": 432
    },
    {
      "name": "B",
      "train_start": 121,
      "train_end": 432,
      "predict_start": 433,
      "predict_end": 468
    },
    {
      "name": "C",
      "train_start": 121,
      "train_end": 480,
      "predict_start": 483,
      "predict_end": 520
    }
  ],
  "outcome_type": "prob",
  "estimators": {
    "name": "cm_sb_demog",
    "initial_estimator": {
      "base_estimator": {
        "criterion": "gini",
        "splitter": "best",
        "max_depth": null,
        "min_samp

In [5]:
# Fit every model's estimators on the full dataframe. Judging by the captured
# log, this fits and saves one estimator per (period, step) and takes roughly
# a minute per fit.
# populate_extras is switched off here; NOTE(review): confirm nothing later
# relies on extras being filled in by this call.
for model in models:
    model.fit_estimators(df, populate_extras=False)

[2020-11-20 10:00:07,214] - views.apps.model.api:441 - INFO - Fitting estimators for cm_sb_demog
[2020-11-20 10:00:07,217] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_demog for period A step 1
[2020-11-20 10:00:07,273] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_demog
[2020-11-20 10:00:07,277] - views.apps.model.api:422 - DEBUG - cm_sb_demog downsampled away 0
[2020-11-20 10:00:07,279] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_demog on 14850 rows
[2020-11-20 10:00:07,282] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_demog
[2020-11-20 10:00:59,260] - views.apps.model.api:116 - DEBUG - Saving cm_sb_demog A 1 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_demog_A_1.joblib
[2020-11-20 10:01:14,926] - views.apps.model.api:119 - DEBUG - cm_sb_demog saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_demog_A_1.joblib
[2020-11-20 10:01:14,941] 

[2020-11-20 10:09:40,390] - views.apps.model.api:422 - DEBUG - cm_sb_demog downsampled away 0
[2020-11-20 10:09:40,391] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_demog on 12852 rows
[2020-11-20 10:09:40,391] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_demog
[2020-11-20 10:10:31,936] - views.apps.model.api:116 - DEBUG - Saving cm_sb_demog A 38 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_demog_A_38.joblib
[2020-11-20 10:10:44,245] - views.apps.model.api:119 - DEBUG - cm_sb_demog saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_demog_A_38.joblib
[2020-11-20 10:10:44,258] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_demog for period B step 1
[2020-11-20 10:10:44,273] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_demog
[2020-11-20 10:10:44,274] - views.apps.model.api:422 - DEBUG - cm_sb_demog downsampled away 0
[2020-11-20 10:10:44,275] 

[2020-11-20 10:20:13,018] - views.apps.model.api:116 - DEBUG - Saving cm_sb_demog B 36 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_demog_B_36.joblib
[2020-11-20 10:20:28,819] - views.apps.model.api:119 - DEBUG - cm_sb_demog saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_demog_B_36.joblib
[2020-11-20 10:20:28,834] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_demog for period B step 38
[2020-11-20 10:20:28,846] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_demog
[2020-11-20 10:20:28,846] - views.apps.model.api:422 - DEBUG - cm_sb_demog downsampled away 0
[2020-11-20 10:20:28,847] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_demog on 14796 rows
[2020-11-20 10:20:28,847] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_demog
[2020-11-20 10:21:15,585] - views.apps.model.api:116 - DEBUG - Saving cm_sb_demog B 38 to /home/kyle/code/Views2/OpenVi

[2020-11-20 10:30:50,241] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_demog for period C step 36
[2020-11-20 10:30:50,254] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_demog
[2020-11-20 10:30:50,255] - views.apps.model.api:422 - DEBUG - cm_sb_demog downsampled away 0
[2020-11-20 10:30:50,256] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_demog on 17496 rows
[2020-11-20 10:30:50,257] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_demog
[2020-11-20 10:31:39,810] - views.apps.model.api:116 - DEBUG - Saving cm_sb_demog C 36 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_demog_C_36.joblib
[2020-11-20 10:31:55,211] - views.apps.model.api:119 - DEBUG - cm_sb_demog saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_demog_C_36.joblib
[2020-11-20 10:31:55,224] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_demog for period C step 38
[2020-11-20 10:

In [None]:
for model in models:
    # Plain predictions first.
    # Per the original author's note, assign_into_df only overwrites rows that
    # have actual values, so all periods can live in the same df, and it is
    # idempotent (no joining), so this cell can be re-run freely.
    df_pred = model.predict(df)
    df = assign_into_df(df_to=df, df_from=df_pred)

    # Calibrated predictions: calibrate on A to predict B, then calibrate on B
    # to predict C. Each pass sees the df updated by the previous one, with
    # missing values filled with 0 for the call only (df itself keeps its NaNs).
    for period_calib, period_test in ((period_a, period_b), (period_b, period_c)):
        df_pred = model.predict_calibrated(
            df=df.fillna(0),
            period_calib=period_calib,
            period_test=period_test,
        )
        df = assign_into_df(df_to=df, df_from=df_pred)

In [None]:
# Evaluate every model against df, which at this point carries the prediction
# columns written by the previous cell.
# NOTE(review): presumably this is what fills model.scores, which the next cell prints — confirm.
for model in models:
    model.evaluate(df)

In [None]:
# Print a plain-text report per model: name, evaluation scores and feature
# importances (both as indented JSON), followed by a separator line.
# NOTE(review): estimators were fit with populate_extras=False earlier in this
# notebook — confirm model.extras.feature_importances is actually populated.
for model in models:
    print(model.name)

    print("EVAL SCORES:")
    scores_json = json.dumps(model.scores, indent=2)
    print(scores_json)

    print("FEATURE_IMPORTANCES")
    importances_json = json.dumps(model.extras.feature_importances, indent=2)
    print(importances_json)

    print("#" * 80)