In [1]:
import json
import logging
import views

# Verbosity of the root logger. DEBUG is noisy; set this to logging.INFO
# for less log output (the notebook renders these log lines in red).
log_level = logging.DEBUG
logging.basicConfig(level=log_level, format=views.config.LOGFMT)

In [2]:
# DATASETS is a dictionary of Dataset objects.
from views import DATASETS
# These are the building blocks of the modelling interface
from views import Ensemble, Model, Downsampling, Period
# These are model specifications from the specfiles
from views.specs.models import cm as model_specs_cm, pgm as model_specs_pgm
from views.specs.periods import get_periods, get_periods_by_name
# Utils
from views.utils import db, io, data as datautils
from views.utils.data import assign_into_df

In [3]:
# These are the core models defined in the ViEWS pipeline.
# They are defined in the views.apps.pipeline.models_cm and
# views.apps.pipeline.models_pgm modules.
from views.apps.pipeline.models_cm import all_cm_models_by_name
from views.apps.pipeline.models_pgm import all_pgm_models_by_name

[2020-11-17 05:33:39,258] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-17 05:33:39,288] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml


In [4]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [5]:
# Show the names of the available datasets we have specified.
# Only the keys are needed, so iterate the mapping directly instead of
# unpacking a `dataset` loop variable that is never used.
for name in DATASETS:
    print(name)

cm_global_imp_0
cm_global_imp_1
cm_global_imp_2
cm_global_imp_3
cm_global_imp_4
cm_africa_imp_0
cm_africa_imp_1
cm_africa_imp_2
cm_africa_imp_3
cm_africa_imp_4
pgm_global_imp_0
pgm_global_imp_1
pgm_global_imp_2
pgm_global_imp_3
pgm_global_imp_4
pgm_africa_imp_0
pgm_africa_imp_1
pgm_africa_imp_2
pgm_africa_imp_3
pgm_africa_imp_4


In [6]:
# Select the dataset to model. DATASETS is already imported by name from
# `views`, so use it directly instead of going through the module attribute.
dataset = DATASETS["cm_africa_imp_0"]

In [7]:
# Get the dataset as a DataFrame. In the recorded run this access logged a
# parquet read from the local storage directory (see the output below).
df = dataset.df
# Preview the first rows; the frame is indexed by (month_id, country_id).
df.head()

[2020-11-17 05:33:48,544] - views.utils.io:65 - DEBUG - Reading parquet at /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet with cols None
[2020-11-17 05:33:49,042] - views.utils.io:72 - DEBUG - Finished reading parquet from /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet.


Unnamed: 0_level_0,Unnamed: 1_level_0,acled_count_ns,acled_count_os,acled_count_pr,acled_count_sb,acled_dummy_ns,acled_dummy_os,acled_dummy_pr,acled_dummy_sb,cdum_1,cdum_10,...,wdi_tx_val_trvl_zs_wt,wdi_vc_btl_deth,wdi_vc_idp_nwcv,wdi_vc_idp_nwds,wdi_vc_idp_tocv,wdi_vc_ihr_psrc_fe_p5,wdi_vc_ihr_psrc_ma_p5,wdi_vc_ihr_psrc_p5,wdi_vc_pkp_totl_un,year
month_id,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,40,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,4.124475,814.0,23000.0,2500.0,2100.0,2.8,18.1,3.3,871.0,1980
1,41,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,10.417314,8.0,500.0,10000.0,342000.0,1.3,18.7,10.9,8536.0,1980
1,42,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,0.391773,53.857143,5000.0,14000.0,5000.0,3.0,15.8,2.2,15.0,1980
1,43,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,38.167939,27.0,10000.0,1000.0,23000.0,1.2,0.2,2.8,10947.0,1980
1,47,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,75.148042,43.0,700.0,28000.0,700.0,2.4,4.4,0.6,930.0,1980


In [8]:
# Identifier of the run; passed to get_periods / get_periods_by_name to look
# up the period definitions used below.
run_id = "d_2020_04_01"

In [10]:
# Period definitions for this run, once as a list and once keyed by name.
periods = get_periods(run_id)
periods_by_name = get_periods_by_name(run_id)

# Pull out the three named periods used in the rest of the notebook.
period_a, period_b, period_c = (
    periods_by_name[period_name] for period_name in ("A", "B", "C")
)

# Display the full mapping as the cell output.
periods_by_name


[2020-11-17 05:35:05,435] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-17 05:35:05,478] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml


{'A': Period(name='A', train_start=121, train_end=396, predict_start=397, predict_end=432),
 'B': Period(name='B', train_start=121, train_end=432, predict_start=433, predict_end=468),
 'C': Period(name='C', train_start=121, train_end=480, predict_start=483, predict_end=520)}

In [11]:
# Models can also be loaded from the definition in the pipeline
print(all_cm_models_by_name.keys())
model_from_pipeline_spec = all_cm_models_by_name["cm_sb_cflong"]
# # Just printing the object should show everything we care about
model_from_pipeline_spec

dict_keys(['cm_sb_vdem_global', 'cm_sb_wdi_global', 'cm_sb_reign_global', 'cm_sb_all_global', 'cm_ns_vdem_global', 'cm_ns_wdi_global', 'cm_ns_reign_global', 'cm_ns_all_global', 'cm_os_vdem_global', 'cm_os_wdi_global', 'cm_os_reign_global', 'cm_os_all_global', 'cm_sb_all_dummy', 'cm_sb_all_25', 'cm_sb_acled_violence', 'cm_sb_acled_protest', 'cm_sb_cfshort', 'cm_sb_cflong', 'cm_sb_demog', 'cm_sb_demog_conf', 'cm_sb_econ', 'cm_sb_econ_conf', 'cm_sb_inst', 'cm_sb_inst_conf', 'cm_sb_icgcw', 'cm_sb_icgcw_conf', 'cm_sb_neibhist', 'cm_sb_reign', 'cm_sb_reign_conf', 'cm_sb_reign_drought', 'cm_sb_reign_drought_conf', 'cm_sb_reign_coups', 'cm_sb_reign_coups_conf', 'cm_sb_vdem', 'cm_sb_vdem_conf', 'cm_sb_vdem_high', 'cm_ns_all_dummy', 'cm_ns_all_25', 'cm_ns_acled_violence', 'cm_ns_acled_protest', 'cm_ns_cfshort', 'cm_ns_cflong', 'cm_ns_demog', 'cm_ns_demog_conf', 'cm_ns_econ', 'cm_ns_econ_conf', 'cm_ns_inst', 'cm_ns_inst_conf', 'cm_ns_icgcw', 'cm_ns_neibhist', 'cm_ns_reign', 'cm_ns_reign_conf', 'c

{
  "name": "cm_sb_cflong",
  "col_outcome": "greq_25_ged_best_sb",
  "cols_features": [
    "time_since_ged_dummy_ns",
    "time_since_ged_dummy_os",
    "time_since_ged_dummy_sb",
    "time_since_greq_100_ged_best_ns",
    "time_since_greq_100_ged_best_os",
    "time_since_greq_100_ged_best_sb",
    "time_since_greq_100_splag_1_1_ged_best_ns",
    "time_since_greq_100_splag_1_1_ged_best_os",
    "time_since_greq_100_splag_1_1_ged_best_sb",
    "time_since_greq_25_ged_best_ns",
    "time_since_greq_25_ged_best_os",
    "time_since_greq_25_ged_best_sb",
    "time_since_greq_25_splag_1_1_ged_best_ns",
    "time_since_greq_25_splag_1_1_ged_best_os",
    "time_since_greq_25_splag_1_1_ged_best_sb",
    "time_since_greq_500_ged_best_ns",
    "time_since_greq_500_ged_best_os",
    "time_since_greq_500_ged_best_sb",
    "time_since_greq_500_splag_1_1_ged_best_ns",
    "time_since_greq_500_splag_1_1_ged_best_os",
    "time_since_greq_500_splag_1_1_ged_best_sb",
    "time_since_greq_5_ged_best_

In [13]:
# The cells below loop over `models`; add more model objects here to run
# them through the same fit / predict / evaluate steps.
models = [model_from_pipeline_spec]

In [14]:
# Fit the estimators of every model. In the recorded run the log shows one
# fit per period and step, each saved as a .joblib file, taking roughly
# 40 minutes in total for a single model.
for model in models:
    model.fit_estimators(df, populate_extras=False)

[2020-11-17 05:35:50,011] - views.apps.model.api:441 - INFO - Fitting estimators for cm_sb_cflong
[2020-11-17 05:35:50,013] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_cflong for period A step 1
[2020-11-17 05:35:50,095] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_cflong
[2020-11-17 05:35:50,095] - views.apps.model.api:422 - DEBUG - cm_sb_cflong downsampled away 0
[2020-11-17 05:35:50,096] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 14850 rows
[2020-11-17 05:35:50,096] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_cflong
[2020-11-17 05:36:40,970] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong A 1 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_1.joblib
[2020-11-17 05:36:53,835] - views.apps.model.api:119 - DEBUG - cm_sb_cflong saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_1.joblib
[2020-11-17 05:3

[2020-11-17 05:45:38,966] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_cflong
[2020-11-17 05:45:38,967] - views.apps.model.api:422 - DEBUG - cm_sb_cflong downsampled away 0
[2020-11-17 05:45:38,967] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 12852 rows
[2020-11-17 05:45:38,968] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_cflong
[2020-11-17 05:46:25,810] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong A 38 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_38.joblib
[2020-11-17 05:46:39,894] - views.apps.model.api:119 - DEBUG - cm_sb_cflong saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_38.joblib
[2020-11-17 05:46:39,911] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_cflong for period B step 1
[2020-11-17 05:46:39,994] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0,

[2020-11-17 05:57:06,390] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 14904 rows
[2020-11-17 05:57:06,391] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_cflong
[2020-11-17 05:58:01,722] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong B 36 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_36.joblib
[2020-11-17 05:58:18,631] - views.apps.model.api:119 - DEBUG - cm_sb_cflong saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_36.joblib
[2020-11-17 05:58:18,647] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_cflong for period B step 38
[2020-11-17 05:58:18,734] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_cflong
[2020-11-17 05:58:18,735] - views.apps.model.api:422 - DEBUG - cm_sb_cflong downsampled away 0
[2020-11-17 05:58:18,735] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 14796 rows
[2020-11-17 

[2020-11-17 06:12:35,815] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong C 30 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_30.joblib
[2020-11-17 06:12:55,214] - views.apps.model.api:119 - DEBUG - cm_sb_cflong saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_30.joblib
[2020-11-17 06:12:55,227] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_cflong for period C step 36
[2020-11-17 06:12:55,322] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_cflong
[2020-11-17 06:12:55,323] - views.apps.model.api:422 - DEBUG - cm_sb_cflong downsampled away 0
[2020-11-17 06:12:55,323] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 17496 rows
[2020-11-17 06:12:55,324] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_cflong
[2020-11-17 06:14:11,375] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong C 36 to /home/kyle/code/Vie

In [15]:
# Uncalibrated predictions for every model.
for model in models:
    df_pred = model.predict(df)
    # Merge the predictions back into df. assign_into_df only overwrites rows
    # that have actual values, so all periods can share one frame, and it does
    # no joining, so the cell can be re-run safely.
    df = assign_into_df(df_from=df_pred, df_to=df)

[2020-11-17 06:16:16,344] - views.apps.model.api:552 - INFO - Predicting for cm_sb_cflong
[2020-11-17 06:16:16,350] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_cflong periods: [Period(name='A', train_start=121, train_end=396, predict_start=397, predict_end=432), Period(name='B', train_start=121, train_end=432, predict_start=433, predict_end=468), Period(name='C', train_start=121, train_end=480, predict_start=483, predict_end=520)]
[2020-11-17 06:16:16,397] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_1.joblib
[2020-11-17 06:16:24,397] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_3.joblib
[2020-11-17 06:16:32,113] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_6.joblib
[2020-11-17 06:16:39,920] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models

In [16]:
# Calibrated predictions: calibrate on one period and predict the next one.
# Each tuple is (period_calib, period_test): A -> B, then B -> C.
calibration_pairs = [(period_a, period_b), (period_b, period_c)]

for model in models:
    # One loop replaces the two copy-pasted stanzas; the pairs run in the
    # same order as before for each model.
    for period_calib, period_test in calibration_pairs:
        df_pred = model.predict_calibrated(
            df=df,
            period_calib=period_calib,
            period_test=period_test,
        )
        # Write the calibrated predictions back into the shared frame.
        df = assign_into_df(df_to=df, df_from=df_pred)

[2020-11-17 06:21:16,971] - views.apps.model.api:580 - INFO - Predicting calibrated for cm_sb_cflong period_calib: A period_test: B 
[2020-11-17 06:21:16,976] - views.apps.model.api:552 - INFO - Predicting for cm_sb_cflong
[2020-11-17 06:21:16,978] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_cflong periods: [Period(name='A', train_start=121, train_end=396, predict_start=397, predict_end=432)]
[2020-11-17 06:21:17,014] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_1.joblib
[2020-11-17 06:21:24,425] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_3.joblib
[2020-11-17 06:21:31,536] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_6.joblib
[2020-11-17 06:21:39,439] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_9.joblib
[2020-11-17 06

[2020-11-17 06:25:43,543] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_9.joblib
[2020-11-17 06:25:55,596] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_12.joblib
[2020-11-17 06:26:05,357] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_18.joblib
[2020-11-17 06:26:13,040] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_24.joblib
[2020-11-17 06:26:20,767] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_30.joblib
[2020-11-17 06:26:28,585] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_36.joblib
[2020-11-17 06:26:36,355] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflon

In [17]:
# Evaluate all models
for model in models:
    model.evaluate(df)

[2020-11-17 06:26:52,678] - views.apps.model.api:966 - INFO - Evaluating cm_sb_cflong
[2020-11-17 06:26:52,679] - views.apps.model.api:970 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step-combined
[2020-11-17 06:26:52,715] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 1
[2020-11-17 06:26:52,745] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 3
[2020-11-17 06:26:52,773] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 6
[2020-11-17 06:26:52,801] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 9
[2020-11-17 06:26:52,829] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 12
[2020-11-17 06:26:52,857] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb

In [18]:
# Visual divider printed after each model's report.
separator = "#" * 80

# Print each model's evaluation scores and feature importances as
# indented JSON.
for model in models:
    print(model.name)
    print("EVAL SCORES:")
    print(json.dumps(model.scores, indent=2))
    print("FEATURE_IMPORTANCES")
    # NOTE(review): the estimators were fitted with populate_extras=False;
    # confirm that feature_importances is actually populated here.
    print(json.dumps(model.extras.feature_importances, indent=2))
    print(separator)

cm_sb_cflong
EVAL SCORES:
{
  "A": {
    "1": {
      "uncalibrated": {
        "average_precision": 0.7753302223345409,
        "area_under_roc": 0.9622183534768489,
        "brier": 0.04287270302816865
      },
      "calibrated": {}
    },
    "3": {
      "uncalibrated": {
        "average_precision": 0.7608933496197855,
        "area_under_roc": 0.9578827546955035,
        "brier": 0.0438983806795708
      },
      "calibrated": {}
    },
    "6": {
      "uncalibrated": {
        "average_precision": 0.7449910454360148,
        "area_under_roc": 0.952128841395305,
        "brier": 0.04523789942359755
      },
      "calibrated": {}
    },
    "9": {
      "uncalibrated": {
        "average_precision": 0.7339526510022671,
        "area_under_roc": 0.947549755142912,
        "brier": 0.046662981287976135
      },
      "calibrated": {}
    },
    "12": {
      "uncalibrated": {
        "average_precision": 0.7280038113379804,
        "area_under_roc": 0.9405860134709443,
        "b

In [19]:
# Collect the `col_sc_calibrated` column name of each model.
cols_predict = [fitted_model.col_sc_calibrated for fitted_model in models]

In [20]:
# Keep only the rows for period C's prediction times and the columns
# collected in cols_predict.
df_results = df.loc[period_c.times_predict, cols_predict]

In [21]:
# Export the results as CSV. The path is relative, so the file is written
# to the current working directory and overwritten on every run.
df_results.to_csv("cflong_results.csv")