In [1]:
import json
import logging
import views

# Root logger setup using the project's log format.
# DEBUG is chatty (the notebook shows it in red); change the level to
# logging.INFO for quieter output.
logging.basicConfig(format=views.config.LOGFMT, level=logging.DEBUG)

In [None]:
import pandas as pd

In [2]:
# DATASETS is a dictionary of Dataset objects.
from views import DATASETS
# These are the building blocks of the modelling interface
from views import Ensemble, Model, Downsampling, Period
# These are model specifications from the specfiles
from views.specs.models import cm as model_specs_cm, pgm as model_specs_pgm
from views.specs.periods import get_periods, get_periods_by_name
# Utils
from views.utils import db, io, data as datautils
from views.utils.data import assign_into_df

In [3]:
from views.apps.pipeline.models_cm import all_cm_models_by_name
from views.apps.pipeline.models_pgm import all_pgm_models_by_name

[2020-11-17 06:27:37,569] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-17 06:27:37,619] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml


In [4]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [5]:
# Look up the dataset by name in the DATASETS dictionary imported above.
dataset = DATASETS["cm_africa_imp_0"]

In [6]:
# Working DataFrame for the rest of the notebook. The log output below shows
# that this attribute access reads the dataset's parquet file from storage.
df = dataset.df

[2020-11-17 06:27:40,800] - views.utils.io:65 - DEBUG - Reading parquet at /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet with cols None
[2020-11-17 06:27:41,303] - views.utils.io:72 - DEBUG - Finished reading parquet from /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet.


In [7]:
# Run identifier; the next cell uses it to look up the period definitions.
run_id = "d_2020_04_01"

In [8]:
# Period definitions for this run, once as a list and once keyed by name.
periods = get_periods(run_id)
periods_by_name = get_periods_by_name(run_id)

# Bind the three named periods used by the prediction cells further down.
period_a, period_b, period_c = (
    periods_by_name[name] for name in ("A", "B", "C")
)

# Display the last period as the cell output.
period_c

[2020-11-17 06:27:47,426] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-17 06:27:47,445] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml


Period(name='C', train_start=121, train_end=480, predict_start=483, predict_end=520)

In [9]:
# Take one model from the pipeline's country-month model collection and wrap
# it in a list, so the cells below can loop over `models` unchanged if more
# models are added here.
model_from_pipeline_spec = all_cm_models_by_name["cm_sb_acled_violence"]
models = [model_from_pipeline_spec]

In [10]:
# Give every model the full list of periods loaded for this run.
for model in models:
    model.periods = periods

In [11]:
# Fit the estimators of every model on df.
# This is slow: the log output below shows one estimator being fitted and
# saved to storage per period and step, each taking tens of seconds.
for model in models:
    model.fit_estimators(df, populate_extras=False)

[2020-11-17 06:28:30,149] - views.apps.model.api:441 - INFO - Fitting estimators for cm_sb_acled_violence
[2020-11-17 06:28:30,150] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_acled_violence for period A step 1
[2020-11-17 06:28:30,160] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_acled_violence
[2020-11-17 06:28:30,161] - views.apps.model.api:422 - DEBUG - cm_sb_acled_violence downsampled away 0
[2020-11-17 06:28:30,161] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_acled_violence on 12399 rows
[2020-11-17 06:28:30,161] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_acled_violence
[2020-11-17 06:28:52,940] - views.apps.model.api:116 - DEBUG - Saving cm_sb_acled_violence A 1 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_1.joblib
[2020-11-17 06:29:06,176] - views.apps.model.api:119 - DEBUG - cm_sb_acled_violence saved to /home/kyle/code/V

[2020-11-17 06:33:52,390] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_acled_violence
[2020-11-17 06:34:18,468] - views.apps.model.api:116 - DEBUG - Saving cm_sb_acled_violence A 36 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_36.joblib
[2020-11-17 06:34:30,271] - views.apps.model.api:119 - DEBUG - cm_sb_acled_violence saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_36.joblib
[2020-11-17 06:34:30,327] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_acled_violence for period A step 38
[2020-11-17 06:34:30,335] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_acled_violence
[2020-11-17 06:34:30,335] - views.apps.model.api:422 - DEBUG - cm_sb_acled_violence downsampled away 0
[2020-11-17 06:34:30,336] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_acled_violence on 10541 rows
[2020-11-17 06:34:30,336] - views.apps

[2020-11-17 06:40:45,903] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_acled_violence
[2020-11-17 06:40:45,904] - views.apps.model.api:422 - DEBUG - cm_sb_acled_violence downsampled away 0
[2020-11-17 06:40:45,905] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_acled_violence on 12777 rows
[2020-11-17 06:40:45,906] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_acled_violence
[2020-11-17 06:41:14,153] - views.apps.model.api:116 - DEBUG - Saving cm_sb_acled_violence B 30 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_30.joblib
[2020-11-17 06:41:27,782] - views.apps.model.api:119 - DEBUG - cm_sb_acled_violence saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_30.joblib
[2020-11-17 06:41:27,807] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_acled_violence for period B step 36
[2020-11-17 06:41:27,815] - views.apps

[2020-11-17 06:48:59,737] - views.apps.model.api:119 - DEBUG - cm_sb_acled_violence saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_C_18.joblib
[2020-11-17 06:48:59,752] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_acled_violence for period C step 24
[2020-11-17 06:48:59,763] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_acled_violence
[2020-11-17 06:48:59,764] - views.apps.model.api:422 - DEBUG - cm_sb_acled_violence downsampled away 0
[2020-11-17 06:48:59,765] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_acled_violence on 15693 rows
[2020-11-17 06:48:59,766] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_acled_violence
[2020-11-17 06:49:34,351] - views.apps.model.api:116 - DEBUG - Saving cm_sb_acled_violence C 24 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_C_24.joblib
[2020-11-17 06:49:55,239] - views.apps

In [12]:
# Uncalibrated predictions only; the calibrated predictions are produced by
# the predict_calibrated cells that follow.
for model in models:
    df_pred = model.predict(df)
    # assign_into_df only overwrites rows that have actual values, so all
    # periods can live in the same df. It does no joining and is idempotent,
    # so this cell can be re-run as many times as needed.
    df = assign_into_df(df_to=df, df_from=df_pred)

[2020-11-17 06:53:53,516] - views.apps.model.api:552 - INFO - Predicting for cm_sb_acled_violence
[2020-11-17 06:53:53,517] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_acled_violence periods: [Period(name='A', train_start=121, train_end=396, predict_start=397, predict_end=432), Period(name='B', train_start=121, train_end=432, predict_start=433, predict_end=468), Period(name='C', train_start=121, train_end=480, predict_start=483, predict_end=520)]
[2020-11-17 06:53:53,565] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_1.joblib
[2020-11-17 06:54:01,426] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_3.joblib
[2020-11-17 06:54:09,109] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_6.joblib
[2020-11-17 06:54:17,399] - views.apps.model.api:125 - DEBUG - Loading /home/ky

[2020-11-17 06:59:44,748] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_acled_violence periods: [Period(name='B', train_start=121, train_end=432, predict_start=433, predict_end=468)]
[2020-11-17 06:59:44,769] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_1.joblib
[2020-11-17 06:59:53,274] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_3.joblib
[2020-11-17 07:00:02,438] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_6.joblib
[2020-11-17 07:00:12,514] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_9.joblib
[2020-11-17 07:00:21,205] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_12.joblib
[2020-11-17 07:00:30,109] - views.apps.model

RuntimeError: Missing values in s_calib_pred

In [21]:
# Calibrated predictions for period B, calibrated on period A.
# NOTE(review): fillna(0) zero-fills every NaN in df before it is handed to
# predict_calibrated, presumably to get past the "Missing values in
# s_calib_pred" RuntimeError recorded above. Confirm that zero is a sensible
# fill value for all affected columns.
for model in models:
    df_pred = model.predict_calibrated(
        df=df.fillna(0), period_calib=period_a, period_test=period_b
    )
    df = assign_into_df(df_to=df, df_from=df_pred)
   

[2020-11-17 07:09:13,907] - views.apps.model.api:580 - INFO - Predicting calibrated for cm_sb_acled_violence period_calib: A period_test: B 
[2020-11-17 07:09:13,908] - views.apps.model.api:552 - INFO - Predicting for cm_sb_acled_violence
[2020-11-17 07:09:13,908] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_acled_violence periods: [Period(name='A', train_start=121, train_end=396, predict_start=397, predict_end=432)]
[2020-11-17 07:09:13,957] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_1.joblib
[2020-11-17 07:09:21,742] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_3.joblib
[2020-11-17 07:09:30,376] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_6.joblib
[2020-11-17 07:09:37,792] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/stora

In [24]:
# Calibrated predictions for period C, calibrated on period B.
# NOTE(review): fillna(0) zero-fills every NaN in df before it is handed to
# predict_calibrated, presumably to avoid the "Missing values in
# s_calib_pred" RuntimeError recorded earlier in the notebook. Confirm that
# zero is a sensible fill value for all affected columns.
for model in models:
    df_pred = model.predict_calibrated(
        df=df.fillna(0), period_calib=period_b, period_test=period_c
    )
    df = assign_into_df(df_to=df, df_from=df_pred)

[2020-11-17 07:16:11,192] - views.apps.model.api:580 - INFO - Predicting calibrated for cm_sb_acled_violence period_calib: B period_test: C 
[2020-11-17 07:16:11,193] - views.apps.model.api:552 - INFO - Predicting for cm_sb_acled_violence
[2020-11-17 07:16:11,193] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_acled_violence periods: [Period(name='B', train_start=121, train_end=432, predict_start=433, predict_end=468)]
[2020-11-17 07:16:11,242] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_1.joblib
[2020-11-17 07:16:18,684] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_3.joblib
[2020-11-17 07:16:25,994] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_6.joblib
[2020-11-17 07:16:33,023] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/stora

In [25]:
# Evaluate every model against df. The log output below shows this running
# per period and per step.
# Presumably this populates model.scores, which the next cell prints - confirm.
for model in models:
    model.evaluate(df)

[2020-11-17 07:21:51,643] - views.apps.model.api:966 - INFO - Evaluating cm_sb_acled_violence
[2020-11-17 07:21:51,644] - views.apps.model.api:970 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step-combined
[2020-11-17 07:21:51,673] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 1
[2020-11-17 07:21:51,700] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 3
[2020-11-17 07:21:51,731] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 6
[2020-11-17 07:21:51,762] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 9
[2020-11-17 07:21:51,791] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 12
[2020-11-17 07:21:51,818] - views.apps.model.api:1009

In [26]:
# Report each model's evaluation scores and feature importances as
# pretty-printed JSON, followed by a separator line.
for model in models:
    print(model.name)
    print("EVAL SCORES:")
    print(json.dumps(model.scores, indent=2))
    print("FEATURE_IMPORTANCES")
    print(json.dumps(model.extras.feature_importances, indent=2))
    print("#" * 80)
    

cm_sb_acled_violence
EVAL SCORES:
{
  "A": {
    "1": {
      "uncalibrated": {
        "average_precision": 0.41147628964023286,
        "area_under_roc": 0.8112059021362982,
        "brier": 0.07194919884907985
      },
      "calibrated": {}
    },
    "3": {
      "uncalibrated": {
        "average_precision": 0.41272729663913355,
        "area_under_roc": 0.8484199184925691,
        "brier": 0.06970944530836402
      },
      "calibrated": {}
    },
    "6": {
      "uncalibrated": {
        "average_precision": 0.4420809911773421,
        "area_under_roc": 0.856826323430495,
        "brier": 0.06731962984389339
      },
      "calibrated": {}
    },
    "9": {
      "uncalibrated": {
        "average_precision": 0.43243737736566623,
        "area_under_roc": 0.8569769813032058,
        "brier": 0.0681970729980068
      },
      "calibrated": {}
    },
    "12": {
      "uncalibrated": {
        "average_precision": 0.37693860428396264,
        "area_under_roc": 0.8336204456429439

In [28]:
# Collect the calibrated prediction column of every model in `models`.
# The comprehension must read the attribute from the loop variable; reading
# it from model_from_pipeline_spec would repeat the same column once per
# model as soon as `models` holds more than one entry.
cols_predict = [model.col_sc_calibrated for model in models]

# Keep only the rows in period C's prediction window (period_c.times_predict)
# and the calibrated prediction columns.
df_results = df.loc[period_c.times_predict, cols_predict]

# Preview the first rows as the cell output.
df_results.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,sc_cm_sb_acled_violence_calibrated
month_id,country_id,Unnamed: 2_level_1
483,40,9.607909e-07
483,41,0.3226727
483,42,0.02818018
