In [2]:
import json
import logging
import views

# Configure logging for this notebook session using the project's log format.
# DEBUG is very chatty; swap in level=logging.INFO for less yelling in red.
logging.basicConfig(format=views.config.LOGFMT, level=logging.DEBUG)

In [3]:
import pandas as pd

In [4]:
# DATASETS is a dictionary of Dataset objects.
from views import DATASETS
# These are the building blocks of the modelling interface
from views import Ensemble, Model, Downsampling, Period
# These are model specifications from the specfiles
from views.specs.models import cm as model_specs_cm, pgm as model_specs_pgm
from views.specs.periods import get_periods, get_periods_by_name
# Utils
from views.utils import db, io, data as datautils
from views.utils.data import assign_into_df

In [5]:
# These are defined in the views.apps.pipeline.models_cm and models_pgm modules
from views.apps.pipeline.models_cm import all_cm_models_by_name
from views.apps.pipeline.models_pgm import all_pgm_models_by_name

[2020-11-19 09:19:41,335] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-19 09:19:41,376] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml


In [6]:
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

In [7]:
# Look up the "cm_africa_imp_0" Dataset object and take its DataFrame.
# The log output below shows a parquet file being read at this point.
dataset = views.DATASETS["cm_africa_imp_0"]
df = dataset.df

[2020-11-19 09:19:44,034] - views.utils.io:65 - DEBUG - Reading parquet at /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet with cols None
[2020-11-19 09:19:44,573] - views.utils.io:72 - DEBUG - Finished reading parquet from /home/kyle/code/Views2/OpenViEWS2/storage/data/datasets/cm_africa_imp_0.parquet.


In [8]:
# Sanity check: confirm that dataset.df is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [9]:
# Identifier of the run whose period definitions are looked up via
# get_periods / get_periods_by_name.
run_id = "d_2020_04_01"

In [10]:
# Period definitions for this run: once as a list, once as a dict keyed by name.
periods = get_periods(run_id)
periods_by_name = get_periods_by_name(run_id)

# Named handles for the three periods used for calibration and testing below.
period_a, period_b, period_c = (periods_by_name[key] for key in ("A", "B", "C"))

# Show period A as the cell output.
period_a

[2020-11-19 09:19:48,753] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml
[2020-11-19 09:19:48,781] - views.utils.io:107 - DEBUG - Loading YAML from /home/kyle/code/Views2/OpenViEWS2/views/specs/periods/periods.yaml


Period(name='A', train_start=121, train_end=396, predict_start=397, predict_end=432)

In [11]:
# Fetch two pre-specified models from the pipeline's cm model mapping.
model_from_pipeline_spec1 = all_cm_models_by_name["cm_sb_cflong"]
model_from_pipeline_spec = all_cm_models_by_name["cm_sb_acled_violence"]

models = [model_from_pipeline_spec1, model_from_pipeline_spec]

# Point every model at this run's periods.
# NOTE(review): presumably these are the same objects held in
# all_cm_models_by_name, so the assignment is visible through that mapping too.
for model in models:
    model.periods = periods

In [12]:
# Ensemble over the two models above using the "average" method; it shares the
# models' outcome column and the run's periods.
cflong_acled_violence_ensemble = Ensemble(
    name="cflong_acled_violence_ensemble",
    col_outcome="greq_25_ged_best_sb",
    outcome_type="prob",
    method="average",
    models=models,
    periods=periods,
)

# Later cells iterate over this list, so more ensembles can be appended here.
ensembles = [cflong_acled_violence_ensemble]


In [13]:
# Display the model's specification (name, outcome column, features, steps, periods, ...).
model_from_pipeline_spec

{
  "name": "cm_sb_acled_violence",
  "col_outcome": "greq_25_ged_best_sb",
  "cols_features": [
    "splag_1_1_acled_count_ns",
    "splag_1_1_acled_count_os",
    "splag_1_1_acled_count_sb",
    "time_since_acled_dummy_ns",
    "time_since_acled_dummy_os",
    "time_since_acled_dummy_sb",
    "time_since_splag_1_1_acled_dummy_ns",
    "time_since_splag_1_1_acled_dummy_os",
    "time_since_splag_1_1_acled_dummy_sb"
  ],
  "steps": [
    1,
    3,
    6,
    9,
    12,
    18,
    24,
    30,
    36,
    38
  ],
  "periods": [
    {
      "name": "A",
      "train_start": 121,
      "train_end": 396,
      "predict_start": 397,
      "predict_end": 432
    },
    {
      "name": "B",
      "train_start": 121,
      "train_end": 432,
      "predict_start": 433,
      "predict_end": 468
    },
    {
      "name": "C",
      "train_start": 121,
      "train_end": 480,
      "predict_start": 483,
      "predict_end": 520
    }
  ],
  "outcome_type": "prob",
  "estimators": {
    "name": "c

In [14]:
# Re-assign the run's periods to every model.
# NOTE(review): this repeats the loop in the cell that builds `models`; it looks
# redundant unless something in between resets model.periods — confirm.
for model in models:
    model.periods = periods

In [15]:
# Fit the estimators of every model on df, without populating extras.
# This is slow: the recorded log below spans roughly an hour.
for model in models:
    model.fit_estimators(df, populate_extras=False)

[2020-11-19 09:19:59,050] - views.apps.model.api:441 - INFO - Fitting estimators for cm_sb_cflong
[2020-11-19 09:19:59,051] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_cflong for period A step 1
[2020-11-19 09:19:59,175] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_cflong
[2020-11-19 09:19:59,175] - views.apps.model.api:422 - DEBUG - cm_sb_cflong downsampled away 0
[2020-11-19 09:19:59,176] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 14850 rows
[2020-11-19 09:19:59,176] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_cflong
[2020-11-19 09:20:56,158] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong A 1 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_1.joblib
[2020-11-19 09:21:08,916] - views.apps.model.api:119 - DEBUG - cm_sb_cflong saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_1.joblib
[2020-11-19 09:2

[2020-11-19 09:29:35,669] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_cflong
[2020-11-19 09:29:35,670] - views.apps.model.api:422 - DEBUG - cm_sb_cflong downsampled away 0
[2020-11-19 09:29:35,670] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 12852 rows
[2020-11-19 09:29:35,670] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_cflong
[2020-11-19 09:30:28,642] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong A 38 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_38.joblib
[2020-11-19 09:30:44,096] - views.apps.model.api:119 - DEBUG - cm_sb_cflong saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_38.joblib
[2020-11-19 09:30:44,120] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_cflong for period B step 1
[2020-11-19 09:30:44,219] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0,

[2020-11-19 09:41:09,610] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 14904 rows
[2020-11-19 09:41:09,610] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_cflong
[2020-11-19 09:42:01,895] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong B 36 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_36.joblib
[2020-11-19 09:42:17,059] - views.apps.model.api:119 - DEBUG - cm_sb_cflong saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_36.joblib
[2020-11-19 09:42:17,076] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_cflong for period B step 38
[2020-11-19 09:42:17,166] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_cflong
[2020-11-19 09:42:17,166] - views.apps.model.api:422 - DEBUG - cm_sb_cflong downsampled away 0
[2020-11-19 09:42:17,167] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 14796 rows
[2020-11-19 

[2020-11-19 09:54:47,765] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong C 30 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_30.joblib
[2020-11-19 09:55:06,490] - views.apps.model.api:119 - DEBUG - cm_sb_cflong saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_C_30.joblib
[2020-11-19 09:55:06,548] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_cflong for period C step 36
[2020-11-19 09:55:06,686] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_cflong
[2020-11-19 09:55:06,688] - views.apps.model.api:422 - DEBUG - cm_sb_cflong downsampled away 0
[2020-11-19 09:55:06,689] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_cflong on 17496 rows
[2020-11-19 09:55:06,690] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_cflong
[2020-11-19 09:56:18,416] - views.apps.model.api:116 - DEBUG - Saving cm_sb_cflong C 36 to /home/kyle/code/Vie

[2020-11-19 10:01:45,712] - views.apps.model.api:422 - DEBUG - cm_sb_acled_violence downsampled away 0
[2020-11-19 10:01:45,712] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_acled_violence on 11199 rows
[2020-11-19 10:01:45,713] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_acled_violence
[2020-11-19 10:02:09,726] - views.apps.model.api:116 - DEBUG - Saving cm_sb_acled_violence A 24 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_24.joblib
[2020-11-19 10:02:21,323] - views.apps.model.api:119 - DEBUG - cm_sb_acled_violence saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_24.joblib
[2020-11-19 10:02:21,345] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_acled_violence for period A step 30
[2020-11-19 10:02:21,353] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_acled_violence
[2020-11-19 10:02:21,353] - views.apps

[2020-11-19 10:07:56,137] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_acled_violence for period B step 18
[2020-11-19 10:07:56,150] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_acled_violence
[2020-11-19 10:07:56,150] - views.apps.model.api:422 - DEBUG - cm_sb_acled_violence downsampled away 0
[2020-11-19 10:07:56,151] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_acled_violence on 13425 rows
[2020-11-19 10:07:56,151] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_acled_violence
[2020-11-19 10:08:28,642] - views.apps.model.api:116 - DEBUG - Saving cm_sb_acled_violence B 18 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_18.joblib
[2020-11-19 10:08:45,251] - views.apps.model.api:119 - DEBUG - cm_sb_acled_violence saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_18.joblib
[2020-11-19 10:08:45,272] - views.apps

[2020-11-19 10:16:02,114] - views.apps.model.api:116 - DEBUG - Saving cm_sb_acled_violence C 9 to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_C_9.joblib
[2020-11-19 10:16:20,895] - views.apps.model.api:119 - DEBUG - cm_sb_acled_violence saved to /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_C_9.joblib
[2020-11-19 10:16:20,909] - views.apps.model.api:444 - DEBUG - Fitting cm_sb_acled_violence for period C step 12
[2020-11-19 10:16:20,918] - views.apps.model.api:413 - DEBUG - Downsampling by Downsampling(share_positive=1.0, share_negative=1.0, threshold=0) for cm_sb_acled_violence
[2020-11-19 10:16:20,919] - views.apps.model.api:422 - DEBUG - cm_sb_acled_violence downsampled away 0
[2020-11-19 10:16:20,919] - views.apps.model.api:424 - DEBUG - Fitting cm_sb_acled_violence on 16341 rows
[2020-11-19 10:16:20,920] - views.apps.model.api:137 - DEBUG - Getting initial_estimator for cm_sb_acled_violence
[2020-11-19 10:16:53,384] - views.apps.mo

In [16]:
# For every model: write uncalibrated predictions into df, then calibrated
# predictions for period B (calibrated on A) and period C (calibrated on B).
for model in models:
    # Uncalibrated predictions
    df_pred = model.predict(df)
    # assign_into_df takes care to only overwrite rows with actual values
    # This way we can keep all periods in the same df
    # It's also idempotent, no joining, so run as many times as you like.
    df = assign_into_df(df_to=df, df_from=df_pred)

    # Calibrated predictions: one pass per (calibration period, test period) pair.
    for period_calib, period_test in ((period_a, period_b), (period_b, period_c)):
        # NaNs are replaced by 0 only in the frame handed to predict_calibrated;
        # df itself is only changed through assign_into_df.
        # NOTE(review): confirm that zero-filling missing values is intended here.
        df_pred = model.predict_calibrated(
            df=df.fillna(0),
            period_calib=period_calib,
            period_test=period_test,
        )
        df = assign_into_df(df_to=df, df_from=df_pred)


[2020-11-19 10:21:32,224] - views.apps.model.api:552 - INFO - Predicting for cm_sb_cflong
[2020-11-19 10:21:32,226] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_cflong periods: [Period(name='A', train_start=121, train_end=396, predict_start=397, predict_end=432), Period(name='B', train_start=121, train_end=432, predict_start=433, predict_end=468), Period(name='C', train_start=121, train_end=480, predict_start=483, predict_end=520)]
[2020-11-19 10:21:32,267] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_1.joblib
[2020-11-19 10:21:38,654] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_3.joblib
[2020-11-19 10:21:44,810] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_A_6.joblib
[2020-11-19 10:21:51,378] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models

[2020-11-19 10:27:30,455] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_6.joblib
[2020-11-19 10:27:37,921] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_9.joblib
[2020-11-19 10:27:45,849] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_12.joblib
[2020-11-19 10:27:54,857] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_18.joblib
[2020-11-19 10:28:06,451] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_24.joblib
[2020-11-19 10:28:19,215] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong_B_30.joblib
[2020-11-19 10:28:30,989] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_cflong

[2020-11-19 10:32:53,020] - views.utils.data:110 - DEBUG - Inserting col ss_cm_sb_cflong_18_calibrated
[2020-11-19 10:32:53,036] - views.utils.data:110 - DEBUG - Inserting col ss_cm_sb_cflong_24_calibrated
[2020-11-19 10:32:53,049] - views.utils.data:110 - DEBUG - Inserting col ss_cm_sb_cflong_30_calibrated
[2020-11-19 10:32:53,058] - views.utils.data:110 - DEBUG - Inserting col ss_cm_sb_cflong_36_calibrated
[2020-11-19 10:32:53,069] - views.utils.data:110 - DEBUG - Inserting col ss_cm_sb_cflong_38_calibrated
[2020-11-19 10:32:53,080] - views.utils.data:110 - DEBUG - Inserting col sc_cm_sb_cflong_calibrated
[2020-11-19 10:32:53,093] - views.apps.model.api:552 - INFO - Predicting for cm_sb_acled_violence
[2020-11-19 10:32:53,094] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_acled_violence periods: [Period(name='A', train_start=121, train_end=396, predict_start=397, predict_end=432), Period(name='B', train_start=121, train_end=432, predict_start=433, predict_end=468), Period

[2020-11-19 10:38:18,713] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_30.joblib
[2020-11-19 10:38:24,680] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_36.joblib
[2020-11-19 10:38:30,403] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_A_38.joblib
[2020-11-19 10:38:36,594] - views.apps.model.api:552 - INFO - Predicting for cm_sb_acled_violence
[2020-11-19 10:38:36,595] - views.apps.model.api:553 - DEBUG - Predicting for cm_sb_acled_violence periods: [Period(name='B', train_start=121, train_end=432, predict_start=433, predict_end=468)]
[2020-11-19 10:38:36,616] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/OpenViEWS2/storage/models/cm_sb_acled_violence_B_1.joblib
[2020-11-19 10:38:43,746] - views.apps.model.api:125 - DEBUG - Loading /home/kyle/code/Views2/Op

[2020-11-19 10:42:14,563] - views.apps.model.api:611 - DEBUG - Calibrating cm_sb_acled_violence step 1
[2020-11-19 10:42:14,574] - views.apps.model.api:611 - DEBUG - Calibrating cm_sb_acled_violence step 3
[2020-11-19 10:42:14,587] - views.apps.model.api:611 - DEBUG - Calibrating cm_sb_acled_violence step 6
[2020-11-19 10:42:14,599] - views.apps.model.api:611 - DEBUG - Calibrating cm_sb_acled_violence step 9
[2020-11-19 10:42:14,611] - views.apps.model.api:611 - DEBUG - Calibrating cm_sb_acled_violence step 12
[2020-11-19 10:42:14,622] - views.apps.model.api:611 - DEBUG - Calibrating cm_sb_acled_violence step 18
[2020-11-19 10:42:14,644] - views.apps.model.api:611 - DEBUG - Calibrating cm_sb_acled_violence step 24
[2020-11-19 10:42:14,658] - views.apps.model.api:611 - DEBUG - Calibrating cm_sb_acled_violence step 30
[2020-11-19 10:42:14,667] - views.apps.model.api:611 - DEBUG - Calibrating cm_sb_acled_violence step 36
[2020-11-19 10:42:14,679] - views.apps.model.api:611 - DEBUG - Calib

In [17]:
# Evaluate every model's predictions stored in df; the scores are read back
# from model.scores in the next cell.
for model in models:
    model.evaluate(df)

[2020-11-19 10:43:38,650] - views.apps.model.api:966 - INFO - Evaluating cm_sb_cflong
[2020-11-19 10:43:38,653] - views.apps.model.api:970 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step-combined
[2020-11-19 10:43:38,712] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 1
[2020-11-19 10:43:38,755] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 3
[2020-11-19 10:43:38,793] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 6
[2020-11-19 10:43:38,830] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 9
[2020-11-19 10:43:38,865] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_cflong period A step 12
[2020-11-19 10:43:38,899] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb

[2020-11-19 10:43:40,286] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 12
[2020-11-19 10:43:40,320] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 18
[2020-11-19 10:43:40,356] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 24
[2020-11-19 10:43:40,395] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 30
[2020-11-19 10:43:40,443] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 36
[2020-11-19 10:43:40,492] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period A step 38
[2020-11-19 10:43:40,543] - views.apps.model.api:970 - DEBUG - Evaluating uncalibrated predictions for cm_sb_acled_violence period B step-combined
[

In [18]:
# Print each model's evaluation scores and feature importances as indented JSON,
# separated by a line of 80 '#' characters.
# NOTE(review): the estimators were fitted with populate_extras = False in an
# earlier cell; confirm model.extras.feature_importances is populated here.
for model in models:
    print(model.name)
    print("EVAL SCORES:")
    print(json.dumps(model.scores, indent=2))
    print("FEATURE_IMPORTANCES")
    print(json.dumps(model.extras.feature_importances, indent=2))
    print("#"*80)
    

cm_sb_cflong
EVAL SCORES:
{
  "A": {
    "1": {
      "uncalibrated": {
        "average_precision": 0.7754431153761843,
        "area_under_roc": 0.9620479122471155,
        "brier": 0.04286612452860683
      },
      "calibrated": {}
    },
    "3": {
      "uncalibrated": {
        "average_precision": 0.7595176822459726,
        "area_under_roc": 0.9576590505814785,
        "brier": 0.043998182828007014
      },
      "calibrated": {}
    },
    "6": {
      "uncalibrated": {
        "average_precision": 0.74595661391252,
        "area_under_roc": 0.9523571108994122,
        "brier": 0.045168314853951244
      },
      "calibrated": {}
    },
    "9": {
      "uncalibrated": {
        "average_precision": 0.7346562588699618,
        "area_under_roc": 0.9472240906503855,
        "brier": 0.04661370021052189
      },
      "calibrated": {}
    },
    "12": {
      "uncalibrated": {
        "average_precision": 0.7268097416754156,
        "area_under_roc": 0.94051296722963,
        "b

In [18]:
# Preview the first rows of df, which by now also carries the prediction
# columns inserted by the prediction cell.
df.head(4)

Unnamed: 0_level_0,Unnamed: 1_level_0,acled_count_ns,acled_count_os,acled_count_pr,acled_count_sb,acled_dummy_ns,acled_dummy_os,acled_dummy_pr,acled_dummy_sb,cdum_1,cdum_10,...,ss_cm_sb_acled_violence_3_calibrated,ss_cm_sb_acled_violence_6_calibrated,ss_cm_sb_acled_violence_9_calibrated,ss_cm_sb_acled_violence_12_calibrated,ss_cm_sb_acled_violence_18_calibrated,ss_cm_sb_acled_violence_24_calibrated,ss_cm_sb_acled_violence_30_calibrated,ss_cm_sb_acled_violence_36_calibrated,ss_cm_sb_acled_violence_38_calibrated,sc_cm_sb_acled_violence_calibrated
month_id,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,40,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,,,,,,,,,,
1,41,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,,,,,,,,,,
1,42,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,,,,,,,,,,
1,43,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,,,,,,,,,,


In [25]:

# Ensemble predictions for test period C, using period B as the calibration
# period, merged into df via assign_into_df.
# NOTE(review): NaNs are replaced by 0 in the frame passed to predict; confirm
# that zero-filling missing constituent predictions is intended.
for ensemble in ensembles:
    df_pred = ensemble.predict(
        df=df.fillna(0), 
        period_calib=period_b,
        period_test=period_c,
    )
    df = assign_into_df(df_to=df, df_from=df_pred)

[2020-11-19 11:26:48,011] - views.apps.model.api:814 - INFO - Predicting for cflong_acled_violence_ensemble
[2020-11-19 11:26:48,184] - views.utils.data:110 - DEBUG - Inserting col ss_cflong_acled_violence_ensemble_1
[2020-11-19 11:26:48,190] - views.utils.data:110 - DEBUG - Inserting col ss_cflong_acled_violence_ensemble_3
[2020-11-19 11:26:48,196] - views.utils.data:110 - DEBUG - Inserting col ss_cflong_acled_violence_ensemble_6
[2020-11-19 11:26:48,204] - views.utils.data:110 - DEBUG - Inserting col ss_cflong_acled_violence_ensemble_9
[2020-11-19 11:26:48,211] - views.utils.data:110 - DEBUG - Inserting col ss_cflong_acled_violence_ensemble_12
[2020-11-19 11:26:48,221] - views.utils.data:110 - DEBUG - Inserting col ss_cflong_acled_violence_ensemble_18
[2020-11-19 11:26:48,228] - views.utils.data:110 - DEBUG - Inserting col ss_cflong_acled_violence_ensemble_24
[2020-11-19 11:26:48,238] - views.utils.data:110 - DEBUG - Inserting col ss_cflong_acled_violence_ensemble_30
[2020-11-19 11:2

In [27]:
# NOTE(review): this rebinds df with every NaN replaced by 0, so the cell is
# not reversible without re-running the cells above. Prediction columns that
# were NaN (see the earlier df.head output) become 0.0, and every later cell
# (describe, ensemble.evaluate) sees zeros instead of missing values.
# Confirm this is intended; otherwise bind the filled frame to a new name.
df = df.fillna(0)
df.head(4)

Unnamed: 0_level_0,Unnamed: 1_level_0,acled_count_ns,acled_count_os,acled_count_pr,acled_count_sb,acled_dummy_ns,acled_dummy_os,acled_dummy_pr,acled_dummy_sb,cdum_1,cdum_10,...,ss_cflong_acled_violence_ensemble_3,ss_cflong_acled_violence_ensemble_6,ss_cflong_acled_violence_ensemble_9,ss_cflong_acled_violence_ensemble_12,ss_cflong_acled_violence_ensemble_18,ss_cflong_acled_violence_ensemble_24,ss_cflong_acled_violence_ensemble_30,ss_cflong_acled_violence_ensemble_36,ss_cflong_acled_violence_ensemble_38,sc_cflong_acled_violence_ensemble
month_id,country_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1,40,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,41,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,42,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,43,0.0,0.0,0.0,0.0,0,0,0,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Summary statistics of the ensemble's prediction column
# (the column name matches cflong_acled_violence_ensemble.col_sc).
# Note that df was zero-filled in the previous cell, so `count` covers every row.
print(df[['sc_cflong_acled_violence_ensemble']].describe())

       sc_cflong_acled_violence_ensemble
count                       33048.000000
mean                            0.009693
std                             0.062317
min                             0.000000
25%                             0.000000
50%                             0.000000
75%                             0.000000
max                             0.807549


In [17]:
# Name of the column in df that holds the ensemble's `sc` predictions.
cflong_acled_violence_ensemble.col_sc

'sc_cflong_acled_violence_ensemble'

In [131]:
# Number of rows with a missing ensemble prediction.
# NOTE(review): the recorded output (30996) cannot come from a df that was
# already zero-filled; judging by its execution count this cell was likely run
# out of order relative to the df.fillna(0) cell — confirm on a fresh run.
count = df["sc_cflong_acled_violence_ensemble"].isna().sum()
count

30996

In [76]:
# Tried to drop these columns, but that didn't help
# df.drop(empty_cols,
#         axis=1,
#         inplace=True)

In [21]:
import pdb

In [29]:
# Evaluate every ensemble on periods B and C only; period A is not evaluated
# here, so its entries in the printed scores stay empty.
for ensemble in ensembles:
    ensemble.evaluate(df, period=periods_by_name["B"])
    ensemble.evaluate(df, period=periods_by_name["C"])

[2020-11-19 11:27:38,783] - views.apps.model.api:966 - INFO - Evaluating cflong_acled_violence_ensemble
[2020-11-19 11:27:38,786] - views.apps.model.api:970 - DEBUG - Evaluating uncalibrated predictions for cflong_acled_violence_ensemble period B step-combined
[2020-11-19 11:27:38,832] - views.apps.model.api:992 - DEBUG - Evaluating calibrated predictions for cflong_acled_violence_ensemble period B step-combined
[2020-11-19 11:27:38,835] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cflong_acled_violence_ensemble period B step 1
[2020-11-19 11:27:38,881] - views.apps.model.api:1033 - DEBUG - Evaluating calibrated predictions for cflong_acled_violence_ensemble period B step 1
[2020-11-19 11:27:38,887] - views.apps.model.api:1009 - DEBUG - Evaluating uncalibrated predictions for cflong_acled_violence_ensemble period B step 3
[2020-11-19 11:27:38,931] - views.apps.model.api:1033 - DEBUG - Evaluating calibrated predictions for cflong_acled_violence_ensemble 

In [30]:
# Print each ensemble's per-period, per-step model weights and its evaluation
# scores as indented JSON, separated by a line of 80 '#' characters.
for ensemble in ensembles:
    print(ensemble.name)
    print("Weights:")
    print(json.dumps(ensemble.weights, indent=2))
    print("Eval scores:")
    print(json.dumps(ensemble.evaluation.scores, indent=2))
    print("#"*80)

cflong_acled_violence_ensemble
Weights:
{
  "B": {
    "1": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    },
    "3": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    },
    "6": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    },
    "9": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    },
    "12": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    },
    "18": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    },
    "24": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    },
    "30": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    },
    "36": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    },
    "38": {
      "cm_sb_cflong": 0.5,
      "cm_sb_acled_violence": 0.5
    }
  }
}
Eval scores:
{
  "A": {
    "1": {
      "uncalibrated": {},
      "calibrated": {}
    },
    "3": {
      "uncalibrated": {},
      "calibrated": 

In [101]:
# Name of the ensemble's calibrated `sc` column. Per the recorded outputs this is
# the same string as col_sc ('sc_cflong_acled_violence_ensemble').
cflong_acled_violence_ensemble.col_sc_calibrated

'sc_cflong_acled_violence_ensemble'