In [62]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [63]:
import numpy as np
import pandas as pd
import sys
sys.path.append("../src")
from margin_calibration import MarginCalibration

# Dataset Generation

In [64]:
# Fixed seed so that Restart Kernel -> Run All reproduces the same dataset
# (and therefore the same calibration outputs) every time.
SEED = 42
rng = np.random.default_rng(SEED)

# Create 100 random observations with their respective sampling
# probabilities, rescaled so that the probabilities sum to 0.2 (20 %).
n_obs = 100
sampling_probabilities = rng.random(n_obs)
sampling_probabilities = sampling_probabilities / sampling_probabilities.sum() * .2

# Create a matrix of shape (n_obs, n_margins), with values drawn
# uniformly from [0, 1000).
n_margins = 2  # Let's say we have two variables
calibration_matrix = 1000 * rng.random((n_obs, n_margins))

# Now we create the calibration target.
# It has one entry per margin: the column sum of the calibration matrix
# over the sampled observations, multiplied by 100 to stand in for the
# total of that margin over the whole population.
calibration_target = calibration_matrix.sum(axis=0) * 100

# The costs must have one entry per margin.
costs = (1,) * n_margins  # We give the same cost to every variable

# Margin Calibration

In [65]:
# One calibrator per method name. The two extra positional arguments given
# to "logit" and "truncated_linear" are shared; presumably they are the
# lower/upper bounds of the method — TODO confirm against MarginCalibration.
BOUNDS = (.5, 1.5)

mc = MarginCalibration()
mc_logit = MarginCalibration("logit", *BOUNDS)
mc_rr = MarginCalibration("raking_ratio")
mc_lt = MarginCalibration("truncated_linear", *BOUNDS)

In [48]:
# Calibrate with the default-configured calibrator and display the `.x`
# attribute of the object returned by `calibration`.
mc.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([ 6.13969356e+01,  4.71882769e+02,  3.84449086e+02, -5.09556222e+01,
        5.57095813e+01,  3.20863045e+02,  1.62417446e+02, -1.05865612e+02,
        1.77836254e+04, -3.56604817e+02, -4.25261632e+01,  1.02887393e+03,
        2.87979151e+02,  1.20537706e+03,  5.70938751e+02, -2.05643646e+01,
        8.21692462e+01,  2.42507109e+01, -2.38766279e+00, -1.38373723e+02,
       -2.87394591e+02,  1.19913543e+03,  7.31373022e+01,  1.07848396e+02,
        1.64023035e+01,  2.74946345e+03,  1.07734843e+02, -2.21374699e+03,
       -7.54841384e+01,  1.73351106e+01,  1.01679650e+04,  6.33954724e+01,
       -4.63530520e+02, -4.13216370e+01,  1.12366841e+02, -3.76259655e+02,
        2.12306774e+02,  2.54982710e+03,  9.41841357e+01, -8.84221253e+02,
        7.54090630e+02,  3.15049572e+02, -1.44935852e+03, -4.51429490e+01,
        5.62217996e+02, -7.88965309e+01,  1.71594049e+02, -1.55616406e+01,
        6.36699019e+01, -1.63147979e+02,  9.43626723e+01,  2.28226608e+02,
       -1.18536429e+02,  

In [49]:
# Same inputs, "logit" calibrator; display the `.x` attribute of the result.
# NOTE(review): the recorded output contains negative entries even though
# this calibrator was built with .5 and 1.5 — confirm what `.x` represents.
mc_logit.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([-1.25200196e+02,  2.76406743e+02,  9.40386002e+01, -6.42555193e+02,
        3.25944848e+02,  2.92379956e+01, -1.69935891e+02, -1.12107492e+03,
        1.67349377e+04, -9.11450204e+02, -1.09404232e+02,  2.14088079e+03,
        7.16865374e+01,  5.26097162e+02,  1.75009589e+02, -4.06338004e+02,
       -1.01860550e+02, -3.02469604e+02, -5.06003375e+02, -4.85935233e+02,
       -1.04487934e+03,  1.91060049e+03, -4.47836982e+01, -9.23793622e+01,
       -3.39637316e+02,  1.01025701e+04, -1.11405152e+02,  7.19322768e+02,
       -9.23711709e+02, -2.44386413e+02,  6.30387500e+03, -2.80021771e+02,
       -2.43208856e+02, -5.88604913e+02, -1.56396863e+01, -9.71457071e+02,
       -4.80008282e+01,  2.83940788e+03,  5.59799716e+02,  3.33189766e+02,
        4.11513811e+03,  2.69726495e+02, -3.83238447e+01, -6.83101862e+02,
        1.80523306e+02, -8.55162743e+02, -2.89303485e+01, -3.88754159e+02,
       -2.33388533e+02, -7.26929517e+02,  1.17031912e+01,  1.34157156e+01,
       -9.41237278e+02,  

In [None]:
# Same inputs, "raking_ratio" calibrator; display the `.x` attribute of the result.
# NOTE(review): the saved notebook shows no execution count and no output
# for this cell — confirm that it runs to completion on a fresh kernel.
mc_rr.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [55]:
# Same inputs, "truncated_linear" calibrator; display the `.x` attribute
# of the result.
mc_lt.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([ -302.96266589,   125.2225149 ,    47.25587838,  -478.58988813,
         -67.6688399 ,    50.0278599 ,  -180.32897653,  -693.65713367,
       16748.17974579,  -743.69939962,  -394.15052135,  2118.67068999,
         -46.73867718,   637.38515935,   213.07746145,  -378.80424368,
        -233.22645024,  -426.4833793 ,  -534.37783473,  -457.31515319,
        -667.65193803,  1864.58016016,   -40.80644213,  -226.51562065,
        -448.44583761, 10193.44221544,  -264.17416938,   767.10432249,
        -648.8456342 ,  -300.08486021,  6304.61906132,  -322.96149334,
        -320.56449758,  -558.13961287,   -20.49604706,  -761.65329788,
         -46.88281165,  3010.96905172,    85.43312099,   241.10280028,
        4403.87064111,   -53.28261294,   -19.5110039 ,  -612.47216807,
         343.75568132,  -515.78296096,  -141.79394454,  -216.07016729,
        -451.95786044,  -505.7824692 ,  -146.69132288,    65.88572672,
        -722.69355391,    47.382848  ,  -208.70981273,  -531.06820999,
      

# Penalized Margin Calibration

In [56]:
# Penalized counterparts of the four calibrators: identical positional
# arguments, plus the same `penalty` and `costs` keywords for each of them.
penalty_kwargs = {"penalty": .1, "costs": costs}

mc_pen = MarginCalibration(**penalty_kwargs)
mc_logit_pen = MarginCalibration("logit", .5, 1.5, **penalty_kwargs)
mc_rr_pen = MarginCalibration("raking_ratio", **penalty_kwargs)
mc_lt_pen = MarginCalibration("truncated_linear", .5, 1.5, **penalty_kwargs)

In [57]:
# Penalized calibration with the default method; display the `.x`
# attribute of the object returned by `calibration`.
mc_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([-8.01182349e+00,  1.94238101e+02,  4.86336207e+02, -1.31946674e+02,
       -1.57810559e+02,  3.69966635e+02,  2.56835441e+02, -2.53118850e+02,
        2.57631750e+04, -3.47106880e+02, -1.44856735e+02,  7.37872720e+02,
        2.65471728e+02,  1.10188612e+03,  4.52415023e+02, -1.70957661e+02,
        1.81959212e+02, -5.06175475e+01, -2.61611067e+02, -2.01841601e+02,
       -2.79360445e+02,  8.06196183e+02, -2.45493623e+02,  1.75025843e+02,
        2.15125016e+02,  8.08695398e+03,  7.92895688e+02, -1.63149117e+03,
       -2.10489275e+02,  7.17071790e+01,  1.15136705e+04,  2.10034565e+02,
       -9.98530364e+02, -1.48894335e+02, -2.29678571e+02, -2.49996351e+02,
        2.41970305e+02,  2.38685310e+03, -2.16648700e+02, -1.36309192e+03,
        9.75070572e+02, -1.76472522e+02, -1.26262486e+03, -1.89372386e+02,
        5.95252226e+02, -2.75800332e+02,  1.84479101e+02, -3.35445893e+02,
        1.83741734e+02, -3.82065663e+02, -3.17922262e+02,  6.89061968e+01,
       -2.83598832e+02, -

In [59]:
# Penalized calibration with the "logit" calibrator; display the `.x`
# attribute of the result.
mc_logit_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([  164.82927322,   556.18101757,   337.23705983,   125.23731664,
         125.97261258,   216.29678307,   147.8787097 ,   149.38879857,
       16747.99010714,   324.14130828,   318.06832239,  2391.40603074,
         334.83228627,   715.21915974,   363.04580371,   171.71069774,
         211.56382273,   130.1141079 ,   138.98592624,   503.25416791,
         372.45280912,  2166.78319908,   210.51079738,   177.24486551,
         144.19334525, 10193.41525389,   169.98132878,  1966.05615932,
         154.04577118,   252.20100329,  6304.06882162,   220.74725376,
         827.88256931,   155.83008252,   245.89842129,   362.96836124,
         155.94067226,  3027.12138589,   229.60361901,  1375.01386281,
        4360.39712421,   553.51869899,  1347.55238813,   139.95148986,
         306.75770396,   137.29338382,   241.09131227,   144.45187035,
         132.798455  ,   243.35495481,   288.95499338,   184.09849356,
         144.27183392,   130.64983285,   231.07704651,   246.32619824,
      

In [60]:
# Penalized calibration with the "raking_ratio" calibrator; display the
# `.x` attribute of the result.
mc_rr_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([  295.25627144,  1084.80174741,   654.1245407 ,   202.03691693,
         224.75270611,   421.40589306,   273.04759019,   241.88550966,
       33478.20119644,   583.60134454,   591.9378875 ,  4750.16791107,
         644.3756867 ,  1423.33714978,   716.77625   ,   301.88421851,
         388.41977004,   222.22006641,   232.8537668 ,   955.43646551,
         686.46065476,  4303.6136449 ,   390.43335313,   324.12797512,
         247.58038699, 20351.8581418 ,   308.39610866,  3867.5416456 ,
         253.76681624,   460.90773382, 12599.9752765 ,   399.40826227,
        1602.4986635 ,   262.5429432 ,   463.18444808,   661.69236984,
         297.80695195,  6026.17878315,   430.50224982,  2695.88351723,
        8677.79184123,  1076.1274987 ,  2630.70575165,   229.57170135,
         609.43332343,   223.46016964,   453.98702608,   250.07396647,
         227.23932545,   432.39104938,   546.50589669,   353.18143491,
         230.34747528,   239.62990206,   429.56444092,   437.40449751,
      

In [58]:
# Penalized calibration with the "truncated_linear" calibrator; display
# the `.x` attribute of the result.
# NOTE(review): the recorded output of this cell is identical to the one
# recorded for `mc_logit_pen`, and its execution count (58) precedes the
# cells above it — re-run on a fresh kernel and confirm the two methods
# are really expected to agree.
mc_lt_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([  164.82927322,   556.18101757,   337.23705983,   125.23731664,
         125.97261258,   216.29678307,   147.8787097 ,   149.38879857,
       16747.99010714,   324.14130828,   318.06832239,  2391.40603074,
         334.83228627,   715.21915974,   363.04580371,   171.71069774,
         211.56382273,   130.1141079 ,   138.98592624,   503.25416791,
         372.45280912,  2166.78319908,   210.51079738,   177.24486551,
         144.19334525, 10193.41525389,   169.98132878,  1966.05615932,
         154.04577118,   252.20100329,  6304.06882162,   220.74725376,
         827.88256931,   155.83008252,   245.89842129,   362.96836124,
         155.94067226,  3027.12138589,   229.60361901,  1375.01386281,
        4360.39712421,   553.51869899,  1347.55238813,   139.95148986,
         306.75770396,   137.29338382,   241.09131227,   144.45187035,
         132.798455  ,   243.35495481,   288.95499338,   184.09849356,
         144.27183392,   130.64983285,   231.07704651,   246.32619824,
      