In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import sys
sys.path.append("../src")
from margin_calibration import MarginCalibration

# Dataset Generation

In [None]:
# Seed a dedicated random generator so that "Restart Kernel -> Run All"
# regenerates exactly the same synthetic dataset.
SEED = 42
rng = np.random.default_rng(SEED)

# Draw one random sampling probability per observation (100 observations),
# then rescale them so that they sum to 0.2 (i.e. 20 %).
n_obs = 100
raw_draws = rng.random(n_obs)
sampling_probabilities = raw_draws / raw_draws.sum() * 0.2

# Calibration matrix of shape (n_obs, n_margins), with values ranging
# from 0 to 1000.
n_margins = 2  # Let us say we have two calibration variables
calibration_matrix = 1000 * rng.random((n_obs, n_margins))

# Calibration target: one value per margin (length n_margins). It stands for
# the sums of the margins over the whole population, built here as 100 times
# the column totals of the calibration matrix.
calibration_target = calibration_matrix.sum(axis=0) * 100

# One cost per margin is expected.
costs = (1, 1)  # Same cost for both variables

# Margin Calibration

In [12]:
# One calibrator per method. The two numeric arguments (0.5, 1.5) are
# presumably lower/upper bounds used by the bounded methods -- TODO confirm
# against the MarginCalibration signature.
mc = MarginCalibration()  # default settings
mc_logit = MarginCalibration("logit", 0.5, 1.5)
mc_rr = MarginCalibration("raking_ratio")
mc_lt = MarginCalibration("truncated_linear", 0.5, 1.5)

In [42]:
# Run the calibration with the default-configured instance; the cell displays
# the `x` attribute of the returned result.
mc.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([-1.43970283e+03,  3.96017363e+02, -4.28405784e+03,  2.53723314e+02,
        7.34783988e+01, -1.28019825e+03,  4.06584583e+02, -4.30880680e+02,
        1.28367309e+02, -2.15708284e+02,  1.28293584e+02, -4.98602491e+02,
       -2.09051516e+02,  1.73642604e+01,  2.48362688e+02,  9.84903065e+01,
       -3.45878158e+02, -1.21797631e+02,  2.22675318e+02,  6.84563213e+02,
        4.34579953e+02,  2.17212804e+02, -7.35633600e+01,  4.31917806e+01,
        3.90120052e+03, -5.17544502e+01,  9.23691364e+02, -1.22044535e+02,
       -3.04104824e+01,  1.20465706e+03,  3.63749433e+02,  2.47570922e+03,
       -5.17377844e+01, -3.50429794e+02,  6.65942889e+04, -2.12451373e+02,
        1.94396466e+02,  2.30211174e+01,  7.05297493e+02,  2.65569878e+02,
       -2.53464615e+04,  2.23926625e+02,  2.13201359e+02, -3.14978794e+02,
        1.94205363e+02, -3.18478034e+02, -2.48328743e+02,  4.36307974e+02,
       -1.61552399e+02,  1.70478027e+02,  1.61459916e+02, -9.61067024e+01,
        2.68549925e+02,  

In [None]:
# Same inputs, solved with the "logit" calibrator; display the result's `x`.
mc_logit.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
# Same inputs, solved with the "raking_ratio" calibrator; display the result's `x`.
mc_rr.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
# Same inputs, solved with the "truncated_linear" calibrator; display the result's `x`.
mc_lt.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

# Penalized Margin Calibration

In [20]:
# Penalized counterparts of the four calibrators: each one receives the same
# penalty value (0.1) and the per-margin `costs` defined with the dataset.
mc_pen = MarginCalibration(
    penalty=0.1,
    costs=costs,
)
mc_logit_pen = MarginCalibration(
    "logit",
    0.5,
    1.5,
    penalty=0.1,
    costs=costs,
)
mc_rr_pen = MarginCalibration(
    "raking_ratio",
    penalty=0.1,
    costs=costs,
)
mc_lt_pen = MarginCalibration(
    "truncated_linear",
    0.5,
    1.5,
    penalty=0.1,
    costs=costs,
)

In [None]:
# Penalized calibration with the default method; display the result's `x`.
mc_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [24]:
# Penalized calibration with the "logit" calibrator; display the result's `x`.
mc_logit_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([ 99.58655609, 149.99999906, 149.99999992, 149.99999996])

In [23]:
# Penalized calibration with the "raking_ratio" calibrator; display the result's `x`.
mc_rr_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([ 71.86637879, 100.30839441, 162.75838331, 234.94483732])

In [22]:
# Penalized calibration with the "truncated_linear" calibrator; display the result's `x`.
mc_lt_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([ 99.58655551, 149.99999835, 150.        , 150.        ])