In [67]:
# Load IPython's autoreload extension and switch it to mode 2, which reloads
# imported modules before each cell execution so edits to local source files
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [68]:
import numpy as np
import pandas as pd
import warnings
import sys

# Add the relative directory "../src" to the import search path before the
# local import below; a relative entry is resolved against the kernel's
# current working directory.
# NOTE(review): presumably margin_calibration.py lives in ../src -- confirm.
sys.path.append("../src")
from margin_calibration import MarginCalibration

In [69]:
# Install a blanket "ignore" filter: from this point on no warning is shown.
# NOTE(review): this also hides any warning emitted during the calibration
# calls below; consider narrowing the filter to specific categories.
warnings.simplefilter("ignore")

# Dataset Generation

In [70]:
# Seed NumPy's global generator so that "Restart Kernel -> Run All"
# regenerates exactly the same synthetic dataset.
np.random.seed(0)

# Draw 100 random observations with their respective sampling
# probabilities, rescaled so that the probabilities sum to 0.2 (20 %).
n_obs = 100
sampling_probabilities = np.random.rand(n_obs)
sampling_probabilities = sampling_probabilities / sampling_probabilities.sum() * 0.2

# Build a matrix of shape (n_obs, n_margins) with entries drawn
# uniformly from [0, 1000).
n_margins = 2  # Let's say we have two variables
calibration_matrix = 1000 * np.random.rand(n_obs, n_margins)

# Now we create the calibration target: a vector of length n_margins
# holding, for each margin, the column sum of the sample multiplied by 100.
# NOTE(review): the factor 100 presumably expands the sample totals to
# population totals -- confirm it is the intended expansion factor.
calibration_target = calibration_matrix.sum(axis=0) * 100

# One cost per margin; derived from n_margins so the two stay in sync.
costs = (1,) * n_margins  # We give the same cost to every variable

# Margin Calibration

In [71]:
# One calibrator per method, all built without penalty or cost arguments.
# NOTE(review): the two extra positional values (0.5, 1.5) passed for "logit"
# and "truncated_linear" are presumably lower/upper bounds -- TODO confirm.
mc = MarginCalibration()  # no arguments: relies on the constructor defaults
mc_logit = MarginCalibration("logit", 0.5, 1.5)
mc_rr = MarginCalibration("raking_ratio")
mc_lt = MarginCalibration("truncated_linear", 0.5, 1.5)

In [72]:
%%time
# Calibrate with the default-constructed calibrator and display the `.x`
# attribute of the returned object.
mc.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

CPU times: user 11 s, sys: 1.48 ms, total: 11 s
Wall time: 1.6 s


array([ 2.29593425e+03,  9.20545208e+02,  1.09096607e+02, -3.03178292e+00,
       -5.00271426e+01, -3.52665001e+01, -4.39878483e+02, -1.12680630e+03,
        2.68243391e+02,  1.40051091e+03, -5.22999938e+01,  9.64619056e+02,
        5.84631144e+02,  4.39468821e+02, -2.56084682e+01,  5.56607315e+02,
        1.45378148e+02,  5.65028612e+02, -1.27357836e+02, -9.39588221e+01,
       -5.43150232e+01,  1.25432775e+02,  1.72686005e+02, -1.11247072e+01,
        8.68234261e+01,  9.98829483e+01,  1.21266091e+02,  7.35376060e+01,
        2.94606832e+02, -8.36416459e+02,  1.44352177e+03,  2.33689340e+02,
        3.28411499e+02, -7.30709386e+03, -6.09947774e+01, -6.03790701e+01,
        2.02604144e+02,  3.15722505e+02,  1.68644747e+02, -1.38443969e+02,
       -1.22604582e+02,  2.51835822e+01,  8.54712141e+00, -7.32591375e+01,
        2.34489502e+02,  2.25250792e+02, -1.88811935e+01,  1.20391518e+03,
        2.13312617e+01, -2.05709991e+01,  4.38651712e+02,  2.47863176e+02,
        1.88896902e+02,  

In [None]:
%%time
# Calibrate with the "logit" calibrator and display the `.x` attribute of
# the returned object.
mc_logit.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
%%time
# Calibrate with the "raking_ratio" calibrator and display the `.x`
# attribute of the returned object.
mc_rr.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
%%time
# Calibrate with the "truncated_linear" calibrator and display the `.x`
# attribute of the returned object.
mc_lt.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

# Penalized Margin Calibration

In [None]:
# Same four methods as the unpenalized calibrators, each now built with
# penalty=0.1 and the per-margin `costs` tuple.
mc_pen = MarginCalibration(penalty=0.1, costs=costs)
mc_logit_pen = MarginCalibration("logit", 0.5, 1.5, penalty=0.1, costs=costs)
mc_rr_pen = MarginCalibration("raking_ratio", penalty=0.1, costs=costs)
mc_lt_pen = MarginCalibration("truncated_linear", 0.5, 1.5, penalty=0.1, costs=costs)

In [None]:
%%time
# Calibrate with the penalized default calibrator and display the `.x`
# attribute of the returned object.
mc_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
%%time
# Calibrate with the penalized "logit" calibrator and display the `.x`
# attribute of the returned object.
mc_logit_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
%%time
# Calibrate with the penalized "raking_ratio" calibrator and display the
# `.x` attribute of the returned object.
mc_rr_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
%%time
# Calibrate with the penalized "truncated_linear" calibrator and display
# the `.x` attribute of the returned object.
mc_lt_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x