In [44]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [45]:
import numpy as np
import pandas as pd
import sys
sys.path.append("../src")
from margin_calibration import MarginCalibration

# Dataset Generation

In [46]:
# Seed a dedicated generator so that "Restart Kernel -> Run All" rebuilds
# exactly the same synthetic dataset on every run.
SEED = 42
rng = np.random.default_rng(SEED)

# Create 100 random observations with their respective
# sampling probabilities, rescaled so that they sum to 20 %
n_obs = 100
sampling_probabilities = rng.random(n_obs)
sampling_probabilities = sampling_probabilities / sampling_probabilities.sum() * .2

# Create a matrix of size n_obs * n_margins, with values ranging from 0 to 1000
n_margins = 2  # Let's say we have two variables
calibration_matrix = 1000 * rng.random((n_obs, n_margins))

# Now we create the calibration target.
# It has one entry per margin: the column sums of the calibration matrix
# multiplied by a fixed factor, standing in for the sums of the margins
# over the whole population.
target_scale = 100
calibration_target = calibration_matrix.sum(axis=0) * target_scale

# The costs must have one entry per margin
costs = (1,) * n_margins  # We give here the same cost to every variable

# Margin Calibration

In [47]:
# One calibrator per configuration exercised in the cells below.
# NOTE(review): the two numeric arguments (0.5, 1.5) are presumably the
# lower/upper bounds used by the bounded methods - confirm against
# MarginCalibration's signature.
mc = MarginCalibration()  # default configuration
mc_logit = MarginCalibration("logit", 0.5, 1.5)
mc_rr = MarginCalibration("raking_ratio")
mc_lt = MarginCalibration("truncated_linear", 0.5, 1.5)

In [48]:
# Run the default-configured calibrator on the synthetic dataset and
# display the `.x` attribute of the object it returns.
mc.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

array([ 6.13969356e+01,  4.71882769e+02,  3.84449086e+02, -5.09556222e+01,
        5.57095813e+01,  3.20863045e+02,  1.62417446e+02, -1.05865612e+02,
        1.77836254e+04, -3.56604817e+02, -4.25261632e+01,  1.02887393e+03,
        2.87979151e+02,  1.20537706e+03,  5.70938751e+02, -2.05643646e+01,
        8.21692462e+01,  2.42507109e+01, -2.38766279e+00, -1.38373723e+02,
       -2.87394591e+02,  1.19913543e+03,  7.31373022e+01,  1.07848396e+02,
        1.64023035e+01,  2.74946345e+03,  1.07734843e+02, -2.21374699e+03,
       -7.54841384e+01,  1.73351106e+01,  1.01679650e+04,  6.33954724e+01,
       -4.63530520e+02, -4.13216370e+01,  1.12366841e+02, -3.76259655e+02,
        2.12306774e+02,  2.54982710e+03,  9.41841357e+01, -8.84221253e+02,
        7.54090630e+02,  3.15049572e+02, -1.44935852e+03, -4.51429490e+01,
        5.62217996e+02, -7.88965309e+01,  1.71594049e+02, -1.55616406e+01,
        6.36699019e+01, -1.63147979e+02,  9.43626723e+01,  2.28226608e+02,
       -1.18536429e+02,  

In [None]:
# Same calibration, using the calibrator built with "logit";
# display the `.x` attribute of the object it returns.
mc_logit.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
# Same calibration, using the calibrator built with "raking_ratio";
# display the `.x` attribute of the object it returns.
mc_rr.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
# Same calibration, using the calibrator built with "truncated_linear";
# display the `.x` attribute of the object it returns.
mc_lt.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

# Penalized Margin Calibration

In [None]:
# Penalized counterparts of the four calibrators: each one additionally
# receives penalty=0.1 and the per-margin `costs` defined with the dataset.
# NOTE(review): the two numeric arguments (0.5, 1.5) are presumably the
# lower/upper bounds used by the bounded methods - confirm against
# MarginCalibration's signature.
mc_pen = MarginCalibration(penalty=0.1, costs=costs)
mc_logit_pen = MarginCalibration("logit", 0.5, 1.5, penalty=0.1, costs=costs)
mc_rr_pen = MarginCalibration("raking_ratio", penalty=0.1, costs=costs)
mc_lt_pen = MarginCalibration("truncated_linear", 0.5, 1.5, penalty=0.1, costs=costs)

In [None]:
# Run the penalized default-configured calibrator on the synthetic dataset
# and display the `.x` attribute of the object it returns.
mc_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
# Penalized calibration, using the calibrator built with "logit";
# display the `.x` attribute of the object it returns.
mc_logit_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
# Penalized calibration, using the calibrator built with "raking_ratio";
# display the `.x` attribute of the object it returns.
mc_rr_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x

In [None]:
# Penalized calibration, using the calibrator built with "truncated_linear";
# display the `.x` attribute of the object it returns.
mc_lt_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
).x