In [2]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to local source files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [18]:
import numpy as np
import pandas as pd
import warnings
import sys

sys.path.append("../src")
from margin_calibration import MarginCalibration

In [19]:
warnings.simplefilter("ignore")

# Dataset Generation

In [20]:
# Seed a dedicated random generator so that "Restart Kernel -> Run All"
# regenerates exactly the same toy dataset every time.
SEED = 42
rng = np.random.default_rng(SEED)

# Draw 100 random observations with their respective sampling
# probabilities, rescaled so that the probabilities sum to 0.2 (20 %).
n_obs = 100
sampling_probabilities = rng.random(n_obs)
sampling_probabilities = sampling_probabilities / sampling_probabilities.sum() * 0.2

# Create a matrix of size n_obs * n_margins, with values drawn uniformly
# between 0 and 1000.
n_margins = 2  # Let's say we have two variables
calibration_matrix = 1000 * rng.random((n_obs, n_margins))

# Now we create the calibration target.
# It has one entry per margin and contains the column sums of the
# calibration matrix multiplied by 100.
# NOTE(review): the factor 100 presumably scales the sample totals up to
# population totals -- confirm that this is the intended scale.
calibration_target = calibration_matrix.sum(axis=0) * 100

# There must be one cost per margin; here every margin gets the same cost.
costs = (1,) * n_margins

# Let's say we want to work with pandas DataFrames instead of numpy arrays.
# Margin column names are derived from n_margins so that changing the number
# of margins above does not break the DataFrame construction.
margin_names = [f"margin{i + 1}" for i in range(n_margins)]
sampling_probabilities = pd.DataFrame(sampling_probabilities, columns=["weights"])
calibration_matrix = pd.DataFrame(calibration_matrix, columns=margin_names)
calibration_target = pd.DataFrame(calibration_target, columns=["margin_sums"])

# Margin Calibration

In [21]:
# Build one calibrator per method. The "logit" and "truncated_linear"
# variants receive the same two extra positional arguments.
# NOTE(review): 0.5 and 1.5 are presumably lower/upper bounds used by those
# methods -- confirm against the MarginCalibration signature.
shared_args = (0.5, 1.5)

mc = MarginCalibration()
mc_logit = MarginCalibration("logit", *shared_args)
mc_rr = MarginCalibration("raking_ratio")
mc_lt = MarginCalibration("truncated_linear", *shared_args)

In [22]:
# Run the calibration with the default-constructed calibrator, then display
# the `x` attribute of the returned object.
mc_result = mc.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
)
mc_result.x

array([ 7.07792502e+01, -2.62140030e+02, -1.24370917e+01,  2.66288041e+02,
        5.45251000e+02,  7.08635853e+01, -8.37956135e+01, -9.86135412e+01,
       -2.39231847e+01,  8.00188009e+01,  2.30923252e+02, -1.45550020e+01,
       -2.12817792e+02,  9.86280855e+01, -1.93353734e+01,  6.41679375e+01,
        1.00625921e+01,  5.42886727e+02,  2.37861455e+03,  2.54512019e+02,
        7.19214066e+01, -3.81435529e+02,  7.03474736e+02,  8.78881960e+02,
       -1.44019177e+02,  1.58068427e+02,  1.88833690e+03,  1.23218383e+02,
        5.45527564e+02,  1.60500200e+02,  3.64307249e+02,  7.18915170e+02,
       -2.19219921e+02,  3.64937120e+01,  2.13727742e+01,  2.67000978e+02,
       -3.68051907e+01,  3.58448508e+02, -1.43913559e+02,  6.98526501e+02,
        3.02951688e+02,  3.57658367e+01,  2.65280074e+02,  9.97289753e+02,
       -3.39032758e+02,  1.80221383e+02,  8.26863554e+01,  1.50146173e+03,
        1.29555122e+02,  4.81445604e+01,  5.67326233e+02,  4.18447328e+02,
       -9.15882035e+01,  

In [None]:
# Run the calibration with the "logit" calibrator, then display the `x`
# attribute of the returned object.
logit_result = mc_logit.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
)
logit_result.x

In [None]:
# Run the calibration with the "raking_ratio" calibrator, then display the
# `x` attribute of the returned object.
raking_result = mc_rr.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
)
raking_result.x

In [None]:
# Run the calibration with the "truncated_linear" calibrator, then display
# the `x` attribute of the returned object.
truncated_result = mc_lt.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
)
truncated_result.x

# Penalized Margin Calibration

In [None]:
# Build the penalized counterpart of each calibrator. All four share the
# same `penalty` and `costs` keyword arguments, so they are collected once.
penalty_kwargs = dict(penalty=0.1, costs=costs)

mc_pen = MarginCalibration(**penalty_kwargs)
mc_logit_pen = MarginCalibration("logit", 0.5, 1.5, **penalty_kwargs)
mc_rr_pen = MarginCalibration("raking_ratio", **penalty_kwargs)
mc_lt_pen = MarginCalibration("truncated_linear", 0.5, 1.5, **penalty_kwargs)

In [None]:
# Run the calibration with the penalized default calibrator, then display
# the `x` attribute of the returned object.
pen_result = mc_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
)
pen_result.x

In [None]:
# Run the calibration with the penalized "logit" calibrator, then display
# the `x` attribute of the returned object.
logit_pen_result = mc_logit_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
)
logit_pen_result.x

In [None]:
# Run the calibration with the penalized "raking_ratio" calibrator, then
# display the `x` attribute of the returned object.
raking_pen_result = mc_rr_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
)
raking_pen_result.x

In [None]:
# Run the calibration with the penalized "truncated_linear" calibrator, then
# display the `x` attribute of the returned object.
truncated_pen_result = mc_lt_pen.calibration(
    sampling_probabilities,
    calibration_matrix,
    calibration_target,
)
truncated_pen_result.x