In [None]:
# Load IPython's autoreload extension and select mode 2, which reloads all
# imported modules before each cell runs, so edits to the local
# `margin_calibration` module are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import warnings
import sys

sys.path.append("../src")
from margin_calibration import MarginCalibration

In [None]:
warnings.simplefilter("ignore")

# Dataset Generation

In [None]:
# Seed the generator so that "Restart Kernel -> Run All" rebuilds exactly the
# same synthetic dataset (and therefore the same calibration results).
SEED = 42
rng = np.random.default_rng(SEED)

# Draw 100 random observations with their respective sampling
# probabilities, rescaled so that the probabilities sum to 0.2 (20 %).
n_obs = 100
sampling_probabilities = rng.random(n_obs)
sampling_probabilities = sampling_probabilities / sampling_probabilities.sum() * 0.2

# Create a matrix of shape (n_obs, n_margins) whose entries are drawn
# uniformly between 0 and 1000.
n_margins = 2  # Let's say we have two calibration variables
calibration_matrix = 1000 * rng.random((n_obs, n_margins))

# Now we create the calibration target: a 1-D array of length n_margins
# holding, for each margin, the column sum over the sample multiplied by 100.
# NOTE(review): the factor 100 presumably stands for the population-to-sample
# ratio used to mimic population totals -- confirm.
calibration_target = calibration_matrix.sum(axis=0) * 100

# One cost per margin; here every variable gets the same cost.
costs = (1,) * n_margins

# Margin Calibration

In [None]:
# One calibrator per configuration compared in the cells below.
# NOTE(review): the two numeric arguments (0.5, 1.5) are presumably lower and
# upper bounds used by the bounded methods -- confirm in margin_calibration.

# Constructor defaults only.
mc = MarginCalibration()

# "logit" method with the numeric arguments 0.5 and 1.5.
mc_logit = MarginCalibration("logit", 0.5, 1.5)

# "raking_ratio" method, no further arguments.
mc_rr = MarginCalibration("raking_ratio")

# "truncated_linear" method with the numeric arguments 0.5 and 1.5.
mc_lt = MarginCalibration("truncated_linear", 0.5, 1.5)

In [None]:
%%time
# Calibrate with the default-constructed `mc`; %%time reports how long the cell takes.
mc.calibration(sampling_probabilities, calibration_matrix, calibration_target)

In [None]:
%%time
# Same inputs as the other runs, calibrated with the "logit" instance; %%time reports the cell duration.
mc_logit.calibration(sampling_probabilities, calibration_matrix, calibration_target)

In [None]:
%%time
# Same inputs as the other runs, calibrated with the "raking_ratio" instance; %%time reports the cell duration.
mc_rr.calibration(sampling_probabilities, calibration_matrix, calibration_target)

In [None]:
%%time
# Same inputs as the other runs, calibrated with the "truncated_linear" instance; %%time reports the cell duration.
mc_lt.calibration(sampling_probabilities, calibration_matrix, calibration_target)

# Penalized Margin Calibration

In [None]:
# Keyword arguments shared by every penalized calibrator: the same penalty
# value (0.1) and the per-margin costs defined in the dataset cell.
penalty_kwargs = dict(penalty=0.1, costs=costs)

# Penalized counterparts of the four calibrators, one per method.
mc_pen = MarginCalibration(**penalty_kwargs)
mc_logit_pen = MarginCalibration("logit", 0.5, 1.5, **penalty_kwargs)
mc_rr_pen = MarginCalibration("raking_ratio", **penalty_kwargs)
mc_lt_pen = MarginCalibration("truncated_linear", 0.5, 1.5, **penalty_kwargs)

In [None]:
%%time
# Penalized calibration with the otherwise default-constructed `mc_pen`; %%time reports the cell duration.
mc_pen.calibration(sampling_probabilities, calibration_matrix, calibration_target)

In [None]:
%%time
# Penalized calibration with the "logit" instance on the same inputs; %%time reports the cell duration.
mc_logit_pen.calibration(sampling_probabilities, calibration_matrix, calibration_target)

In [None]:
%%time
# Penalized calibration with the "raking_ratio" instance on the same inputs; %%time reports the cell duration.
mc_rr_pen.calibration(sampling_probabilities, calibration_matrix, calibration_target)

In [None]:
%%time
# Penalized calibration with the "truncated_linear" instance on the same inputs; %%time reports the cell duration.
mc_lt_pen.calibration(sampling_probabilities, calibration_matrix, calibration_target)