In [44]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [45]:
import numpy as np
import pandas as pd
import warnings
import sys

sys.path.append("../src")
from margin_calibration import MarginCalibration

In [46]:
warnings.simplefilter("ignore")

# Dataset Generation

In [47]:
# Use a seeded generator so that "Restart Kernel -> Run All" rebuilds exactly
# the same dataset (and therefore the same calibration results) every time.
seed = 42
rng = np.random.default_rng(seed)

# Draw 100 random observations with their respective sampling weights,
# rescaled so that those weights sum to 20 %.
n_obs = 100
raw_draws = rng.random(n_obs)
sampling_probabilities = raw_draws / raw_draws.sum() * .2

# Create a matrix of shape (n_obs, n_margins), with margins ranging from 0 to 1000.
n_margins = 2  # Let's say we have two variables
calibration_matrix = 1000 * rng.random((n_obs, n_margins))

# Now we create the calibration target.
# It holds one value per margin: the column sum of the matrix multiplied by
# 100, standing in for the sum of that margin over the whole population.
calibration_target = calibration_matrix.sum(axis=0) * 100

# There must be one cost per margin; here every variable gets the same cost.
costs = (1,) * n_margins

# Let's say we want to work with pandas DataFrames instead of numpy arrays.
# Column names are derived from n_margins so the cell keeps working when the
# number of margins is changed.
margin_names = [f"margin{i + 1}" for i in range(n_margins)]
sampling_probabilities = pd.DataFrame(sampling_probabilities, columns=["weights"])
calibration_matrix = pd.DataFrame(calibration_matrix, columns=margin_names)
calibration_target = pd.DataFrame(calibration_target, columns=["margin_sums"])

# Margin Calibration

In [48]:
# Build one calibrator per method name used in this notebook.
# NOTE(review): the two numeric positional arguments are presumably the
# lower/upper bounds used by the bounded methods -- confirm against
# MarginCalibration's signature.
mc = MarginCalibration()  # no arguments: whatever the class defaults to
mc_logit = MarginCalibration("logit", 0.5, 1.5)
mc_rr = MarginCalibration("raking_ratio")
mc_lt = MarginCalibration("truncated_linear", 0.5, 1.5)

In [49]:
%%time
mc.calibration(sampling_probabilities, calibration_matrix, calibration_target).x

CPU times: user 861 ms, sys: 1 μs, total: 861 ms
Wall time: 132 ms


array([-8.62927279e+02,  3.37323904e+03,  6.58306731e+02,  8.58933346e+01,
        1.08068917e+02,  3.18720989e+02,  2.47091483e+02, -1.09729706e+02,
        8.97581530e+01,  1.46036516e+02,  3.73436870e+02, -5.28730137e+02,
        4.82030498e+02,  4.00200083e+02,  8.32482781e+01, -8.03151799e+01,
        5.43909915e+02,  1.87119981e+02,  9.53283493e+02,  4.71954814e+01,
        3.89095256e+02, -8.83156722e+01,  1.50339674e+02, -2.03980305e+02,
       -9.00828151e+01, -6.58302529e+01, -9.38809602e+01,  3.09446654e+02,
        1.41568702e+02, -3.04085370e+02,  2.38639550e+02,  1.51286289e+02,
       -1.51546368e+02, -4.47263914e+01,  3.05395549e+02,  2.14962644e+02,
        1.23056053e+02,  3.90656535e+02,  1.93275278e+01,  5.14721326e+01,
        1.05524808e+02,  2.02963691e+02,  7.97154028e+02,  1.33058955e+02,
       -9.40917453e+01,  2.14797598e+01,  4.85236843e+01,  6.94765987e+02,
        7.53635092e+01, -4.10535822e+00, -6.29667474e+00, -6.97961651e+01,
        7.62053988e+01,  

In [None]:
%%time
mc_logit.calibration(sampling_probabilities, calibration_matrix, calibration_target).x

In [None]:
%%time
mc_rr.calibration(sampling_probabilities, calibration_matrix, calibration_target).x

In [None]:
%%time
mc_lt.calibration(sampling_probabilities, calibration_matrix, calibration_target).x

# Penalized Margin Calibration

In [None]:
# Same four methods as in the unpenalized section, now built with a penalty
# of 0.1 and the per-margin `costs` defined in the dataset-generation cell.
# NOTE(review): the two numeric positional arguments are presumably the
# lower/upper bounds used by the bounded methods -- confirm against
# MarginCalibration's signature.
mc_pen = MarginCalibration(penalty=0.1, costs=costs)
mc_logit_pen = MarginCalibration("logit", 0.5, 1.5, penalty=0.1, costs=costs)
mc_rr_pen = MarginCalibration("raking_ratio", penalty=0.1, costs=costs)
mc_lt_pen = MarginCalibration("truncated_linear", 0.5, 1.5, penalty=0.1, costs=costs)

In [None]:
%%time
mc_pen.calibration(sampling_probabilities, calibration_matrix, calibration_target).x

In [None]:
%%time
mc_logit_pen.calibration(sampling_probabilities, calibration_matrix, calibration_target).x

In [None]:
%%time
mc_rr_pen.calibration(sampling_probabilities, calibration_matrix, calibration_target).x

In [None]:
%%time
mc_lt_pen.calibration(sampling_probabilities, calibration_matrix, calibration_target).x