# Minimal Oaxaca Demo

This notebook demonstrates a minimal example of running the Oaxaca-Blinder decomposition and printing the results.

In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from oaxaca import Oaxaca

In [2]:
# Load sample data
df = pd.read_csv("sample_data.csv")
df.head()

Unnamed: 0,age,female,foreign_born,LTHS,high_school,some_college,college,advanced_degree,education_level,ln_real_wage
0,52,0,1,0,1,0,0,0,high_school,2.140066
1,46,1,1,0,1,0,0,0,high_school,
2,31,1,1,0,1,0,0,0,high_school,2.499795
3,35,0,1,0,1,0,0,0,high_school,2.70805
4,19,0,0,0,1,0,0,0,high_school,2.079442


In [3]:
# Initialize and fit the Oaxaca model
oaxaca_model = Oaxaca()

# Fit the model using a simple formula
results = oaxaca_model.fit(
    formula="exp(ln_real_wage) ~ -1 + age + female + C(education_level)",
    data=df,
    group_variable="foreign_born"
)

print("Model fitted successfully!")
print(f"Groups: {results.groups_}")

Model fitted successfully!
Groups: [0, 1]


In [None]:
# Run two-fold decomposition
twofold_decomposition = results.two_fold()

In [None]:
# Display detailed summary - shows rich HTML output in Jupyter
twofold_decomposition

Oaxaca-Blinder Decomposition Results
Group Variable: foreign_born
Groups: 0 (Group 0) vs 1 (Group 1)
OLS Regression Results by Group

Group: 0
----------------------------------------
Number of observations: 287
R-squared: 0.3268
Mean of dependent variable: 17.5828
Std of dependent variable: 12.0486

Coefficients:
Variable                                      Coeff    Std Err        t    P>|t|
-------------------------------------------------------------
age                                          0.2640     0.0468    5.637    0.000
female                                      -6.1497     1.1929   -5.155    0.000
C(education_level)[LTHS]                     5.6689     2.3922    2.370    0.018
C(education_level)[advanced_degree]         23.6506     2.9415    8.040    0.000
C(education_level)[college]                 19.0245     2.4208    7.859    0.000
C(education_level)[high_school]              8.5831     1.9820    4.331    0.000
C(education_level)[some_college]            10.9798    

In [7]:
np.isclose(1.1330 + 0.3979 + -2.4302, -0.8994, atol=0.0001)

np.True_

In [None]:
threefold_decomposition = results.three_fold()