# Minimal Oaxaca Demo

This notebook demonstrates a minimal example of running the Oaxaca-Blinder decomposition and printing the results.

In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd

from oaxaca import Oaxaca

In [2]:
# Load sample data (the path is relative, so it resolves against the kernel's working directory)
# NOTE(review): the preview shows an "Unnamed: 0" column — presumably a row index that was
# written out with the CSV; consider `index_col=0` if it is not needed. Confirm first.
# NOTE(review): `ln_real_wage` is missing for at least one row in the preview (row 1).
df = pd.read_csv("sample_data.csv")
df.head()

Unnamed: 0,age,female,foreign_born,LTHS,high_school,some_college,college,advanced_degree,education_level,ln_real_wage
0,52,0,1,0,1,0,0,0,high_school,2.140066
1,46,1,1,0,1,0,0,0,high_school,
2,31,1,1,0,1,0,0,0,high_school,2.499795
3,35,0,1,0,1,0,0,0,high_school,2.70805
4,19,0,0,0,1,0,0,0,high_school,2.079442


In [3]:
# Build the decomposition model, then estimate it on the sample data.
# Formula notes: the outcome is exp(ln_real_wage), i.e. the wage in levels;
# the `-1` term drops the intercept (standard formula notation);
# `foreign_born` is the variable that splits the sample into groups.
oaxaca_model = Oaxaca()
results = oaxaca_model.fit(
    formula="exp(ln_real_wage) ~ -1 + age + female + C(education_level)",
    data=df,
    group_variable="foreign_born",
)

# Confirm the fit ran and report the group labels it exposes
print("Model fitted successfully!")
print(f"Groups: {results.groups_}")

Model fitted successfully!
Groups: [0, 1]


In [5]:
# Run two-fold decomposition
# `weights` maps each group label (0 and 1) to a weight. Presumably this selects the
# reference coefficients, with group 0 used as the benchmark here — TODO confirm
# against the oaxaca package documentation.
twofold_decomposition = results.two_fold(weights={0: 1.0, 1: 0.0})

In [10]:
# Display detailed summary - shows rich HTML output in Jupyter.
# The object is left as the cell's last expression (not wrapped in print) so the
# notebook renders it through its rich representation.
twofold_decomposition

Variable,Mix-shift,Mix %,Within-slice,Within %,Total,Tot %
age,-1.7491,-58.0%,7.5585,250.6%,5.8094,192.6%
female,-0.5231,-17.3%,-1.1653,-38.6%,-1.6883,-56.0%
C(education_level),2.4545,81.4%,-3.5599,-118.1%,-1.1055,-36.7%
C(education_level)[LTHS],-1.5272,-50.6%,-1.5892,-52.7%,-3.1163,-103.3%
C(education_level)[advanced_degree],0.8993,29.8%,-0.4043,-13.4%,0.495,16.4%
C(education_level)[college],0.8965,29.7%,0.2337,7.8%,1.1303,37.5%
C(education_level)[high_school],-0.5832,-19.3%,-1.2954,-43.0%,-1.8786,-62.3%
C(education_level)[some_college],2.769,91.8%,-0.5048,-16.7%,2.2642,75.1%
total,0.1822,6.0%,2.8333,94.0%,3.0156,100.0%


In [11]:
# Run three-fold decomposition (no arguments are passed, so the method's defaults apply)
threefold_decomposition = results.three_fold()