In [None]:
import numpy as np
from scipy.optimize import minimize


: 

In [None]:

# --------------------------
# Step 1: Simulate Data
# --------------------------
# Two groups (treated / control) observed in two periods (pre / post).
# Every cell is drawn from a unit-variance normal; only the mean differs.

np.random.seed(123)  # fixed seed so each run reproduces the same draws

N = 200
N_treat = N // 2
N_control = N - N_treat

trend = 1.0             # shift shared by both groups in the post period
treatment_effect = 2.0  # extra post-period shift for the treated group only

# The draw order below (treat pre, control pre, treat post, control post)
# determines which random numbers each sample receives under the global seed.
Y_treat_pre = np.random.normal(loc=10, scale=1, size=N_treat)
Y_control_pre = np.random.normal(loc=10, scale=1, size=N_control)
Y_treat_post = np.random.normal(loc=10 + trend + treatment_effect, scale=1, size=N_treat)
Y_control_post = np.random.normal(loc=10 + trend, scale=1, size=N_control)

# Sample means of the four cells; these are the moments matched later on.
mean_treat_pre = np.mean(Y_treat_pre)
mean_treat_post = np.mean(Y_treat_post)
mean_control_pre = np.mean(Y_control_pre)
mean_control_post = np.mean(Y_control_post)


In [None]:

# --------------------------
# Step 2: Set up Problem
# --------------------------
M = 5  # number of support points used by every distribution

# Pool all observed outcomes and pad the range by one unit on each side, so
# the support grid strictly contains every sample (and hence every sample mean).
all_outcomes = np.concatenate([Y_treat_pre, Y_control_pre, Y_treat_post, Y_control_post])
min_val = all_outcomes.min() - 1
max_val = all_outcomes.max() + 1

# M equally spaced points from min_val to max_val (both endpoints included)
support = np.linspace(min_val, max_val, num=M)

# We'll have 4 distributions: T_pre, T_post, C_pre, C_post
# Each distribution has M probabilities. We'll store them in a single vector:
# p = [p_treat_pre, p_treat_post, p_control_pre, p_control_post]
# Dimension of p is 4*M = 20 (if M=5).

# Indices:
def idx(dist_name):
    """Return the slice of the stacked vector p that holds one distribution.

    The four distributions are stored back to back, M entries each, in the
    order 'Tpre', 'Tpost', 'Cpre', 'Cpost'.

    Parameters
    ----------
    dist_name : str
        One of 'Tpre', 'Tpost', 'Cpre', 'Cpost'.

    Returns
    -------
    slice
        Covers the M consecutive positions of the requested distribution.

    Raises
    ------
    KeyError
        If dist_name is not one of the four known names.
    """
    position = dict(Tpre=0, Tpost=1, Cpre=2, Cpost=3)
    start = position[dist_name] * M  # M is the module-level support size
    stop = start + M
    return slice(start, stop)



In [None]:

# --------------------------
# Step 3: Objective and Constraints
# --------------------------

def total_entropy(p):
    """Return the Shannon entropy -sum(p_i * log(p_i)) over every entry of p.

    p is treated as one flat collection of probability masses, so when it is
    the concatenation of several distributions the result is the sum of their
    individual entropies (in nats).

    Parameters
    ----------
    p : array_like
        Probability masses.

    Returns
    -------
    float
        Total entropy; always finite.
    """
    eps = 1e-15
    # Clip to a tiny positive floor before taking the log. Adding eps inside
    # the log only protects p == 0: any entry below -eps (which an optimizer
    # can produce while probing) would make the log, and thus the whole
    # objective, NaN. Clipped entries contribute essentially zero entropy.
    p_safe = np.clip(p, eps, None)
    return -np.sum(p_safe * np.log(p_safe))

# We want to maximize entropy => minimize negative entropy
def objective(p):
    """Return the negative of total_entropy(p).

    The solver minimizes, so handing it the negated entropy makes it search
    for the maximum-entropy p.
    """
    entropy = total_entropy(p)
    return -entropy

# Constraints:
# 1) Each distribution sums to 1
def constraint_sum(p, dist_name):
    """Return how far the named distribution's probabilities are from summing to 1.

    Zero means the normalization constraint is satisfied exactly.
    """
    probs = p[idx(dist_name)]  # the entries of p belonging to this distribution
    total_mass = np.sum(probs)
    return total_mass - 1.0

# 2) Moment matching constraints
def constraint_moment(p, dist_name, observed_mean):
    """Return the gap between the named distribution's implied mean and observed_mean.

    The implied mean is the probability-weighted sum of the module-level
    support points; zero means the first moment is matched exactly.
    """
    probs = p[idx(dist_name)]  # the entries of p belonging to this distribution
    implied_mean = np.sum(probs * support)
    return implied_mean - observed_mean

# Equality constraints in the dictionary form accepted by scipy's minimize.
# Each distribution is paired with the sample mean it has to reproduce.
dist_means = [
    ('Tpre', mean_treat_pre),
    ('Tpost', mean_treat_post),
    ('Cpre', mean_control_pre),
    ('Cpost', mean_control_post),
]

# Normalization constraints first: each distribution's probabilities sum to one.
cons = [
    {'type': 'eq', 'fun': constraint_sum, 'args': (name,)}
    for name, _ in dist_means
]

# Then the moment constraints: each implied mean equals the observed mean.
cons += [
    {'type': 'eq', 'fun': constraint_moment, 'args': (name, observed)}
    for name, observed in dist_means
]

# Starting point: every distribution uniform over its M support points.
p0 = np.full(4 * M, 1.0 / M)


In [None]:

# --------------------------
# Step 4: Solve the Problem using SLSQP
# --------------------------
# Every entry of p is a probability, so box it into [0, 1]. The equality
# constraints alone do not stop SLSQP from stepping to (or returning) negative
# entries, where the entropy's logarithm is undefined.
prob_bounds = [(0.0, 1.0)] * len(p0)

res = minimize(objective, p0, method='SLSQP', bounds=prob_bounds, constraints=cons,
               options={'ftol': 1e-9, 'disp': True})

print("Optimization Status:", res.success)
print("Message:", res.message)
print("Optimal value (negative total entropy):", res.fun)

# Stacked solution vector; split per distribution with idx(...)
p_opt = res.x

# Split the stacked solution back into its four distributions
p_treat_pre_val, p_treat_post_val, p_control_pre_val, p_control_post_val = (
    p_opt[idx(name)] for name in ('Tpre', 'Tpost', 'Cpre', 'Cpost')
)

# Mean implied by each fitted distribution (computed once, reused below)
fitted_mean_treat_pre = np.sum(p_treat_pre_val * support)
fitted_mean_treat_post = np.sum(p_treat_post_val * support)
fitted_mean_control_pre = np.sum(p_control_pre_val * support)
fitted_mean_control_post = np.sum(p_control_post_val * support)

# Constraint check: each distribution should sum to one
print("\nCheck distribution sums:")
for label, probs in (
    ("Tpre sum:", p_treat_pre_val),
    ("Tpost sum:", p_treat_post_val),
    ("Cpre sum:", p_control_pre_val),
    ("Cpost sum:", p_control_post_val),
):
    print(label, probs.sum())

# Constraint check: each implied mean should equal the observed sample mean
print("\nCheck expected values:")
for label, fitted, observed in (
    ("Tpre expected:", fitted_mean_treat_pre, mean_treat_pre),
    ("Tpost expected:", fitted_mean_treat_post, mean_treat_post),
    ("Cpre expected:", fitted_mean_control_pre, mean_control_pre),
    ("Cpost expected:", fitted_mean_control_post, mean_control_post),
):
    print(label, fitted, "Observed:", observed)

# DID-like estimate: change in the treated mean minus change in the control mean
change_treat = fitted_mean_treat_post - fitted_mean_treat_pre
change_control = fitted_mean_control_post - fitted_mean_control_pre
did_est = change_treat - change_control

print("\nDID estimate from GME distributions:", did_est)

print("\nDistributions:")
for label, probs in (
    ("Tpre:", p_treat_pre_val),
    ("Tpost:", p_treat_post_val),
    ("Cpre:", p_control_pre_val),
    ("Cpost:", p_control_post_val),
):
    print(label, probs)


In [None]:
# --------------------------
# Step 2: Combine Data into a Single Dataset
# --------------------------
# Each observation becomes a row [Y, Treat, Post] where
#   Treat = 1 for treated units, 0 for control units
#   Post  = 1 for the post-treatment period, 0 for the pre-treatment period

def _with_indicators(outcome, n_units, treat, post):
    """Stack an outcome vector with constant Treat and Post indicator columns."""
    return np.column_stack([
        outcome,
        np.full(n_units, treat),
        np.full(n_units, post),
    ])

# Pre-period data (Post=0)
data_treat_pre = _with_indicators(Y_treat_pre, N_treat, 1.0, 0.0)
data_control_pre = _with_indicators(Y_control_pre, N_control, 0.0, 0.0)

# Post-period data (Post=1)
data_treat_post = _with_indicators(Y_treat_post, N_treat, 1.0, 1.0)
data_control_post = _with_indicators(Y_control_post, N_control, 0.0, 1.0)

# Row order: treated pre, control pre, treated post, control post
data = np.vstack([data_treat_pre, data_control_pre, data_treat_post, data_control_post])

# Column views of the stacked dataset
Y, Treat, Post = data[:, 0], data[:, 1], data[:, 2]

: 