In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from tabulate import tabulate

# Load and clean data
df = pd.read_csv("cci_programs_data_reduced.csv", low_memory=False)
# Drop semi-annual / mid-year reporting cycles. Rows whose cycle name is
# missing are kept (na=False makes them non-matches). The explicit .copy()
# makes the filtered frame independent of the original, so the column
# assignments below cannot trigger pandas' SettingWithCopyWarning.
df = df[~df['Reporting Cycle Name'].str.contains('Semi|Mid-Year', na=False)].copy()

# Numeric conversions (entries that cannot be parsed become NaN)
df['Total Program GGRFFunding'] = pd.to_numeric(df['Total Program GGRFFunding'], errors='coerce')
df['Total GGRFDisadvantaged Community Funding'] = pd.to_numeric(df['Total GGRFDisadvantaged Community Funding'], errors='coerce')

# Create share_DAC: DAC funding as a fraction of total program funding.
# A zero denominator produces +/-inf rather than NaN; the formula interface
# only drops NaN/None rows as missing, so infinities are converted to NaN here
# to keep them out of the regressions.
df['share_DAC'] = (
    df['Total GGRFDisadvantaged Community Funding'] / df['Total Program GGRFFunding']
).replace([np.inf, -np.inf], np.nan)

# Collaboration variables
# n_partners   = number of distinct County values recorded for a project ID
# multi_county = 1 when that count exceeds one, else 0 (rows whose project ID
#                is missing get n_partners = NaN and therefore multi_county = 0)
project_counties = df.groupby('Project ID Number')['County'].nunique()
df['n_partners'] = df['Project ID Number'].map(project_counties)
df['multi_county'] = (df['n_partners'] > 1).astype(int)
# log1p keeps zero-funding rows finite (log1p(0) == 0)
df['log_funding'] = np.log1p(df['Total Program GGRFFunding'])

# Region dummy: 1 for the listed Southern California counties, 0 otherwise
south_counties = ["Los Angeles", "Orange", "San Diego", "Riverside", "San Bernardino", "Imperial", "Ventura"]
df['Region_South'] = df['County'].isin(south_counties).astype(int)

# Rename agency column for formula use (formula terms cannot contain spaces)
df = df.rename(columns={'Agency Name': 'Agency_Name'})

# Fit equity models
#   Model 1: controls only (project size + agency and county fixed effects)
#   Model 2: adds the collaboration variables
#   Model 3: adds the n_partners x Region_South interaction
# NOTE(review): Region_South is a deterministic function of County, so its
# main effect is perfectly collinear with the C(County) dummies in Model 3.
# OLS.fit() defaults to a pseudo-inverse solve and will still return a number
# for it, but that coefficient is not separately identified -- confirm whether
# the county fixed effects or the region dummy should be dropped.
# NOTE(review): multi_county is derived from n_partners (n_partners > 1), so
# the two Model 2 coefficients describe the same underlying county count.
model1_eq = smf.ols('share_DAC ~ log_funding + C(Agency_Name) + C(County)', data=df).fit()
model2_eq = smf.ols('share_DAC ~ log_funding + C(Agency_Name) + C(County) + n_partners + multi_county', data=df).fit()
model3_eq = smf.ols('share_DAC ~ log_funding + C(Agency_Name) + C(County) + n_partners*Region_South', data=df).fit()

# Extract coefficients of interest
def extract_equity_coeffs(model):
    """Collect (coefficient, std. error, p-value) triples for the key terms.

    ``model`` must expose ``params``, ``bse`` and ``pvalues`` mappings keyed
    by term name. 'Intercept' and 'log_funding' are looked up directly, so a
    model lacking either raises ``KeyError``. The remaining terms are
    optional: when a model does not contain one, its triple is filled with
    NaN so every model yields the same set of keys.
    """
    estimates = model.params
    std_errors = model.bse
    p_values = model.pvalues

    def _strict(term):
        # Terms present in every specification: fail loudly if absent.
        return (estimates[term], std_errors[term], p_values[term])

    def _lenient(term):
        # Terms that only some specifications include: NaN when absent.
        return (
            estimates.get(term, np.nan),
            std_errors.get(term, np.nan),
            p_values.get(term, np.nan),
        )

    summary = {}
    for term in ('Intercept', 'log_funding'):
        summary[term] = _strict(term)
    for term in ('n_partners', 'multi_county', 'Region_South', 'n_partners:Region_South'):
        summary[term] = _lenient(term)
    return summary

# Coefficient triples (estimate, std. error, p-value) for each fitted model
coeffs = {
    'Model 1': extract_equity_coeffs(model1_eq),
    'Model 2': extract_equity_coeffs(model2_eq),
    'Model 3': extract_equity_coeffs(model3_eq)
}

# Format into table: one row per term, one column per model
table = []
for var in ['Intercept', 'log_funding', 'n_partners', 'multi_county', 'Region_South', 'n_partners:Region_South']:
    row = [f"**{var}**"]
    for model in ['Model 1', 'Model 2', 'Model 3']:
        coef, se, pval = coeffs[model].get(var, (np.nan, np.nan, np.nan))
        if np.isnan(coef):
            # Term is not part of this specification: leave the cell empty.
            row.append('')
            continue
        stars = '***' if pval < 0.01 else '**' if pval < 0.05 else '*' if pval < 0.1 else ''
        # Keep the estimate and its standard error on ONE line. The 'github'
        # table format is emitted row-by-row, so a newline inside a cell
        # splits the row and breaks the rendered Markdown table.
        formatted = f"{coef:.3f}{stars} ({se:.3f})"
        row.append(formatted)
    table.append(row)

# Display table
headers = ['Variable', 'Model 1', 'Model 2', 'Model 3']
print(tabulate(table, headers=headers, tablefmt='github'))


| Variable                    | Model 1           | Model 2           | Model 3           |
|-----------------------------|-------------------|-------------------|-------------------|
| **Intercept**               | 0.521*** (0.016)  | 0.760*** (0.019)  | 0.910*** (0.019)  |
| **log_funding**             | -0.022*** (0.002) | -0.031*** (0.001) | -0.025*** (0.001) |
| **n_partners**              |                   | -0.007*** (0.000) | -0.007*** (0.000) |
| **multi_county**            |                   | 0.199*** (0.009)  |                   |
| **Region_South**            |                   |                   | 0.080*** (0.012)  |
| **n_partners:Region_South** |                   |                   | 0.000* (0.000)    |


In [2]:
# Report goodness of fit (R² and adjusted R²) for each equity model in turn
for label, fitted in (
    ("Model 1", model1_eq),
    ("Model 2", model2_eq),
    ("Model 3", model3_eq),
):
    print(label)
    print(f"R²: {fitted.rsquared:.3f}, Adjusted R²: {fitted.rsquared_adj:.3f}")



Model 1
R²: 0.323, Adjusted R²: 0.321
Model 2
R²: 0.355, Adjusted R²: 0.353
Model 3
R²: 0.349, Adjusted R²: 0.347


### **Simplified Regression Results: Equity Outcome (`share_DAC`)**

| Predictor                    | Model 1 (Controls) | Model 2 (+Collab) | Model 3 (+Interaction) |
|-----------------------------|--------------------|--------------------|-------------------------|
| log_funding (Project size)   | -0.022***          | -0.031***          | -0.025***               |
| n_partners (No. of partners) | –                  | -0.007***          | -0.007***               |
| multi_county (Geo. scope)    | –                  | 0.199***           | –                       |
| Region_South (So. CA region) | –                  | –                  | 0.080***                |
| n_partners × Region_South    | –                  | –                  | 0.000*                  |
| Agency fixed effects         | 21                 | 21                 | 21                      |
| County fixed effects         | 58                 | 58                 | 58                      |
| Adj. R²                      | 0.321              | 0.353              | 0.347                   |
| N                            | 113,346            | 113,346            | 113,346                 |

Significance levels: \* *p* < 0.1, \*\* *p* < 0.05, \*\*\* *p* < 0.01.


### **Narrative Interpretation of Equity-Focused Regression Results**

The equity-focused regression models reinforce the significance of collaboration and geography in shaping the distribution of climate investment benefits to disadvantaged communities (DACs). Across all three models, *log_funding* is consistently negative and statistically significant (*p* < 0.01), suggesting that larger projects—while potentially delivering other benefits—allocate a smaller proportion of funding to DACs. This pattern highlights a potential tradeoff between project scale and equity, consistent with prior concerns about whether high-cost projects sufficiently prioritize community-level distributional impacts.

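In Model 2, the number of collaborating county-level partners (*n_partners*, measured as the number of distinct counties associated with a project) is negatively associated with DAC share (*p* < 0.001), suggesting that projects involving more partners may experience coordination or mission-drift challenges that dilute equity goals. Interestingly, *multi_county* projects—those operating across multiple counties—are positively associated with DAC share (*p* < 0.001), indicating that projects with a broader geographic footprint may be better positioned to reach disadvantaged communities. Because *multi_county* is simply an indicator for *n_partners* > 1, the two coefficients should be read together: moving from a single county to more than one is associated with a higher DAC share, while each additional county beyond that is associated with a small decline. This pattern supports prior findings that regional collaborations can expand the scope of service delivery, albeit with tradeoffs in administrative complexity.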

Model 3 introduces regional dynamics, revealing that Southern California projects allocate a higher share of funding to DACs (*Region_South* = 0.080, *p* < 0.001). While the main effect of *n_partners* remains negative, the interaction term (*n_partners × Region_South*) is positive but very small (it rounds to 0.000) and only marginally significant (*p* < 0.1), indicating that the negative association between additional partners and DAC share is, at most, slightly weaker in the southern region. This may reflect stronger administrative infrastructure or better-integrated service delivery networks in these urbanized counties.

Taken together, these results highlight a key tension: while broader collaborations and larger projects are essential for scaling climate investments, they do not automatically translate to equity gains. In fact, more collaborative projects may risk underdelivering for disadvantaged communities unless deliberately structured with equity in mind. These findings reinforce the need to tailor collaborative strategies to regional contexts and to explicitly incorporate equity considerations into project design from the outset.


### **Integrated Findings: Efficiency and Equity Outcomes**

The regression analyses offer empirical support for the central proposition that collaboration and geography shape program performance in the California Climate Investments (CCI) program. When considered jointly, the cost-efficiency and equity models reveal both the potential and limitations of collaborative arrangements in advancing distinct policy goals.

First, the cost-efficiency models demonstrate that collaborative scale is significantly associated with lower greenhouse gas (GHG) reduction costs. In both Model 2 and Model 3, the number of county-level partners (*n_partners*) is negatively and significantly associated with cost per ton of GHG reduced. These results suggest that inter-agency partnerships enhance implementation capacity and enable resource pooling, which, in turn, improves environmental cost-efficiency. However, the interaction term in Model 3 (*n_partners × Region_South*) is positive and significant, indicating that the efficiency gains from additional partners are attenuated in Southern California. This finding is consistent with theoretical expectations that regional institutional capacity, administrative density, and coordination costs may moderate the effectiveness of collaborative governance structures (Emerson et al., 2012; Ansell & Gash, 2008).

Second, the equity-focused models reveal a more complex set of relationships. In contrast to the efficiency results, *n_partners* is negatively associated with the share of funding allocated to disadvantaged communities (DACs), suggesting that broader collaboration may reduce the extent to which projects prioritize equity-focused targeting. This result may reflect coordination challenges, competing institutional mandates, or the crowding-out of equity considerations in multi-partner project design. Notably, *multi_county* projects—those spanning more than one county—are positively and significantly associated with DAC share in Model 2, indicating that geographic scope may facilitate outreach to under-resourced communities. Model 3 further shows that projects located in Southern California allocate a significantly greater share of funding to DACs, and the interaction between *n_partners* and *Region_South* is positive, albeit marginally significant. These results suggest that in certain geographic contexts—particularly those with denser administrative networks or stronger outreach infrastructure—collaboration may be more conducive to achieving equity goals.

Taken together, the findings highlight a critical tension in collaborative climate governance. While multi-agency collaboration is associated with improved environmental cost-efficiency, it does not uniformly enhance equity outcomes. In fact, more expansive collaborations may introduce tradeoffs by diluting the focus on disadvantaged communities or increasing administrative complexity. Conversely, broader geographic reach appears to improve DAC outcomes but may reduce efficiency. These findings underscore the importance of deliberate institutional design and context-sensitive implementation strategies when structuring collaborative governance initiatives to address multiple, and at times competing, policy objectives.