In [2]:
import pandas as pd
import seaborn as sns
from scipy import stats
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from matplotlib.pyplot import subplots
import matplotlib.pyplot as plt  
import sklearn.model_selection as skm                      
!pip install linearmodels
from linearmodels.panel import PanelOLS
from statsmodels.api import OLS, add_constant



Information from Bruhn et al "The Impact of High School Financial Education: Evidence from a Large-Scale Evaluation in Brazil" Read_me.txt file:
- school_admin_data_final.dta contains grade level passing, failing, and dropout rates from administrative school data 
- school_intervention_panel_final.dta contains underlying variables used in the analysis, as well as dummy variables generated based on the underlying data. 
    - Variables with "_bl" at the end are from the baseline survey. 
    - Variables with "_fu" at the end are from the follow-up survey.
    - The data is stacked for the first and second follow-up surveys, with variable round==0 denoting the first follow-up and round==1 the second follow-up.
    - Variables with a "p" after the number in the variables name, e.g. "rp_09p_bl" come from the parent questionnaire.

In [3]:
#load the datasets into a pandas DataFrame
df = pd.read_stata('school_intervention_panel_final.dta', convert_categoricals=False)

One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata('school_intervention_panel_final.dta', convert_categoricals=False)


In [4]:
df = df.rename(columns={
    'female' : 'Student is female (Baseline)',
    'miss_f_coded' : 'Student gender could not be coded based on name (Baseline)',
    'dumm_rp_08_bl' : 'Education of mother: At least some secondary (Baseline)',
    'dumm_rp_09_bl' : 'Education of father: At least some secondary (Baseline)',
    'dumm_rp_14_bl' : 'Student\'s Family receives Bolsa Familia cash transfer (Baseline)',
    'dumm_rp_23_bl' : 'Student has computer with internet at home (Baseline)',
    'dumm_rp_24_bl' : 'Student has failed at least one school year (Baseline)',
    'dumm_rp_49_bl' : 'Student is not working at the moment (Baseline)',
    'business_bl' : 'Student works in own or family business (Baseline)',
    'employee_bl' : 'Student works as employee or other (Baseline)',
    'dumm_rp_50_bl' : 'Student receives income (Baseline)',
    'dumm_rp_53B_bl' : 'Student part of income saved is non-zero (Baseline)',
    'dumm_rp_55_bl' : 'Student has borrowed money (any source) (Baseline)',
    'dumm_rp_56_bl' : 'Student is behind on payments (unconditional) (Baseline)',
    'dumm_rp_57s_bl' : 'Student is behind on payments to store (Baseline)',
    'dumm_rp_57i_bl' : 'Student is behind on payments to family friends or other people (Baseline)',
    'dumm_rp_57f_bl' : 'Student is behind on payments to bank or FI (Baseline)',
    'dumm_rp_59_bl' : 'Student says they are a saver (Baseline)',
    'dumm_rp_61_bl' : 'Student has formal savings (Baseline)',
    'dumm_rp_64A_bl' : 'Student makes a list of all monthly expenses (Baseline)',
    'dumm_rp_65A_bl': 'Student saves money for future purchases (Baseline)',
    'dumm_rp88__92C_bl': 'Student has bought electronics shoes or clothing with credit card (Baseline)',
    'dumm_rp88__92D_bl': 'Student has bought electronics shoes or clothing on installments (Baseline)',
    'dumm_rp88__92AB_bl': 'Student has bought electronics shoes or clothing with cash/debit card (Baseline)',
    'dumm_negotiates_bl': 'Student negotiates prices or payment methods (Baseline)',
    'dumm_search_bl': 'Student comparison shops before making purchase (Baseline)',
    'dumm_rp_08p_bl': 'Parent has at least some secondary education (Baseline)',
    'p_employee_bl': 'Parent is an employee (Baseline)',
    'p_selfempl_bl': 'Parent is self-employeed (Baseline)',
    'p_other_bl': 'Parent occupation is other (homemaker, retired, unemployed, other) (Baseline)',
    'dumm_rp_14p_bl': 'Parent: makes a list of all monthly expenses (Baseline)',
    'dumm_rp_18p_bl': 'Parent: has checking account (Baseline)',
    'dumm_rp_18p_bl': 'Parent: has savings account (Baseline)',
    'dumm_rp_19p_bl': 'Parent: has debit card (Baseline)',
    'dumm_rp_23p_bl': 'Parent: has checks (Baseline)',
    'dumm_formal_saving_bl':' Parent: has formal savings (Baseline)',
    'dumm_rp_33p_bl': 'Parent: Student talks to you about finances (Baseline)',
    'dumm_rp_34p_bl': 'Parent: Student helps organize HH budget (Baseline)',
    'dumm_rp_36p_bl': 'Parent: Prefers R50K plus 15 percent interest (Baseline)',
    'dumm_rp_37p_bl': 'Parent: Inflation question dummy for correct (Baseline)',
    'vl_proficiencia_bl': 'Student: Finanical Proficieny Score (Baseline)', 
    'dumm_rp_49_fup' : "Student is not working at the moment" ,  
    'business_fup' : 'Student works in own or family busines' ,
    'employee_fup' : 'Student works as employee or other', 
    'dumm_rp_50_fup': 'Student: Receives income' ,
     'dumm_rp_53B_fup': 'Student: Pct of income saved is non-zero', 
    'dumm_rp_55_fup': 'Student: Has borrowed money (any source)',
    'dumm_rp_56_fup' : 'Student: Is behind on payments (unconditional)',
    'dumm_rp_57f_fup' : 'Student: Is behind on payments to bank or FI' ,
    'dumm_rp_57s_fup' : 'Student: Is behind on payments to store',
    'dumm_rp_57i_fup': 'Student: Is behind on payments to family friends or other people', 
    'dumm_rp_59_fup' : 'Student: Says they are a saver' , 
    'dumm_rp_61_fup' : 'Student: Has formal savings', 
    'dumm_rp_64A_fup' : 'I make a list of all monthly expenses', 
    'dumm_rp_65A_fup' : 'Student: Saves money for future purchases',
    'dumm_rp_88C_fup': 'Student: I have bought cell phone with credit card', 
    'dumm_rp_88D_fup': 'Student: I have bought cell phone on installments',
    'dumm_rp_88AB_fup': 'Student: I have bought a cell phone with cash/debit card',
    'dumm_rp_89C_fup': 'Student: I have bought computer with credit card' ,
    'dumm_rp_89D_fup': 'Student: I have bought computer on installments' ,
    'dumm_rp_89AB_fup': 'Student: I have bought a computer with cash/debit card' ,
    'dumm_rp_90C_fup': 'Student: I have bought an electronic device with credit card' ,
    'dumm_rp_90D_fup': 'Student: I have bought an electronic device on installments' ,
    'dumm_rp_90AB_fup': 'Student: I have bought an electronic device with cash/debit card' ,
    'dumm_rp_91C_fup': 'Student: I have bought shoes with credit card' ,
    'dumm_rp_91D_fup': 'Student: I have bought shoes on installments' ,
    'dumm_rp_91AB_fup': 'Student: I have bought shoes with cash/debit card',
    'dumm_rp_92C_fup': 'Student: I have bought clothing with credit card', 
    'dumm_rp_92D_fup': 'Student: I have bought clothing on installments' ,
    'dumm_rp_92AB_fup' : 'Student: I have bought clothing with cash/debit card',
    'dumm_rp88__92C_fup': 'I have bought electronics shoes or clothing with credit card',
    'dumm_rp88__92D_fup': 'I have bought electronics shoes or clothing on installments' ,
    'dumm_rp88__92AB_fup': 'I have bought electronics shoes or clothing with cash/debit card' ,
    'dumm_rp_93_fup': 'I negotiate the price' ,
    'dumm_rp_94_fup': 'Student I search price in different stores',
    'dumm_rp_95_fup': 'Student I negotiate the payment method' ,
    'dumm_rp_96_fup': 'Student: I search similar models/brands' ,
    'dumm_negotiates_fup': 'Student Negotiates prices or payment methods' ,
    'dumm_search_fup': 'Student Comparison shops before making purchase' ,
    'dumm_rp_08p_fup': 'Parent has at least some secondary education' ,
    'p_employee_fup': 'Parent is an employee' ,
    'p_selfempl_fup': 'Student Parent is self-employeed',
    'p_other_fup': 'Student Parent\'s occupation is other (homemaker, retired, unemployed, other)',
    'dumm_rp_14p_fup': 'Parent: makes a list of all monthly expenses',
    'dumm_rp_18p_fup' : 'Parent: has checking account', 
    'dumm_rp_19p_fup': 'Parent: savings account', 
    'dumm_rp_21p_fup': 'Parent has debit card' ,
    'dumm_rp_23p_fup': 'Parent has checks' ,
    'dumm_formal_saving_fup': 'Parent has formal savings',
    'dumm_rp_33p_fup': 'Parent: Student talks to you about finances' ,
    'dumm_rp_34p_fup': 'Parent: student helps organize HH budget',
    'dumm_rp_36p_fup' : 'Parent: prefers R50K plus 15 percent interest (follow up)',
    'dumm_rp_37p_fup' : 'Parent: Inflation question dummy for correct  (follow up)', 
    'dumm_rp_41p_fup' : 'Parent: budget must have income and expenses (follow up)',

})
column_names = df.columns.tolist()
print(column_names)

['id_geral', 'cd_escola', 'nm_uf_bl', 'matriculas', 'docentes', 'abandonona1sriemdio', 'aprovaona1sriemdio', 'treatment', 'pair_all', 'treatment_workshop', 'strata', 'round', 'female_coded', 'rp_01_bl', 'rp_08_bl', 'rp_09_bl', 'rp_14_bl', 'rp_23_bl', 'rp_24_bl', 'Student: Finanical Proficieny Score (Baseline)', 'bl_test', 'rp_49_bl', 'rp_50_bl', 'rp_53_bl', 'rp_55_bl', 'rp_56_bl', 'rp_57_bl', 'rp_59_bl', 'rp_61_bl', 'rp_64_bl', 'rp_65_bl', 'rp_88_bl', 'rp_89_bl', 'rp_90_bl', 'rp_91_bl', 'rp_92_bl', 'rp_93_bl', 'rp_94_bl', 'rp_95_bl', 'rp_96_bl', 'bl_aluno', 'rp_08p_bl', 'rp_09p_bl', 'rp_14p_bl', 'rp_18p_bl', 'rp_19p_bl', 'rp_21p_bl', 'rp_23p_bl', 'rp_33p_bl', 'rp_34p_bl', 'rp_36p_bl', 'rp_37p_bl', 'vl_proficiencia_fup', 'fu1_test', 'rp_55_fup', 'rp_57_fup', 'rp_49_fup', 'rp_50_fup', 'rp_53_fup', 'rp_56_fup', 'rp_59_fup', 'rp_61_fup', 'rp_64_fup', 'rp_65_fup', 'rp_88_fup', 'rp_89_fup', 'rp_90_fup', 'rp_91_fup', 'rp_92_fup', 'rp_93_fup', 'rp_94_fup', 'rp_95_fup', 'rp_96_fup', 'fu1_aluno'

In [5]:
#useful subdata sets 
follow_up_1_df = df[df['round'] == 0]
follow_up_2_df = df[df['round'] == 1]

## Replication of Paper OLS Regression Results

In [9]:
# Replication of Paper OLS Regression Results 
data = df
outcome = 'I have bought electronics shoes or clothing with credit card'

# Helper function for summarizing control group stats
def summarize_control_group(df, var, treatment_col='treatment'):
    control_group = df[(df[treatment_col] == 0) & df['e_sample']]
    mean = control_group[var].mean()
    std = control_group[var].std()
    return mean, std

# Create e_sample column for the subset used in the model
data['e_sample'] = (~df[outcome].isnull()) & df['treatment'].notnull()

# Results container for Panel A, B, and C
results = []

# Run Panel A and Panel B analysis for round 0 and 1
for round_ in [0, 1]:
    data = data.dropna(subset=['treatment', outcome, 'round', 'cd_escola'])
    data['cd_escola'] = data['cd_escola'].astype('category')
    subset = data[data['round'] == round_]
    subset = subset.reset_index(drop=True)
    
    # Panel A regression (no controls, clustered standard errors)
    model_a = smf.ols(f'Q("{outcome}") ~ treatment', data=subset).fit(cov_type='cluster', 
                                                                  cov_kwds={'groups': subset['cd_escola']})
    control_mean_a, control_sd_a = summarize_control_group(subset, outcome)
    f_test_a = model_a.f_test("treatment = 0").pvalue
    
    results.append({
        'panel': 'A',
        'round': round_,
        'coeff_treatment': model_a.params['treatment'],
        'se_treatment': model_a.bse['treatment'],
        'control_mean': control_mean_a,
        'control_sd': control_sd_a,
        'f_test': f_test_a,
        'r2': model_a.rsquared,
        'N': model_a.nobs,
        'N_clust': subset['cd_escola'].nunique()
    })
    
    # Panel B regression (cluster standard error, school pair dummies control)

    subset[f'flag_{outcome}{round_}'] = subset.groupby('pair_all')['treatment'].transform('mean')
    subset[f'pair_{outcome}{round_}'] = subset['pair_all']
    subset.loc[(subset[f'flag_{outcome}{round_}'] == 0) | (subset[f'flag_{outcome}{round_}'] == 1), f'pair_{outcome}{round_}'] = 0
    subset = subset.dropna(subset='pair_all')
    
    pair_dummies = pd.get_dummies(subset[f'pair_{outcome}{round_}'], prefix=f'pair_{round_}', drop_first=True)
    subset = pd.concat([subset, pair_dummies], axis=1)
    
    X = sm.add_constant(pd.concat([subset['treatment'], pair_dummies], axis=1))
    y = subset[var]
    X = X.astype({col: 'int' for col in X.select_dtypes(include=['bool']).columns})
    X = X.apply(pd.to_numeric, errors='coerce')
    
    model_b = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})

    control_group = subset[subset['treatment'] == 0]
    control_mean = control_group[var].mean()
    control_sd = control_group[var].std()

    # Collect results
    results.append({
        'panel': 'B',
        'round': round_,
        'outcome': var,
        'coeff_treatment': model_b.params['treatment'],
        'se_treatment': model_b.bse['treatment'],
        'control_mean': control_mean,
        'control_sd': control_sd,
        'f_test_p_value': None,  # Add this if you perform F-tests later
        'r2': model_b.rsquared,
        'N': model_b.nobs,
        'N_clust': subset['cd_escola'].nunique()
    })
    
    #Pannel C (With school pair dummies, baseline dependent variable, and student gender controls) 
    bl = 'Student has bought electronics shoes or clothing with credit card (Baseline)'
    subset[f'miss_{bl}'] = 0
    subset.loc[subset[bl].isnull(), f'miss_{bl}'] = 1
    subset[bl].fillna(0, inplace=True)
    
    subset['miss_f_coded'] = 0
    subset.loc[subset['female_coded'].isnull(), 'miss_f_coded'] = 1
    subset['female_coded'].fillna(0, inplace=True)
    
    subset[f'pair_{outcome}{round_}'] = subset['pair_all']
    pair_dummies = pd.get_dummies(subset[f'pair_{outcome}{round_}'], prefix=f'pair_{round_}', drop_first=True)
    subset = pd.concat([subset, pair_dummies], axis=1)
    
    pair_dummies = pair_dummies.astype('int')
    X = sm.add_constant(pd.concat([subset[['treatment', bl, f'miss_{bl}', 
                                           'female_coded', 'miss_f_coded']], pair_dummies], axis=1))
    y = subset[outcome]
    
    model_c = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})
    
    control_group = subset[subset['treatment'] == 0]
    control_mean = control_group[outcome].mean()
    control_sd = control_group[outcome].std()
    
    results.append({
        'panel': 'C',
        'round': round_,
        'outcome': outcome,
        'coeff_treatment': model_c.params['treatment'],
        'se_treatment': model_c.bse['treatment'],
        'control_mean': control_mean,
        'control_sd': control_sd,
        'f_test_p_value': None,  
        'r2': model_c.rsquared,
        'N': model_c.nobs,
        'N_clust': subset['cd_escola'].nunique()
    })
    
for result in results:
    print(result)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['cd_escola'] = data['cd_escola'].astype('category')


NameError: name 'var' is not defined

In [12]:
# Replication of Paper OLS Regression Results 
data = df
outcome = 'I have bought electronics shoes or clothing with credit card'

# Helper function for summarizing control group stats
def summarize_control_group(df, var, treatment_col='treatment'):
    control_group = df[(df[treatment_col] == 0) & df['e_sample']]
    mean = control_group[var].mean()
    std = control_group[var].std()
    return mean, std

# Create e_sample column for the subset used in the model
data['e_sample'] = (~df[outcome].isnull()) & df['treatment'].notnull()

# Results container for Panel A, B, and C
results = []

# Run Panel A and Panel B analysis for round 0 and 1
for round_ in [0, 1]:
    data = data.dropna(subset=['treatment', outcome, 'round', 'cd_escola'])
    data['cd_escola'] = data['cd_escola'].astype('category')
    subset = data[data['round'] == round_]
    subset = subset.reset_index(drop=True)
    
    # Panel A regression (no controls, clustered standard errors)
    model_a = smf.ols(f'Q("{outcome}") ~ treatment', data=subset).fit(cov_type='cluster', 
                                                                      cov_kwds={'groups': subset['cd_escola']})
    control_mean_a, control_sd_a = summarize_control_group(subset, outcome)
    f_test_a = model_a.f_test("treatment = 0").pvalue
    
    results.append({
        'panel': 'A',
        'round': round_,
        'coeff_treatment': model_a.params['treatment'],
        'se_treatment': model_a.bse['treatment'],
        'p_value_treatment': model_a.pvalues['treatment'],  # Add p-value for treatment coefficient
        'control_mean': control_mean_a,
        'control_sd': control_sd_a,
        'f_test': f_test_a,
        'r2': model_a.rsquared,
        'N': model_a.nobs,
        'N_clust': subset['cd_escola'].nunique()
    })
    
    # Panel B regression (cluster standard error, school pair dummies control)
    subset[f'flag_{outcome}{round_}'] = subset.groupby('pair_all')['treatment'].transform('mean')
    subset[f'pair_{outcome}{round_}'] = subset['pair_all']
    subset.loc[(subset[f'flag_{outcome}{round_}'] == 0) | (subset[f'flag_{outcome}{round_}'] == 1), f'pair_{outcome}{round_}'] = 0
    subset = subset.dropna(subset='pair_all')
    
    pair_dummies = pd.get_dummies(subset[f'pair_{outcome}{round_}'], prefix=f'pair_{round_}', drop_first=True)
    subset = pd.concat([subset, pair_dummies], axis=1)
    
    X = sm.add_constant(pd.concat([subset['treatment'], pair_dummies], axis=1))
    y = subset[outcome]
    X = X.astype({col: 'int' for col in X.select_dtypes(include=['bool']).columns})
    X = X.apply(pd.to_numeric, errors='coerce')
    
    model_b = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})

    control_group = subset[subset['treatment'] == 0]
    control_mean = control_group[outcome].mean()
    control_sd = control_group[outcome].std()

    # Collect results
    results.append({
        'panel': 'B',
        'round': round_,
        'outcome': outcome,
        'coeff_treatment': model_b.params['treatment'],
        'se_treatment': model_b.bse['treatment'],
        'p_value_treatment': model_b.pvalues['treatment'],  # Add p-value for treatment coefficient
        'control_mean': control_mean,
        'control_sd': control_sd,
        'f_test_p_value': None,  # Add this if you perform F-tests later
        'r2': model_b.rsquared,
        'N': model_b.nobs,
        'N_clust': subset['cd_escola'].nunique()
    })
    
    # Panel C regression (With school pair dummies, baseline dependent variable, and student gender controls) 
    bl = 'Student has bought electronics shoes or clothing with credit card (Baseline)'
    subset[f'miss_{bl}'] = 0
    subset.loc[subset[bl].isnull(), f'miss_{bl}'] = 1
    subset[bl].fillna(0, inplace=True)
    
    subset['miss_f_coded'] = 0
    subset.loc[subset['female_coded'].isnull(), 'miss_f_coded'] = 1
    subset['female_coded'].fillna(0, inplace=True)
    
    subset[f'pair_{outcome}{round_}'] = subset['pair_all']
    pair_dummies = pd.get_dummies(subset[f'pair_{outcome}{round_}'], prefix=f'pair_{round_}', drop_first=True)
    subset = pd.concat([subset, pair_dummies], axis=1)
    
    pair_dummies = pair_dummies.astype('int')
    X = sm.add_constant(pd.concat([subset[['treatment', bl, f'miss_{bl}', 
                                           'female_coded', 'miss_f_coded']], pair_dummies], axis=1))
    y = subset[outcome]
    
    model_c = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})
    
    control_group = subset[subset['treatment'] == 0]
    control_mean = control_group[outcome].mean()
    control_sd = control_group[outcome].std()
    
    results.append({
        'panel': 'C',
        'round': round_,
        'outcome': outcome,
        'coeff_treatment': model_c.params['treatment'],
        'se_treatment': model_c.bse['treatment'],
        'p_value_treatment': model_c.pvalues['treatment'],  # Add p-value for treatment coefficient
        'control_mean': control_mean,
        'control_sd': control_sd,
        'f_test_p_value': None,  
        'r2': model_c.rsquared,
        'N': model_c.nobs,
        'N_clust': subset['cd_escola'].nunique()
    })

for result in results:
    print(result)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['cd_escola'] = data['cd_escola'].astype('category')


{'panel': 'A', 'round': 0, 'coeff_treatment': -0.006154123533212073, 'se_treatment': 0.00945055430604901, 'p_value_treatment': 0.5149226786374965, 'control_mean': 0.2516129, 'control_sd': 0.43397289514541626, 'f_test': 0.5150999333939976, 'r2': 5.05843004297013e-05, 'N': 16338.0, 'N_clust': 845}
{'panel': 'B', 'round': 0, 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': -0.007933187804941042, 'se_treatment': 0.006119280628448203, 'p_value_treatment': 0.19482913650840594, 'control_mean': 0.2516129, 'control_sd': 0.43397289514541626, 'f_test_p_value': None, 'r2': 0.056098408042394854, 'N': 16338.0, 'N_clust': 845}
{'panel': 'C', 'round': 0, 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': -0.0008663793504490409, 'se_treatment': 0.005292955456165285, 'p_value_treatment': 0.8699788384910333, 'control_mean': 0.2516129, 'control_sd': 0.43397289514541626, 'f_test_p_value': None, 'r2': 0.23428691746172248

In [11]:
#Output not used in report, ended up manually creating the table in Latex 
from stargazer.stargazer import Stargazer

# Assuming you have model_a, model_b, and model_c defined
stargazer = Stargazer([model_a, model_b, model_c])

# Customize the table
stargazer.title("OLS Regression Results for Student Purchasing Behavior")
stargazer.custom_columns(['Panel A: No controls', 'Panel B: With school pair dummies', 
                          'Panel C: With school pair dummies, baseline dependent variable, and student gender'], [1, 1, 1])
stargazer.show_r2 = True
stargazer.show_n = True
stargazer.covariate_order(['treatment'])

# Render to LaTeX
latex_table = stargazer.render_latex()

# Add custom footer
footer = r"""
\midrule
\multicolumn{8}{l}{Sample size (number of students)} & 16,667 & 18,033 \\
\multicolumn{8}{l}{Number of schools} & 845 & 845 \\
\multicolumn{8}{l}{Dependent variable mean in control group} & 0.825 & 0.834 \\
\multicolumn{8}{l}{$F$-test $p$-value (treatment on background characteristics)} & 0.283 & 0.232 \\
"""

# Combine the table and footer
full_table = latex_table.replace(r'\bottomrule', footer + r'\bottomrule')

# Print or save the LaTeX table
#print(full_table)

ModuleNotFoundError: No module named 'stargazer'

## Saver Interaction Variable

In [22]:
#'dumm_rp_59_bl' : 'Student says they are a saver (Baseline)'
# Define the new interaction variable
interaction_var = 'Student says they are a saver (Baseline)'
data['interaction_term'] = data['treatment'] * data[interaction_var]

interaction_results = []
# Create Panel D for interaction analysis
for round_ in [0, 1]:
    subset = data[data['round'] == round_].dropna(subset=['treatment', outcome, interaction_var, 'cd_escola'])
    subset = subset.reset_index(drop=True)
    subset['cd_escola'] = subset['cd_escola'].astype('category')

    # Add interaction term
    subset['interaction_term'] = subset['treatment'] * subset[interaction_var]

    # Panel D regression (with interaction term, clustered standard errors)
    X = sm.add_constant(subset[['treatment', interaction_var, 'interaction_term']])
    y = subset[outcome]

    model_d = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})

    # Calculate control group statistics for this panel
    control_group = subset[subset['treatment'] == 0]
    control_mean = control_group[outcome].mean()
    control_sd = control_group[outcome].std()

    # Collect results for Panel D
    interaction_results.append({
        'panel': 'D',
        'round': round_,
        'outcome': outcome,
        'coeff_treatment': model_d.params['treatment'],
        'coeff_interaction': model_d.params['interaction_term'],
        'se_treatment': model_d.bse['treatment'],
        'se_interaction': model_d.bse['interaction_term'],
        'control_mean': control_mean,
        'control_sd': control_sd,
        'f_test_p_value': None,  # Update if performing F-tests later
        'r2': model_d.rsquared,
        'N': model_d.nobs,
        'N_clust': subset['cd_escola'].nunique()
    })

# Print or inspect results
#import ace_tools as tools; tools.display_dataframe_to_user(name="OLS Regression Results with Interaction Term", dataframe=pd.DataFrame(results))
for interaction_results in interaction_results:
    print(interaction_results)

{'panel': 'D', 'round': 0, 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': -0.017576772070336696, 'coeff_interaction': 0.026554737160763165, 'se_treatment': 0.01160776127985561, 'se_interaction': 0.014241645210263866, 'control_mean': 0.2491794, 'control_sd': 0.4325709044933319, 'f_test_p_value': None, 'r2': 0.008424180894333944, 'N': 14749.0, 'N_clust': 839}
{'panel': 'D', 'round': 1, 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': -0.0006785541548850649, 'coeff_interaction': 0.013682871082945677, 'se_treatment': 0.013406061461388785, 'se_interaction': 0.017844898749732826, 'control_mean': 0.2757798, 'control_sd': 0.44694268703460693, 'f_test_p_value': None, 'r2': 0.007134713777116608, 'N': 10942.0, 'N_clust': 796}


### Cluster 0 Interaction Variables

In [15]:
interaction_var_0 = ['Education of mother: At least some secondary (Baseline)',
       'Education of father: At least some secondary (Baseline)',
       'Student has failed at least one school year (Baseline)',
       'Student works in own or family business (Baseline)',
       'Student part of income saved is non-zero (Baseline)',
       'Student is behind on payments to bank or FI (Baseline)',
       'Student says they are a saver (Baseline)',
       'Student has formal savings (Baseline)',
       'Student makes a list of all monthly expenses (Baseline)',
       'Student saves money for future purchases (Baseline)',
       'Parent has at least some secondary education (Baseline)',
       'Parent is an employee (Baseline)',
       'Parent is self-employeed (Baseline)',
       'Parent: makes a list of all monthly expenses (Baseline)',
       'Parent: has debit card (Baseline)', 'dumm_rp_21p_bl',
       'Parent: has checks (Baseline)',
       'Parent: Prefers R50K plus 15 percent interest (Baseline)',
       'Parent: Inflation question dummy for correct (Baseline)']
positive_t = []
negative_t = []
outcome = 'I have bought electronics shoes or clothing with credit card'
interaction_results = []
for interaction_var in interaction_var_0:
    data = df
    data = df.dropna(subset=['treatment', outcome, 'round', 'cd_escola', interaction_var]).copy()
    data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
    # Create Panel D for interaction analysis
    for round_ in [0, 1]:
        subset = data[data['round'] == round_].dropna(subset=['treatment', outcome, interaction_var, 'cd_escola'])
        subset = subset.reset_index(drop=True)
        subset['cd_escola'] = subset['cd_escola'].astype('category')
        
        bl = 'Student has bought electronics shoes or clothing with credit card (Baseline)'
        subset[f'miss_{bl}'] = 0
        subset.loc[subset[bl].isnull(), f'miss_{bl}'] = 1
        subset[bl].fillna(0, inplace=True)
        
        subset['miss_f_coded'] = 0
        subset.loc[subset['female_coded'].isnull(), 'miss_f_coded'] = 1
        subset['female_coded'].fillna(0, inplace=True)
        
        subset[f'pair_{outcome}{round_}'] = subset['pair_all']
        pair_dummies = pd.get_dummies(subset[f'pair_{outcome}{round_}'], prefix=f'pair_{round_}', drop_first=True)
        subset = pd.concat([subset, pair_dummies], axis=1)
        pair_dummies = pair_dummies.astype('int')
        
        # Add interaction term
        subset['interaction_term'] = subset['treatment'] * subset[interaction_var]

        # Panel D regression (with interaction term, clustered standard errors
        X = sm.add_constant(pd.concat([subset[['treatment', bl, f'miss_{bl}', 
                                           'female_coded', 'miss_f_coded', 
                                              interaction_var, 'interaction_term']], pair_dummies], axis=1))
        y = subset[outcome]

        model_d = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})

        # Calculate control group statistics for this panel
        control_group = subset[subset['treatment'] == 0]
        control_mean = control_group[outcome].mean()
        control_sd = control_group[outcome].std()

        # Collect results for Panel D
        interaction_results.append({
            'panel': 'D',
            'round': round_,
            'interaction varialbe': interaction_var,
            'outcome': outcome,
            'coeff_treatment': model_d.params['treatment'],
            'coeff_interaction': model_d.params['interaction_term'],
            'se_treatment': model_d.bse['treatment'],
            'se_interaction': model_d.bse['interaction_term'],
            'control_mean': control_mean,
            'control_sd': control_sd,
            'f_test_p_value': None, 
            'r2': model_d.rsquared,
            'N': model_d.nobs,
            'N_clust': subset['cd_escola'].nunique() })
        
        if model_d.params['treatment'] > 0: 
            positive_t.append({'interaction varialbe': interaction_var, 'round': round_})
  
        if model_d.params['treatment'] < 0: 
            negative_t.append({'interaction varialbe': interaction_var, 'round': round_,})

#Print or inspect results
#import ace_tools as tools; tools.display_dataframe_to_user(name="OLS Regression Results with Interaction Term", dataframe=pd.DataFrame(results))
#for interaction_results in interaction_results:
    #print(interaction_results)

print(positive_t)
print(negative_t.append)

[{'interaction varialbe': 'Student works in own or family business (Baseline)', 'round': 1}, {'interaction varialbe': 'Student part of income saved is non-zero (Baseline)', 'round': 1}, {'interaction varialbe': 'Student is behind on payments to bank or FI (Baseline)', 'round': 1}, {'interaction varialbe': 'Student says they are a saver (Baseline)', 'round': 1}, {'interaction varialbe': 'Student makes a list of all monthly expenses (Baseline)', 'round': 1}, {'interaction varialbe': 'Student saves money for future purchases (Baseline)', 'round': 1}, {'interaction varialbe': 'Parent is an employee (Baseline)', 'round': 0}, {'interaction varialbe': 'Parent: makes a list of all monthly expenses (Baseline)', 'round': 0}, {'interaction varialbe': 'Parent: makes a list of all monthly expenses (Baseline)', 'round': 1}, {'interaction varialbe': 'Parent: has debit card (Baseline)', 'round': 0}, {'interaction varialbe': 'Parent: has debit card (Baseline)', 'round': 1}, {'interaction varialbe': 'Pa

### Cluster 1 Interaction Variables

In [21]:
interaction_var_0 = ['Student is not working at the moment (Baseline)',
       'Student works as employee or other (Baseline)',
       'Student is behind on payments (unconditional) (Baseline)',
       'Student is behind on payments to store (Baseline)',
       'Student has bought electronics shoes or clothing on installments (Baseline)']

outcome = 'I have bought electronics shoes or clothing with credit card'
interaction_results = []
for interaction_var in interaction_var_0:
    data = df
    data = data.dropna(subset=['treatment', outcome, 'round', 'cd_escola', interaction_var])
    data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
    # Create Panel D for interaction analysis
    for round_ in [0, 1]:
        subset = data[data['round'] == round_].dropna(subset=['treatment', outcome, interaction_var, 'cd_escola'])
        subset = subset.reset_index(drop=True)
        subset['cd_escola'] = subset['cd_escola'].astype('category')
        
        bl = 'Student has bought electronics shoes or clothing with credit card (Baseline)'
        subset[f'miss_{bl}'] = 0
        subset.loc[subset[bl].isnull(), f'miss_{bl}'] = 1
        subset[bl].fillna(0, inplace=True)
        
        subset['miss_f_coded'] = 0
        subset.loc[subset['female_coded'].isnull(), 'miss_f_coded'] = 1
        subset['female_coded'].fillna(0, inplace=True)
        
        subset[f'pair_{outcome}{round_}'] = subset['pair_all']
        pair_dummies = pd.get_dummies(subset[f'pair_{outcome}{round_}'], prefix=f'pair_{round_}', drop_first=True)
        subset = pd.concat([subset, pair_dummies], axis=1)
        pair_dummies = pair_dummies.astype('int')
        
        # Add interaction term
        subset['interaction_term'] = subset['treatment'] * subset[interaction_var]

        # Panel D regression (with interaction term, clustered standard errors
        X = sm.add_constant(pd.concat([subset[['treatment', bl, f'miss_{bl}', 
                                           'female_coded', 'miss_f_coded', 
                                              interaction_var, 'interaction_term']], pair_dummies], axis=1))
        y = subset[outcome]

        model_d = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})

        # Calculate control group statistics for this panel
        control_group = subset[subset['treatment'] == 0]
        control_mean = control_group[outcome].mean()
        control_sd = control_group[outcome].std()

        # Collect results for Panel D
        interaction_results.append({
            'panel': 'D',
            'round': round_,
            'interaction varialbe': interaction_var,
            'outcome': outcome,
            'coeff_treatment': model_d.params['treatment'],
            'coeff_interaction': model_d.params['interaction_term'],
            'se_treatment': model_d.bse['treatment'],
            'se_interaction': model_d.bse['interaction_term'],
            'control_mean': control_mean,
            'control_sd': control_sd,
            'f_test_p_value': None, 
            'r2': model_d.rsquared,
            'N': model_d.nobs,
            'N_clust': subset['cd_escola'].nunique()
        })

   
 for interaction_results in interaction_results:
    print(interaction_results)

Student is not working at the moment (Baseline)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]


Student works as employee or other (Baseline)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]


Student is behind on payments (unconditional) (Baseline)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]


Student is behind on payments to store (Baseline)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]


Student has bought electronics shoes or clothing on installments (Baseline)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]


{'panel': 'D', 'round': 0, 'interaction varialbe': 'Student is not working at the moment (Baseline)', 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': -0.0037873351986393708, 'coeff_interaction': 0.0019564755857185982, 'se_treatment': 0.007048907449884463, 'se_interaction': 0.013824858512800447, 'control_mean': 0.24911536, 'control_sd': 0.43253278732299805, 'f_test_p_value': None, 'r2': 0.25693062220658314, 'N': 14869.0, 'N_clust': 840}
{'panel': 'D', 'round': 1, 'interaction varialbe': 'Student is not working at the moment (Baseline)', 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': 0.006708606414392982, 'coeff_interaction': -0.011092146662088272, 'se_treatment': 0.00931346265056363, 'se_interaction': 0.017414048781635005, 'control_mean': 0.27704054, 'control_sd': 0.4475654661655426, 'f_test_p_value': None, 'r2': 0.18822524420580156, 'N': 11023.0, 'N_clust': 796}
{'panel': 'D', 'round': 0, 'inte

### Cluster 2 Interaction Variables

In [5]:
interaction_var_0 = ['Student\'s Family receives Bolsa Familia cash transfer (Baseline)',
       'Student has borrowed money (any source) (Baseline)',
       'Student is behind on payments to family friends or other people (Baseline)',
       'Parent occupation is other (homemaker, retired, unemployed, other) (Baseline)']

outcome = 'I have bought electronics shoes or clothing with credit card'
interaction_results = []
for interaction_var in interaction_var_0:
    data = df
    data = data.dropna(subset=['treatment', outcome, 'round', 'cd_escola', interaction_var])
    data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
    # Create Panel D for interaction analysis
    for round_ in [0, 1]:
        subset = data[data['round'] == round_].dropna(subset=['treatment', outcome, interaction_var, 'cd_escola'])
        subset = subset.reset_index(drop=True)
        subset['cd_escola'] = subset['cd_escola'].astype('category')
        
        bl = 'Student has bought electronics shoes or clothing with credit card (Baseline)'
        subset[f'miss_{bl}'] = 0
        subset.loc[subset[bl].isnull(), f'miss_{bl}'] = 1
        subset[bl].fillna(0, inplace=True)
        
        subset['miss_f_coded'] = 0
        subset.loc[subset['female_coded'].isnull(), 'miss_f_coded'] = 1
        subset['female_coded'].fillna(0, inplace=True)
        
        subset[f'pair_{outcome}{round_}'] = subset['pair_all']
        pair_dummies = pd.get_dummies(subset[f'pair_{outcome}{round_}'], prefix=f'pair_{round_}', drop_first=True)
        subset = pd.concat([subset, pair_dummies], axis=1)
        pair_dummies = pair_dummies.astype('int')
        
        # Add interaction term
        subset['interaction_term'] = subset['treatment'] * subset[interaction_var]

        # Panel D regression (with interaction term, clustered standard errors
        X = sm.add_constant(pd.concat([subset[['treatment', bl, f'miss_{bl}', 
                                           'female_coded', 'miss_f_coded', 
                                              interaction_var, 'interaction_term']], pair_dummies], axis=1))
        y = subset[outcome]

        model_d = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})

        # Calculate control group statistics for this panel
        control_group = subset[subset['treatment'] == 0]
        control_mean = control_group[outcome].mean()
        control_sd = control_group[outcome].std()

        # Collect results for Panel D
        interaction_results.append({
            'panel': 'D',
            'round': round_,
            'interaction varialbe': interaction_var,
            'outcome': outcome,
            'coeff_treatment': model_d.params['treatment'],
            'coeff_interaction': model_d.params['interaction_term'],
            'se_treatment': model_d.bse['treatment'],
            'se_interaction': model_d.bse['interaction_term'],
            'control_mean': control_mean,
            'control_sd': control_sd,
            'f_test_p_value': None, 
            'r2': model_d.rsquared,
            'N': model_d.nobs,
            'N_clust': subset['cd_escola'].nunique()
        })

# Print or inspect results
#import ace_tools as tools; tools.display_dataframe_to_user(name="OLS Regression Results with Interaction Term", dataframe=pd.DataFrame(results))
for interaction_results in interaction_results:
    print(interaction_results)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] *

{'panel': 'D', 'round': 0, 'interaction varialbe': "Student's Family receives Bolsa Familia cash transfer (Baseline)", 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': 0.00121846545543026, 'coeff_interaction': -0.010381619405374545, 'se_treatment': 0.00784942145212125, 'se_interaction': 0.014159759912600831, 'control_mean': 0.25022393, 'control_sd': 0.433175653219223, 'f_test_p_value': None, 'r2': 0.25139388843616006, 'N': 13990.0, 'N_clust': 837}
{'panel': 'D', 'round': 1, 'interaction varialbe': "Student's Family receives Bolsa Familia cash transfer (Baseline)", 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': -0.007443285810048134, 'coeff_interaction': 0.004320906774172677, 'se_treatment': 0.009816451118732446, 'se_interaction': 0.01719832244321929, 'control_mean': 0.2792584, 'control_sd': 0.44868266582489014, 'f_test_p_value': None, 'r2': 0.18141128180109467, 'N': 10375.0, 'N_clust': 791}
{'pa

### Cluster 3 Interaction Variable

In [14]:
interaction_var_0 = ['Student is female (Baseline)',
       'Student has computer with internet at home (Baseline)',
       'Student receives income (Baseline)',
       'Student has bought electronics shoes or clothing with cash/debit card (Baseline)',
       'Student negotiates prices or payment methods (Baseline)',
       'Student comparison shops before making purchase (Baseline)',
       'Parent: has savings account (Baseline)',
       ' Parent: has formal savings (Baseline)',
       'Parent: Student talks to you about finances (Baseline)',
       'Parent: Student helps organize HH budget (Baseline)']

outcome = 'I have bought electronics shoes or clothing with credit card'
interaction_results = []
for interaction_var in interaction_var_0:
    data = df
    data = data.dropna(subset=['treatment', outcome, 'round', 'cd_escola', interaction_var])
    data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
    # Create Panel D for interaction analysis
    for round_ in [0, 1]:
        subset = data[data['round'] == round_].dropna(subset=['treatment', outcome, interaction_var, 'cd_escola'])
        subset = subset.reset_index(drop=True)
        subset['cd_escola'] = subset['cd_escola'].astype('category')
        
        bl = 'Student has bought electronics shoes or clothing with credit card (Baseline)'
        subset[f'miss_{bl}'] = 0
        subset.loc[subset[bl].isnull(), f'miss_{bl}'] = 1
        subset[bl].fillna(0, inplace=True)
        
        subset['miss_f_coded'] = 0
        subset.loc[subset['female_coded'].isnull(), 'miss_f_coded'] = 1
        subset['female_coded'].fillna(0, inplace=True)
        
        subset[f'pair_{outcome}{round_}'] = subset['pair_all']
        pair_dummies = pd.get_dummies(subset[f'pair_{outcome}{round_}'], prefix=f'pair_{round_}', drop_first=True)
        subset = pd.concat([subset, pair_dummies], axis=1)
        pair_dummies = pair_dummies.astype('int')
        
        # Add interaction term
        subset['interaction_term'] = subset['treatment'] * subset[interaction_var]

        # Panel D regression (with interaction term, clustered standard errors
        X = sm.add_constant(pd.concat([subset[['treatment', bl, f'miss_{bl}', 
                                           'female_coded', 'miss_f_coded', 
                                              interaction_var, 'interaction_term']], pair_dummies], axis=1))
        y = subset[outcome]

        model_d = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})

        # Calculate control group statistics for this panel
        control_group = subset[subset['treatment'] == 0]
        control_mean = control_group[outcome].mean()
        control_sd = control_group[outcome].std()

        # Collect results for Panel D
        interaction_results.append({
            'panel': 'D',
            'round': round_,
            'interaction varialbe': interaction_var,
            'outcome': outcome,
            'coeff_treatment': model_d.params['treatment'],
            'coeff_interaction': model_d.params['interaction_term'],
            'se_treatment': model_d.bse['treatment'],
            'se_interaction': model_d.bse['interaction_term'],
            'control_mean': control_mean,
            'control_sd': control_sd,
            'f_test_p_value': None, 
            'r2': model_d.rsquared,
            'N': model_d.nobs,
            'N_clust': subset['cd_escola'].nunique()
        })

# Print or inspect results
#import ace_tools as tools; tools.display_dataframe_to_user(name="OLS Regression Results with Interaction Term", dataframe=pd.DataFrame(results))
for interaction_results in interaction_results:
    print(interaction_results)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] *

{'panel': 'D', 'round': 0, 'interaction varialbe': 'Student is female (Baseline)', 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': 0.0009443534909191441, 'coeff_interaction': -0.006518116620920198, 'se_treatment': 0.009398917691288772, 'se_interaction': 0.013266810819677633, 'control_mean': 0.24944353, 'control_sd': 0.4327235221862793, 'f_test_p_value': None, 'r2': 0.24950294383474336, 'N': 14084.0, 'N_clust': 837}
{'panel': 'D', 'round': 1, 'interaction varialbe': 'Student is female (Baseline)', 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': -0.0010270456276591823, 'coeff_interaction': -0.007233552096475696, 'se_treatment': 0.012569619176013413, 'se_interaction': 0.01747389326464412, 'control_mean': 0.278119, 'control_sd': 0.4481126666069031, 'f_test_p_value': None, 'r2': 0.18048512916991688, 'N': 10450.0, 'N_clust': 791}
{'panel': 'D', 'round': 0, 'interaction varialbe': 'Student has computer

In [None]:
'Student has computer with internet at home (Baseline)',
'Student says they are a saver (Baseline)',
'Student receives income (Baseline)',
'Student has borrowed money (any source) (Baseline)',
'Student has bought electronics, shoes, or clothing with cash/debit card (Baseline)'

In [7]:
interaction_var_0 = ['Student has computer with internet at home (Baseline)',
'Student says they are a saver (Baseline)',
'Student receives income (Baseline)',
'Student has borrowed money (any source) (Baseline)',
'Student has bought electronics shoes or clothing with cash/debit card (Baseline)']

outcome = 'I have bought electronics shoes or clothing with credit card'
interaction_results = []
for interaction_var in interaction_var_0:
    data = df
    data = data.dropna(subset=['treatment', outcome, 'round', 'cd_escola', interaction_var])
    data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
    # Create Panel D for interaction analysis
    for round_ in [0, 1]:
        subset = data[data['round'] == round_].dropna(subset=['treatment', outcome, interaction_var, 'cd_escola'])
        subset = subset.reset_index(drop=True)
        subset['cd_escola'] = subset['cd_escola'].astype('category')
        
        bl = 'Student has bought electronics shoes or clothing with credit card (Baseline)'
        subset[f'miss_{bl}'] = 0
        subset.loc[subset[bl].isnull(), f'miss_{bl}'] = 1
        subset[bl].fillna(0, inplace=True)
        
        subset['miss_f_coded'] = 0
        subset.loc[subset['female_coded'].isnull(), 'miss_f_coded'] = 1
        subset['female_coded'].fillna(0, inplace=True)
        
        subset[f'pair_{outcome}{round_}'] = subset['pair_all']
        pair_dummies = pd.get_dummies(subset[f'pair_{outcome}{round_}'], prefix=f'pair_{round_}', drop_first=True)
        subset = pd.concat([subset, pair_dummies], axis=1)
        pair_dummies = pair_dummies.astype('int')
        
        # Add interaction term
        subset['interaction_term'] = subset['treatment'] * subset[interaction_var]

        # Panel D regression (with interaction term, clustered standard errors
        X = sm.add_constant(pd.concat([subset[['treatment', bl, f'miss_{bl}', 
                                           'female_coded', 'miss_f_coded', 
                                              interaction_var, 'interaction_term']], pair_dummies], axis=1))
        y = subset[outcome]

        model_d = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': subset['cd_escola']})

        # Calculate control group statistics for this panel
        control_group = subset[subset['treatment'] == 0]
        control_mean = control_group[outcome].mean()
        control_sd = control_group[outcome].std()
        
        # Perform an F-test for joint significance of 'treatment' and 'interaction_term'
        f_test = model_d.f_test("treatment = 0, interaction_term = 0")
        p_value_f_test = f_test.pvalue

        f_test = model_d.f_test("treatment = 0, interaction_term = 0")
        p_value_f_test = f_test.pvalue

        # Collect results for Panel D
        interaction_results.append({
        'panel': 'D',
        'round': round_,
        'interaction varialbe': interaction_var,
        'outcome': outcome,
        'coeff_treatment': model_d.params['treatment'],
        'coeff_interaction': model_d.params['interaction_term'],
        'se_treatment': model_d.bse['treatment'],
        'se_interaction': model_d.bse['interaction_term'],
        'p_value_treatment': model_d.pvalues['treatment'],  # Add p-value for treatment
        'p_value_interaction': model_d.pvalues['interaction_term'],  # Add p-value for interaction term
        'control_mean': control_mean,
        'control_sd': control_sd,
        'f_test_p_value': None, 
        'r2': model_d.rsquared,
        'N': model_d.nobs,
        'N_clust': subset['cd_escola'].nunique()
        })

# Print or inspect results
#import ace_tools as tools; tools.display_dataframe_to_user(name="OLS Regression Results with Interaction Term", dataframe=pd.DataFrame(results))
for interaction_results in interaction_results:
    print(interaction_results)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] * data[interaction_var]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'interaction_term'] = data['treatment'] *

{'panel': 'D', 'round': 0, 'interaction varialbe': 'Student has computer with internet at home (Baseline)', 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': -0.00922743385404752, 'coeff_interaction': 0.010404930430737504, 'se_treatment': 0.009988129214650926, 'se_interaction': 0.013627403565423527, 'p_value_treatment': 0.35556959656534215, 'p_value_interaction': 0.4451474222801831, 'control_mean': 0.24954847, 'control_sd': 0.43277812004089355, 'f_test_p_value': None, 'r2': 0.25495766556144417, 'N': 13888.0, 'N_clust': 837}
{'panel': 'D', 'round': 1, 'interaction varialbe': 'Student has computer with internet at home (Baseline)', 'outcome': 'I have bought electronics shoes or clothing with credit card', 'coeff_treatment': -0.007836750299676676, 'coeff_interaction': 0.0034171470436215982, 'se_treatment': 0.012988156119043168, 'se_interaction': 0.01803956248383264, 'p_value_treatment': 0.5462581433788041, 'p_value_interaction': 0.84975960512649