In [None]:
# Fit mixed effects model with fewer variables
        mixed_model_log = MixedLM(y, X_simplified, groups)
        try:
            # Everything below depends on a successful fit, so the whole
            # reporting pipeline lives inside the try: any failure is written
            # to the document by the handler at the bottom instead of aborting.
            mixed_results_log = mixed_model_log.fit()
            mixed_summary_log = str(mixed_results_log.summary())
            print("\nApproximated Multilevel Model Results:")
            print(mixed_summary_log)

            # Add the plain-text summary to the document in a monospace font.
            doc.add_paragraph('Model Summary with fewer variables:')
            summary_paragraph_log = doc.add_paragraph()
            summary_run_log = summary_paragraph_log.add_run(mixed_summary_log)
            summary_run_log.font.name = 'Courier New'

            # Variance components: read once, reuse for both the table and the
            # intraclass correlation coefficient (ICC).
            state_var_log = mixed_results_log.cov_re.iloc[0, 0]
            residual_var_log = mixed_results_log.scale
            icc_log = state_var_log / (state_var_log + residual_var_log)

            doc.add_paragraph('\nVariance Components with fewer variables:')
            vc_table = doc.add_table(rows=3, cols=2)
            vc_table.style = 'Table Grid'
            vc_table.cell(0, 0).text = 'Component'
            vc_table.cell(0, 1).text = 'Estimate'
            vc_table.cell(1, 0).text = 'State Random Effect Variance'
            vc_table.cell(1, 1).text = f"{state_var_log:.4f}"
            vc_table.cell(2, 0).text = 'Residual Variance'
            vc_table.cell(2, 1).text = f"{residual_var_log:.4f}"

            # Exponentiate the coefficients and their confidence bounds to
            # report them as incident rate ratios (IRR).
            # NOTE(review): `params`/`conf_int()`/`pvalues` appear to include
            # the random-effect variance parameter as well as the fixed
            # effects, so the table may contain a meaningless exponentiated
            # variance row -- confirm, and consider `fe_params` instead.
            print("\nIncident Rate Ratios (IRR) Mixed Model:")
            irr_mixed_log = np.exp(mixed_results_log.params)
            irr_conf_mixed_log = np.exp(mixed_results_log.conf_int())
            irr_df_mixed_log = pd.DataFrame({
                'IRR': irr_mixed_log,
                'Lower CI': irr_conf_mixed_log[0],
                'Upper CI': irr_conf_mixed_log[1],
                'P-value': mixed_results_log.pvalues,
            })
            print(irr_df_mixed_log)

            # Add IRR table to document: one header row plus one row per term.
            doc.add_paragraph('\n')
            doc.add_heading('Incident Rate Ratios (IRR) Mixed Model', level=2)
            irr_table_mixed = doc.add_table(rows=len(irr_df_mixed_log)+1, cols=5)
            irr_table_mixed.style = 'Table Grid'
            irr_table_mixed.cell(0, 0).text = 'Variable'
            irr_table_mixed.cell(0, 1).text = 'IRR'
            irr_table_mixed.cell(0, 2).text = 'Lower CI'
            irr_table_mixed.cell(0, 3).text = 'Upper CI'
            irr_table_mixed.cell(0, 4).text = 'P-value'

            for i, (var, row) in enumerate(irr_df_mixed_log.iterrows(), 1):
                irr_table_mixed.cell(i, 0).text = str(var)
                irr_table_mixed.cell(i, 1).text = f"{row['IRR']:.4f}"
                irr_table_mixed.cell(i, 2).text = f"{row['Lower CI']:.4f}"
                irr_table_mixed.cell(i, 3).text = f"{row['Upper CI']:.4f}"
                irr_table_mixed.cell(i, 4).text = f"{row['P-value']:.4f}"

            # Report the ICC of THIS model (`icc_log`). The previous code
            # formatted `icc`, a name this block never assigns.
            doc.add_paragraph(f'\nIntraclass Correlation Coefficient (ICC): {icc_log:.4f}')
            doc.add_paragraph('The ICC represents the proportion of the total variance in length of stay ' +
                             'that is attributable to differences between states.')

            # Post-estimation diagnostics: store fitted values and residuals
            # on the modelling frame so they can be plotted.
            df_clean_model['fitted_log'] = mixed_results_log.fittedvalues
            df_clean_model['residuals_log'] = mixed_results_log.resid

            # Plot residuals vs fitted values into an in-memory PNG.
            plt.figure(figsize=(10, 6))
            plt.scatter(df_clean_model['fitted_log'], df_clean_model['residuals_log'], alpha=0.5)
            plt.axhline(y=0, color='r', linestyle='-')
            plt.xlabel('Fitted Values')
            plt.ylabel('Residuals')
            plt.title('Residuals vs Fitted Values')
            plt.tight_layout()
            residuals_vs_fitted_img_log = BytesIO()
            plt.savefig(residuals_vs_fitted_img_log, format='png')
            residuals_vs_fitted_img_log.seek(0)  # rewind so the document reads from the start
            plt.close()

            # Add to document
            doc.add_paragraph('\n')
            doc.add_heading('Post-Estimation Diagnostics', level=2)
            doc.add_picture(residuals_vs_fitted_img_log, width=Inches(6))
            doc.add_paragraph('Figure 5: Residuals vs Fitted Values for logr Model')

            # Q-Q plot of the residuals against a normal distribution.
            plt.figure(figsize=(10, 6))
            stats.probplot(df_clean_model['residuals_log'], dist="norm", plot=plt)
            plt.title('Q-Q Plot of Residuals logr Model')
            plt.tight_layout()
            qq_plot_img_log = BytesIO()
            plt.savefig(qq_plot_img_log, format='png')
            qq_plot_img_log.seek(0)
            plt.close()

            # Add to document
            doc.add_picture(qq_plot_img_log, width=Inches(6))
            doc.add_paragraph('Figure 6: Q-Q Plot of Residuals with fewer variables')

            # Fit a log linear model (no random effects) as the comparison
            # model for the likelihood ratio test.
            # NOTE(review): `formula` and `df_clean_nb` are defined outside
            # this block; confirm they describe the same outcome, predictors
            # and rows as `y` / `X_simplified`, otherwise the test is invalid.
            ols_model = smf.ols(formula, df_clean_nb)
            ols_results = ols_model.fit()

            # Likelihood ratio test: 2 * (llf_mixed - llf_ols).
            # NOTE(review): `fit()` above is called with default arguments; if
            # that default is REML, its log-likelihood is not comparable with
            # the OLS (ML) one -- consider refitting with reml=False. TODO confirm.
            lr_stat_log = -2 * (ols_results.llf - mixed_results_log.llf)
            p_value_log = stats.chi2.sf(lr_stat_log, df=1)  # df=1 for one random effect
            doc.add_paragraph(f'\nLikelihood Ratio Test for Random Effects with log Model: Statistic = {lr_stat_log:.2f}, P-value = {p_value_log:.4f}')

            # Pseudo-R² (McFadden's R² approximation) against an
            # intercept-only mixed model.
            # NOTE(review): the null model's outcome is `los_capped`; verify
            # that this is the same response as `y` used for the full model.
            null_model_log = smf.mixedlm("los_capped ~ 1", df_clean_model, groups=df_clean_model['us_state_enc'])
            null_results_log = null_model_log.fit()
            pseudo_r2_log = 1 - (mixed_results_log.llf / null_results_log.llf)
            doc.add_paragraph(f'\nPseudo-R² (McFadden) fewer variables: {pseudo_r2_log:.4f}')

            # Variance inflation factors for every predictor except the
            # intercept column.
            # NOTE(review): the design matrix passed to
            # variance_inflation_factor has the constant removed; check
            # whether the VIFs should be computed with it included.
            from statsmodels.stats.outliers_influence import variance_inflation_factor
            X_continuous_log = X_simplified[[col for col in X_simplified.columns if col != 'const']]  # Exclude intercept
            vif_data_log = pd.DataFrame()
            vif_data_log["Variable"] = X_continuous_log.columns
            vif_data_log["VIF"] = [variance_inflation_factor(X_continuous_log.values, i) for i in range(X_continuous_log.shape[1])]
            print("VIF for continuous and dummy variables with fewer variables:")
            print(vif_data_log)
            doc.add_paragraph('\nVariance Inflation Factor (VIF) for Continuous and Categorical Variables with fewer variables:')
            vif_table = doc.add_table(rows=len(vif_data_log)+1, cols=2)
            vif_table.style = 'Table Grid'
            vif_table.cell(0, 0).text = 'Variable'
            vif_table.cell(0, 1).text = 'VIF'
            for i, (var, vif) in enumerate(zip(vif_data_log["Variable"], vif_data_log["VIF"]), 1):
                vif_table.cell(i, 0).text = str(var)
                vif_table.cell(i, 1).text = f"{vif:.4f}"
            doc.add_paragraph('VIF values above 10 indicate potential multicollinearity issues.')
        except Exception as exc:
            # Single handler replacing three near-identical ones; only the
            # message prefix depends on the exception type. The exception is
            # bound to `exc` rather than `re`, because `except ... as re`
            # rebinds and then deletes that name, which would clobber an
            # imported `re` module in this scope.
            if isinstance(exc, ValueError):
                error_msg = f"ValueError in mixed model fitting: {exc}"
            elif isinstance(exc, RuntimeError):
                error_msg = f"RuntimeError in mixed model fitting: {exc}"
            else:
                error_msg = f"Error fitting mixed model: {exc}"
            print(error_msg)
            doc.add_paragraph(error_msg)
            # NOTE(review): this text is also emitted for failures unrelated
            # to convergence (any exception raised in the reporting code above).
            doc.add_paragraph("The mixed effects model failed to converge. This can happen due to " +
                             "insufficient variation in the grouping variable or other model specification issues.")