In [18]:
import os
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy.stats import f_oneway

# Directory layout, resolved from the kernel's current working directory.
current_dir = os.getcwd()
# The project root is taken to be two directory levels above the working directory.
project_root = os.path.dirname(os.path.dirname(current_dir))

# Each constant is project_root joined with the sub-path listed in the same position.
PROCESSED_DATA_PATH, RAW_DATA_PATH, OUTPUT_PATH, REPORTS_TABLES_PATH = (
    os.path.join(project_root, *parts)
    for parts in (
        ("data", "processed"),
        ("data", "raw"),
        ("output",),
        ("reports", "tables"),
    )
)

In [19]:
# Load the processed dataset; the path is built with os.path.join, like the
# directory constants it is based on, instead of a hard-coded "/" separator.
df = pd.read_csv(os.path.join(PROCESSED_DATA_PATH, "dummy-efa.csv"))

In [20]:
# `data` is a second name for the same DataFrame object as `df` (plain assignment,
# no copy), so columns assigned onto `df` afterwards are visible through `data` too.
data = df

In [21]:
# One (initially empty) frame per construct; matching columns are copied in below.
jit_practices = pd.DataFrame()
environmental_performance = pd.DataFrame()
environmental_practices = pd.DataFrame()

# Each target frame is paired with the column-name prefixes that belong to it.
# str.startswith accepts a tuple and returns True if any prefix matches.
#
# Alternative environmental-practices prefix set (disabled):
#   ENVRTX08, ENVRTX05, ENVRTX23, EPRACX02, ENVRTX14, ENVRTX15, EPRACX03, ENVRTX24
# The active set for `environmental_practices` is the SUPPLY CHAIN one.
bundle_prefixes = (
    (
        jit_practices,
        (
            "LINKCN05", "SCHEDN02", "SCHEDN01", "LINKCN03",
            "LINKCN04", "LINKCN01", "LAYOUTN01", "LAYOUTN04",
        ),
    ),
    (
        environmental_performance,
        (
            "EPERFX06", "EPERFX04", "EPERFX05", "EPERFX03",
            "EPERFX02", "EPERFX07", "EPERFX01", "EPERFX09",
        ),
    ),
    (
        environmental_practices,
        (
            "ENVRTX32", "ENVRTX33", "ENVRTX37", "ENVRTX36",
            "ENVRTX29", "ENVRTX40", "ENVRTX34", "ENVRTX35",
        ),
    ),
)

# Walk the columns in their order in `data`, so every bundle keeps that order.
for column in data.columns:
    for target, prefixes in bundle_prefixes:
        if column.startswith(prefixes):
            target[column] = data[column]

bundles = [jit_practices, environmental_performance, environmental_practices]

In [22]:
# Display the column labels that were collected into `environmental_practices`.
environmental_practices.columns

Index(['ENVRTX37', 'ENVRTX33', 'ENVRTX32', 'ENVRTX34', 'ENVRTX29', 'ENVRTX40',
       'ENVRTX35', 'ENVRTX36'],
      dtype='object')

In [23]:
import pandas as pd
import statsmodels.formula.api as smf

# Assuming 'df' is your pandas DataFrame with the data

# Composite scores: each construct is the row-wise mean of its item columns.
# NOTE(review): these environmental items are not the ENVRTX29-ENVRTX40 columns that
# are collected into `environmental_practices`, although the exported tables are
# captioned "Environmental Practices 2 - Suppliers" -- confirm which item set is intended.
# NOTE(review): 'ENVRTX03' appears here where the disabled prefix list in the bundle
# cell has 'EPRACX03' -- verify the column name.
env_items = ['ENVRTX08', 'ENVRTX05', 'ENVRTX23', 'EPRACX02',
             'ENVRTX14', 'ENVRTX15', 'ENVRTX03', 'ENVRTX24']
jit_items = ['LINKCN05', 'SCHEDN02', 'SCHEDN01', 'LINKCN03',
             'LINKCN04', 'LINKCN01', 'LAYOUTN01', 'LAYOUTN04']

df['Env_Score'] = df[env_items].mean(axis=1)
df['JIT_Score'] = df[jit_items].mean(axis=1)

# Interaction term: product of the two composite scores.
df['JIT_Env_Interaction'] = df['JIT_Score'] * df['Env_Score']

# Both outcomes share the same right-hand side; ACCTGX51 enters as the control variable.
rhs_terms = 'Env_Score + JIT_Score + JIT_Env_Interaction + ACCTGX51'

# Emissions to air (EPERFX05)
model_air = smf.ols('EPERFX05 ~ ' + rhs_terms, data=df).fit()
print(model_air.summary())

# Solid waste generation (EPERFX07)
model_waste = smf.ols('EPERFX07 ~ ' + rhs_terms, data=df).fit()
print(model_waste.summary())


                            OLS Regression Results                            
Dep. Variable:               EPERFX05   R-squared:                       0.128
Model:                            OLS   Adj. R-squared:                  0.108
Method:                 Least Squares   F-statistic:                     6.263
Date:                Tue, 28 Nov 2023   Prob (F-statistic):           0.000100
Time:                        00:43:10   Log-Likelihood:                -187.06
No. Observations:                 175   AIC:                             384.1
Df Residuals:                     170   BIC:                             399.9
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept               2.2797    

In [24]:
import pandas as pd

def regression_to_latex(df, file_name, label="tab:regression", caption="Regression Results"):
    """
    Convert a pandas DataFrame containing regression results to a LaTeX table, save it to a file,
    apply formatting to the numeric columns, add a significance column, and include label and caption
    for the table in LaTeX.

    Parameters:
    - df: pandas DataFrame to convert, containing regression results.
    - file_name: Name of the file to save the LaTeX code.
    - label: Label for the table in LaTeX.
    - caption: Caption for the table in LaTeX.
    """
    
    # Reset index to turn the index (coefficient names) into a regular column
    df = df.reset_index()
    df.columns = ['Coefficient', 'Coef.', 'Std.Err.', 't', 'P>|t|', '[0.025', '0.975]']

    # Escape all LaTeX special characters in the DataFrame
    df = df.replace('&', '\\&', regex=True).replace('%', '\\%', regex=True)

    # Escape underscores in coefficient names
    df['Coefficient'] = df['Coefficient'].str.replace('_', '\\_')

    # Format numbers and round to two decimal places
    numeric_columns = ['Coef.', 'Std.Err.', 't', 'P>|t|', '[0.025', '0.975]']
    for col in numeric_columns:
        df[col] = df[col].apply(lambda x: f"{x:.2f}" if isinstance(x, float) else x)

    # Add significance column
    def significance_stars(p_value):
        if p_value < 0.01:
            return '***'
        elif p_value < 0.05:
            return '**'
        elif p_value < 0.1:
            return '*'
        return ''

    df['Sig.'] = df['P>|t|'].apply(lambda p: significance_stars(float(p)))

    # Create the LaTeX table code without the longtable format
    latex_code = df.to_latex(index=False, header=True, escape=False, column_format="lccccccc")

    # Wrap the tabular environment with the table float environment
    table_float = r'''\begin{table}[htbp]
    \centering
    \caption{''' + caption + r'''}
    \label{''' + label + r'''}
    ''' + latex_code + r'''
    \end{table}
    '''

    # Save to file
    with open(file_name, 'w') as file:
        file.write(table_float)

# Example usage
# regression_to_latex(results_air, 'regression_air_table.tex')

In [17]:
# Coefficient tables (second table of summary2()) for the two fitted models.
results_air = model_air.summary2().tables[1]
results_waste = model_waste.summary2().tables[1]

# open() does not create missing directories, so make sure the target folder exists.
os.makedirs(REPORTS_TABLES_PATH, exist_ok=True)

# Each table gets its own label: with the shared default "tab:regression" the two
# tables would define the same \label when included in one LaTeX document.
regression_to_latex(
    results_air,
    os.path.join(REPORTS_TABLES_PATH, 'regression_air_table_efa_sup.tex'),
    label="tab:regression-air-efa-sup",
    caption="Emissions to Air - Regression Results (Environmental Practices 2 - Suppliers)",
)
regression_to_latex(
    results_waste,
    os.path.join(REPORTS_TABLES_PATH, 'regression_waste_table_efa_sup.tex'),
    label="tab:regression-waste-efa-sup",
    caption="Solid Waste Generation - Regression Results (Environmental Practices 2 - Suppliers)",
)