In [1]:
import os
import pandas as pd
import numpy as np
from scipy import stats
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy.stats import f_oneway

# Project layout: the project root is taken to be two directory levels above
# the current working directory, and every data/output location hangs off it.
current_dir = os.getcwd()
_parent_dir = os.path.dirname(current_dir)
project_root = os.path.dirname(_parent_dir)

# Input data locations.
_data_root = os.path.join(project_root, "data")
PROCESSED_DATA_PATH = os.path.join(_data_root, "processed")
RAW_DATA_PATH = os.path.join(_data_root, "raw")

# Output locations.
OUTPUT_PATH = os.path.join(project_root, "output")
REPORTS_TABLES_PATH = os.path.join(project_root, "reports", "tables")

In [2]:
# Load the processed dataset. The path is built with os.path.join, the same way
# the path constants themselves are built, instead of a hard-coded "/" separator.
df = pd.read_csv(os.path.join(PROCESSED_DATA_PATH, "dummy.csv"))

In [3]:
# `data` is a second name for the same DataFrame object as `df` (no copy is
# made), so any column later added to `df` is visible through `data` as well.
data = df

In [4]:
# Column-name prefixes that define each bundle of items.
ENV_PRACTICE_PREFIXES = ("ENVRTX", "EPRACX")
ENV_PERFORMANCE_PREFIXES = ("EPERFX",)
JIT_PRACTICE_PREFIXES = ("LAYOUT", "JITDEL", "KANBAN", "LINKCN", "SCHEDN", "SETUPN")


def _select_by_prefix(frame, prefixes):
    """Return a copy of the columns of ``frame`` whose names start with any of ``prefixes``.

    Parameters:
    - frame: pandas DataFrame with string column names.
    - prefixes: tuple of prefixes, passed as-is to ``str.startswith``.

    Returns:
    - A new DataFrame holding the matching columns in their original order and
      sharing ``frame``'s index. It is a copy, so editing it never touches ``frame``.
    """
    matching = [name for name in frame.columns if name.startswith(prefixes)]
    # One selection instead of inserting columns one at a time into an empty frame.
    return frame.loc[:, matching].copy()


environmental_practices = _select_by_prefix(data, ENV_PRACTICE_PREFIXES)
environmental_performance = _select_by_prefix(data, ENV_PERFORMANCE_PREFIXES)
jit_practices = _select_by_prefix(data, JIT_PRACTICE_PREFIXES)

bundles = [jit_practices, environmental_practices, environmental_performance]

In [5]:
# NOTE(review): a notebook cell renders only its final expression, so the first
# line below produces no visible output; only `jit_practices.columns` is shown.
environmental_practices.columns
jit_practices.columns

Index(['LAYOUTN01', 'LAYOUTN02', 'LAYOUTN03', 'LAYOUTN04', 'JITDELN01',
       'JITDELN02', 'JITDELN03', 'KANBANN01', 'KANBANN02', 'KANBANN03',
       'LINKCN01', 'LINKCN02', 'LINKCN03', 'LINKCN04', 'LINKCN05', 'SCHEDN01',
       'SCHEDN02', 'SETUPN01', 'SETUPN02', 'SETUPN03'],
      dtype='object')

In [26]:
import pandas as pd
import statsmodels.formula.api as smf

# Expects `df` to be the pandas DataFrame holding the data.

# --- Step 1: composite scores (row-wise mean over each set of item columns) ---
_env_items = [
    'ENVRTX21', 'ENVRTX37', 'ENVRTX02', 'ENVRTX22', 'ENVRTX39', 'ENVRTX23',
    'ENVRTX18', 'ENVRTX13', 'ENVRTX33', 'ENVRTX03', 'ENVRTX20', 'ENVRTX38',
    'ENVRTX08', 'ENVRTX05', 'ENVRTX30', 'ENVRTX24', 'ENVRTX32', 'ENVRTX34',
    'ENVRTX04', 'ENVRTX29', 'ENVRTX41', 'ENVRTX40', 'ENVRTX09', 'ENVRTX17',
    'ENVRTX07', 'ENVRTX11', 'ENVRTX10', 'ENVRTX01', 'ENVRTX14', 'ENVRTX15',
    'ENVRTX12', 'ENVRTX31', 'ENVRTX35', 'ENVRTX36', 'ENVRTX06', 'EPRACX01',
    'EPRACX02', 'EPRACX03', 'EPRACX04', 'EPRACX05', 'EPRACX06',
]
_jit_items = [
    'LAYOUTN01', 'LAYOUTN02', 'LAYOUTN03', 'LAYOUTN04', 'JITDELN01',
    'JITDELN02', 'JITDELN03', 'KANBANN01', 'KANBANN02', 'KANBANN03',
    'LINKCN01', 'LINKCN02', 'LINKCN03', 'LINKCN04', 'LINKCN05', 'SCHEDN01',
    'SCHEDN02', 'SETUPN01', 'SETUPN02', 'SETUPN03',
]

df['Env_Score'] = df[_env_items].mean(axis=1)
df['JIT_Score'] = df[_jit_items].mean(axis=1)

# --- Step 2: interaction term (element-wise product of the two composites) ---
df['JIT_Env_Interaction'] = df['JIT_Score'].mul(df['Env_Score'])

# --- Step 3: OLS regression for emissions to air (EPERFX05) ---
model_air = smf.ols(
    formula='EPERFX05 ~ Env_Score + JIT_Score + JIT_Env_Interaction',
    data=df,
).fit()
print(model_air.summary())

# --- Step 4: OLS regression for solid waste generation (EPERFX07) ---
model_waste = smf.ols(
    formula='EPERFX07 ~ Env_Score + JIT_Score + JIT_Env_Interaction',
    data=df,
).fit()
print(model_waste.summary())


                            OLS Regression Results                            
Dep. Variable:               EPERFX05   R-squared:                       0.202
Model:                            OLS   Adj. R-squared:                  0.188
Method:                 Least Squares   F-statistic:                     14.39
Date:                Wed, 15 Nov 2023   Prob (F-statistic):           2.12e-08
Time:                        21:28:07   Log-Likelihood:                -179.40
No. Observations:                 175   AIC:                             366.8
Df Residuals:                     171   BIC:                             379.5
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
Intercept              -0.8686    

In [36]:
import pandas as pd

def regression_to_latex(df, file_name, label="tab:regression", caption="Regression Results"):
    """
    Convert a pandas DataFrame containing regression results to a LaTeX table, save it to a file,
    apply formatting to the numeric columns, add a significance column, and include label and caption
    for the table in LaTeX.

    Parameters:
    - df: pandas DataFrame to convert, containing regression results.
    - file_name: Name of the file to save the LaTeX code.
    - label: Label for the table in LaTeX.
    - caption: Caption for the table in LaTeX.
    """
    
    # Reset index to turn the index (coefficient names) into a regular column
    df = df.reset_index()
    df.columns = ['Coefficient', 'Coef.', 'Std.Err.', 't', 'P>|t|', '[0.025', '0.975]']

    # Escape all LaTeX special characters in the DataFrame
    df = df.replace('&', '\\&', regex=True).replace('%', '\\%', regex=True)

    # Escape underscores in coefficient names
    df['Coefficient'] = df['Coefficient'].str.replace('_', '\\_')

    # Format numbers and round to two decimal places
    numeric_columns = ['Coef.', 'Std.Err.', 't', 'P>|t|', '[0.025', '0.975]']
    for col in numeric_columns:
        df[col] = df[col].apply(lambda x: f"{x:.2f}" if isinstance(x, float) else x)

    # Add significance column
    def significance_stars(p_value):
        if p_value < 0.01:
            return '***'
        elif p_value < 0.05:
            return '**'
        elif p_value < 0.1:
            return '*'
        return ''

    df['Sig.'] = df['P>|t|'].apply(lambda p: significance_stars(float(p)))

    # Create the LaTeX table code without the longtable format
    latex_code = df.to_latex(index=False, header=True, escape=False, column_format="lccccccc")

    # Wrap the tabular environment with the table float environment
    table_float = r'''\begin{table}[htbp]
    \centering
    \caption{''' + caption + r'''}
    \label{''' + label + r'''}
    ''' + latex_code + r'''
    \end{table}
    '''

    # Save to file
    with open(file_name, 'w') as file:
        file.write(table_float)

# Example usage
# regression_to_latex(results_air, 'regression_air_table.tex')

In [37]:
# Take the coefficient table (second table of summary2()) of each fitted model
# and export it as a LaTeX table under REPORTS_TABLES_PATH.
results_air = model_air.summary2().tables[1]
results_waste = model_waste.summary2().tables[1]

# Each table gets its own label: with the shared default "tab:regression",
# including both files in one document would define the same label twice.
regression_to_latex(
    results_air,
    os.path.join(REPORTS_TABLES_PATH, 'regression_air_table.tex'),
    label="tab:regression_air",
    caption="Emissions to Air - Regression Results",
)
regression_to_latex(
    results_waste,
    os.path.join(REPORTS_TABLES_PATH, 'regression_waste_table.tex'),
    label="tab:regression_waste",
    caption="Solid Waste Generation - Regression Results",
)

In [38]:
# Display the coefficient table extracted from `model_air` as the cell output.
results_air

Unnamed: 0,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,-0.868631,1.798927,-0.48286,0.629812,-4.419593,2.682332
Env_Score,1.260856,0.516187,2.442634,0.015598,0.241937,2.279775
JIT_Score,0.835716,0.524075,1.59465,0.112637,-0.198773,1.870205
JIT_Env_Interaction,-0.219634,0.146609,-1.498089,0.135954,-0.509031,0.069763
