# Confirmatory Factor Analysis
CFA for testing JIT and environmental practice bundles

In [47]:
import json
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo
from factor_analyzer import ConfirmatoryFactorAnalyzer, ModelSpecificationParser
from scipy.stats import norm

current_dir = os.getcwd()
project_root = os.path.dirname(os.path.dirname(current_dir))
PROCESSED_DATA_PATH = os.path.join(project_root, 'data', 'processed')
RAW_DATA_PATH = os.path.join(project_root, 'data', 'raw')
OUTPUT_PATH = os.path.join(project_root, 'output')

In [42]:
data = pd.read_excel(RAW_DATA_PATH + '/HPM data_environmental performance.xlsx')

In [12]:
environmental_practices = pd.DataFrame()
environmental_performance = pd.DataFrame()
jit_practices = pd.DataFrame()

for column in data.columns:
    if column.startswith('ENVRTX') or column.startswith('EPRACX'):
        environmental_practices[column] = data[column]

    if column.startswith('EPERFX'):
        environmental_performance[column] = data[column]

    if column.startswith('LAYOUT') or column.startswith('JITDEL') or column.startswith('KANBAN'):
        jit_practices[column] = data[column]

bundles = [jit_practices, environmental_practices, environmental_performance]

In [13]:
for bundle in bundles:
    print(bundle.shape)

(330, 10)
(330, 41)
(330, 9)


In [14]:
# drop rows with NA values
for bundle in bundles:
    bundle.dropna(inplace=True)

for bundle in bundles:
    print(bundle.shape)

(267, 10)
(243, 41)
(277, 9)


In [35]:
# Model specification
model_dict = {
    "Environmental Practices": [],
    "JIT Practices": []
}

In [36]:
for key in data.keys():
    if key.startswith('ENVRTX') or key.startswith('EPRACX'):
        if key not in model_dict['Environmental Practices']:
            model_dict['Environmental Practices'].append(key)

    if key.startswith('LAYOUT') or key.startswith('JITDEL') or key.startswith('KANBAN'):
        if key not in model_dict['JIT Practices']:
            model_dict["JIT Practices"].append(key)

model_dict["Environmental Practices"].sort()
model_dict["JIT Practices"].sort()

In [37]:
model_dict

{'Environmental Practices': ['ENVRTX01',
  'ENVRTX02',
  'ENVRTX03',
  'ENVRTX04',
  'ENVRTX05',
  'ENVRTX06',
  'ENVRTX07',
  'ENVRTX08',
  'ENVRTX09',
  'ENVRTX10',
  'ENVRTX11',
  'ENVRTX12',
  'ENVRTX13',
  'ENVRTX14',
  'ENVRTX15',
  'ENVRTX17',
  'ENVRTX18',
  'ENVRTX20',
  'ENVRTX21',
  'ENVRTX22',
  'ENVRTX23',
  'ENVRTX24',
  'ENVRTX29',
  'ENVRTX30',
  'ENVRTX31',
  'ENVRTX32',
  'ENVRTX33',
  'ENVRTX34',
  'ENVRTX35',
  'ENVRTX36',
  'ENVRTX37',
  'ENVRTX38',
  'ENVRTX39',
  'ENVRTX40',
  'ENVRTX41',
  'EPRACX01',
  'EPRACX02',
  'EPRACX03',
  'EPRACX04',
  'EPRACX05',
  'EPRACX06'],
 'JIT Practices': ['JITDELN01',
  'JITDELN02',
  'JITDELN03',
  'KANBANN01',
  'KANBANN02',
  'KANBANN03',
  'LAYOUTN01',
  'LAYOUTN02',
  'LAYOUTN03',
  'LAYOUTN04']}

In [44]:
# Extract all column names from the model_dict
desired_columns = [col for sublist in model_dict.values() for col in sublist]
data_filtered = data[desired_columns]

In [45]:
data_filtered

Unnamed: 0,ENVRTX01,ENVRTX02,ENVRTX03,ENVRTX04,ENVRTX05,ENVRTX06,ENVRTX07,ENVRTX08,ENVRTX09,ENVRTX10,...,JITDELN01,JITDELN02,JITDELN03,KANBANN01,KANBANN02,KANBANN03,LAYOUTN01,LAYOUTN02,LAYOUTN03,LAYOUTN04
0,3.0,4.0,4.0,4.0,4.0,5.0,3.0,3.0,,2.0,...,2.0,5.0,2.0,,5.0,5.0,,,,
1,,,,,,,,,,,...,2.0,,5.0,,,,5.0,5.0,5.0,3.0
2,3.0,3.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,...,,,,,,,4.0,4.0,5.0,2.0
3,1.0,2.0,4.0,3.0,3.0,3.0,1.0,3.0,3.0,1.0,...,1.0,1.0,2.0,1.0,3.0,3.0,3.0,3.0,2.0,3.0
4,1.0,3.0,2.0,4.0,2.0,2.0,5.0,2.0,1.0,1.0,...,1.5,1.5,1.5,,,,2.0,4.0,4.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325,4.0,4.0,3.0,4.0,4.0,5.0,4.0,5.0,4.0,3.0,...,1.0,5.0,1.0,1.0,1.0,1.0,3.0,4.0,4.0,3.0
326,1.0,3.0,4.0,3.0,3.0,1.0,1.0,1.0,1.0,1.0,...,3.0,3.0,1.0,1.0,1.0,1.0,4.0,4.0,4.0,4.0
327,5.0,3.0,4.0,3.0,5.0,5.0,2.0,4.0,2.0,3.0,...,4.0,2.0,5.0,3.0,3.0,2.0,5.0,4.0,4.0,4.0
328,2.0,3.0,3.0,3.0,3.0,4.0,2.0,2.0,4.0,2.0,...,3.0,2.0,3.0,2.0,1.0,1.0,4.0,4.0,3.0,3.0


In [50]:
# Adjusted model specification
model_spec = ModelSpecificationParser.parse_model_specification_from_dict(data_filtered, model_dict)

# CFA model
cfa = ConfirmatoryFactorAnalyzer(model_spec)
cfa.fit(data_filtered)

# Extract the factor loadings
loadings = cfa.loadings_

# Get the standard errors for loadings and intercepts
se_all = cfa.get_standard_errors()

# Extract standard errors for loadings
se_loadings = se_all[0]

# Compute t-values
t_values = loadings / se_loadings

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =          156     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  2.18389D+04    |proj g|=  4.76929D+02

At iterate    1    f=  2.10556D+04    |proj g|=  9.59713D+01

At iterate    2    f=  2.08471D+04    |proj g|=  6.07120D+01


 This problem is unconstrained.



At iterate    3    f=  2.07124D+04    |proj g|=  8.05521D+01

At iterate    4    f=  2.06846D+04    |proj g|=  7.24071D+01

At iterate    5    f=  2.06771D+04    |proj g|=  5.11125D+01

At iterate    6    f=  2.06684D+04    |proj g|=  2.41147D+01

At iterate    7    f=  2.06626D+04    |proj g|=  4.77670D+01

At iterate    8    f=  2.06562D+04    |proj g|=  3.60022D+01

At iterate    9    f=  2.06434D+04    |proj g|=  2.50209D+01

At iterate   10    f=  2.06383D+04    |proj g|=  1.16936D+02

At iterate   11    f=  2.06276D+04    |proj g|=  2.77611D+01

At iterate   12    f=  2.06240D+04    |proj g|=  1.04999D+01

At iterate   13    f=  2.06190D+04    |proj g|=  3.01447D+01

At iterate   14    f=  2.06140D+04    |proj g|=  2.68985D+01

At iterate   15    f=  2.06104D+04    |proj g|=  1.96313D+01

At iterate   16    f=  2.06083D+04    |proj g|=  9.93678D+00

At iterate   17    f=  2.06075D+04    |proj g|=  8.06249D+00

At iterate   18    f=  2.06069D+04    |proj g|=  7.70415D+00

At iter

  t_values = loadings / se_loadings


In [51]:
data = {
    'Bundle': [],
    'Item description': [],
    'Loading': [],
    'SE': [],
    't-value': []
}

# Loop through each factor and then each item within that factor
for factor, items in model_dict.items():
    for item in items:
        item_idx = data_filtered.columns.get_loc(item)
        factor_idx = list(model_dict.keys()).index(factor)

        loading_value = loadings[item_idx][factor_idx]
        se_value = se_loadings[item_idx][factor_idx]
        t_value = t_values[item_idx][factor_idx]

        data['Bundle'].append(factor)
        data['Item description'].append(item)  # Using column names as descriptions for now
        data['Loading'].append(loading_value)
        data['SE'].append(se_value)
        data['t-value'].append(t_value)

# Convert the data to a DataFrame
results_df = pd.DataFrame(data)
print(results_df)


                     Bundle Item description   Loading        SE    t-value
0   Environmental Practices         ENVRTX01  0.539651  0.046918  11.501956
1   Environmental Practices         ENVRTX02  0.505532  0.044707  11.307670
2   Environmental Practices         ENVRTX03  0.459415  0.036194  12.693243
3   Environmental Practices         ENVRTX04  0.433565  0.036505  11.877002
4   Environmental Practices         ENVRTX05  0.470751  0.037420  12.580155
5   Environmental Practices         ENVRTX06  0.494043  0.046425  10.641711
6   Environmental Practices         ENVRTX07  0.662335  0.061310  10.802965
7   Environmental Practices         ENVRTX08  0.510175  0.039061  13.061136
8   Environmental Practices         ENVRTX09  0.700705  0.054593  12.835019
9   Environmental Practices         ENVRTX10  0.655690  0.050608  12.956173
10  Environmental Practices         ENVRTX11  0.662983  0.053515  12.388677
11  Environmental Practices         ENVRTX12  0.544365  0.062558   8.701817
12  Environm

In [53]:
# Compute p-values from t-values
p_values = [2 * (1 - norm.cdf(abs(t))) for t in data['t-value']]  # Two-tailed test

# Add p-values to the results dataframe
results_df['p-value'] = p_values

results_df

Unnamed: 0,Bundle,Item description,Loading,SE,t-value,p-value
0,Environmental Practices,ENVRTX01,0.539651,0.046918,11.501956,0.0
1,Environmental Practices,ENVRTX02,0.505532,0.044707,11.30767,0.0
2,Environmental Practices,ENVRTX03,0.459415,0.036194,12.693243,0.0
3,Environmental Practices,ENVRTX04,0.433565,0.036505,11.877002,0.0
4,Environmental Practices,ENVRTX05,0.470751,0.03742,12.580155,0.0
5,Environmental Practices,ENVRTX06,0.494043,0.046425,10.641711,0.0
6,Environmental Practices,ENVRTX07,0.662335,0.06131,10.802965,0.0
7,Environmental Practices,ENVRTX08,0.510175,0.039061,13.061136,0.0
8,Environmental Practices,ENVRTX09,0.700705,0.054593,12.835019,0.0
9,Environmental Practices,ENVRTX10,0.65569,0.050608,12.956173,0.0


In [54]:
significant_results = results_df[results_df['p-value'] < 0.01]
significant_results

Unnamed: 0,Bundle,Item description,Loading,SE,t-value,p-value
0,Environmental Practices,ENVRTX01,0.539651,0.046918,11.501956,0.0
1,Environmental Practices,ENVRTX02,0.505532,0.044707,11.30767,0.0
2,Environmental Practices,ENVRTX03,0.459415,0.036194,12.693243,0.0
3,Environmental Practices,ENVRTX04,0.433565,0.036505,11.877002,0.0
4,Environmental Practices,ENVRTX05,0.470751,0.03742,12.580155,0.0
5,Environmental Practices,ENVRTX06,0.494043,0.046425,10.641711,0.0
6,Environmental Practices,ENVRTX07,0.662335,0.06131,10.802965,0.0
7,Environmental Practices,ENVRTX08,0.510175,0.039061,13.061136,0.0
8,Environmental Practices,ENVRTX09,0.700705,0.054593,12.835019,0.0
9,Environmental Practices,ENVRTX10,0.65569,0.050608,12.956173,0.0


In [None]:
# To save the results to a CSV file
significant_results.to_csv(OUTPUT_PATH + '/cfa_ep_git_sig.csv', index=False)