In [1]:
import json
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo
from factor_analyzer import ConfirmatoryFactorAnalyzer, ModelSpecificationParser
from scipy.stats import norm

# Resolve the project root as the directory two levels above the working
# directory, then derive the data and output locations from it.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
project_root = os.path.dirname(parent_dir)

data_dir = os.path.join(project_root, 'data')
PROCESSED_DATA_PATH = os.path.join(data_dir, 'processed')
RAW_DATA_PATH = os.path.join(data_dir, 'raw')
OUTPUT_PATH = os.path.join(project_root, 'output')

In [2]:
# Load the raw workbook; the path is joined with os.path.join so the separator
# matches how RAW_DATA_PATH itself was built.
data = pd.read_excel(os.path.join(RAW_DATA_PATH, 'HPM data_environmental performance.xlsx'))

In [3]:
# Keep only the rows in which every column has a value.
data = data.dropna(axis="index", how="any")

In [4]:
# Model specification: one entry per factor, each starting with its own
# empty list.
model_dict = {
    factor_name: []
    for factor_name in (
        "Environmental Practices",
        "JIT Practices",
        "Environmental Performance",
    )
}

In [5]:
def _columns_with_prefix(frame, prefixes):
    """Return a copy of the columns of ``frame`` whose names start with a prefix.

    Parameters
    ----------
    frame : pd.DataFrame
        Frame to select from; its row index is carried over unchanged.
    prefixes : tuple of str
        Column-name prefixes; a column is kept when its name starts with any
        of them.

    Returns
    -------
    pd.DataFrame
        Independent copy of the matching columns, in their original order.
    """
    matching = [name for name in frame.columns if name.startswith(prefixes)]
    # Copy so that later in-place edits of a bundle (dropna, added label
    # columns) operate on an independent frame rather than a slice of `frame`.
    return frame[matching].copy()


# One bundle of columns per group, selected by column-name prefix in a single
# step instead of growing an empty frame one column at a time.
environmental_practices = _columns_with_prefix(data, ('ENVRTX', 'EPRACX'))
environmental_performance = _columns_with_prefix(data, ('EPERFX',))
jit_practices = _columns_with_prefix(data, ('LAYOUT', 'JITDEL', 'KANBAN'))

bundles = [jit_practices, environmental_practices, environmental_performance]

In [6]:
# Strip incomplete rows from each bundle in place (the named frames and the
# entries of `bundles` are the same objects) and report the resulting shape.
for bundle in bundles:
    bundle.dropna(axis="index", how="any", inplace=True)
    print(bundle.shape)

(175, 10)
(175, 41)
(175, 9)


In [7]:
# Median over every value of each bundle (np.median is called without an axis,
# so each frame is reduced to a single number).
eprx_median, eprf_median, jit_median = (
    np.median(frame)
    for frame in (environmental_practices, environmental_performance, jit_practices)
)

for caption, value in (
    ("Environmental Practices median: ", eprx_median),
    ("Environmental Performance median: ", eprf_median),
    ("JIT Practices median: ", jit_median),
):
    print(caption, value)

Environmental Practices median:  3.5
Environmental Performance median:  3.5
JIT Practices median:  3.5


In [8]:
# Per-row medians: one value per row, taken across that row's columns.
jit_row_medians = jit_practices.median(axis="columns")
environmental_practices_row_medians = environmental_practices.median(axis="columns")

# Overall medians: stack() reshapes the frame into one long Series, so the
# median is taken over every value at once.
jit_overall_median = jit_practices.stack().median()
environmental_practices_overall_median = environmental_practices.stack().median()

In [9]:
# A row is "High" when its own median reaches the overall median of its bundle
# (ties count as "High"); every other row is "Low".
jit_is_high = jit_row_medians >= jit_overall_median
environmental_is_high = (
    environmental_practices_row_medians >= environmental_practices_overall_median
)

jit_practices['JIT'] = np.where(jit_is_high, "High", "Low")
environmental_practices['Environmental Practices'] = np.where(environmental_is_high, "High", "Low")

In [10]:
# Display the frame as it stands before any label columns are copied onto it.
data

Unnamed: 0,COUNTRY,COMPANY CODE,INDUSTRY,ENVRTX21,ENVRTX37,ENVRTX02,ENVRTX22,ENVRTX39,ENVRTX23,ENVRTX18,...,OUTCMX08,OUTCMX09,OUTCMX10,OUTCMX11,OUTCMX12,OUTCMX13,OUTCMX14,OUTCMX15,OUTCMX16,OUTCMX17
3,BRA,1704,3,2.000000,4.0,2.000000,2.000000,3.0,4.000000,4.000000,...,3.0,3.000000,3.0,3.0,2.000000,2.000000,3.000000,3.000000,3.000000,3.000000
12,BRA,1713,1,3.000000,2.0,3.000000,1.000000,2.0,4.000000,2.000000,...,3.0,4.000000,3.0,3.0,4.000000,1.000000,5.000000,4.000000,4.000000,4.000000
16,BRA,1717,2,3.000000,4.0,5.000000,2.000000,4.0,5.000000,4.000000,...,4.0,4.000000,5.0,3.0,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000
18,BRA,1719,3,4.000000,2.0,4.000000,3.000000,3.0,4.000000,3.000000,...,5.0,3.000000,4.0,3.0,4.000000,5.000000,5.000000,5.000000,4.000000,4.000000
24,GER,401,2,4.666667,2.0,4.333333,4.333333,3.0,4.666667,3.333333,...,3.0,3.333333,3.0,3.0,3.666667,3.666667,3.666667,3.666667,4.333333,4.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312,USA,109,2,3.500000,2.5,3.500000,2.500000,3.5,4.000000,4.000000,...,4.0,4.000000,4.5,3.0,4.000000,3.500000,4.500000,5.000000,4.000000,4.500000
313,USA,110,2,2.000000,1.5,3.000000,2.500000,2.5,3.500000,3.500000,...,4.0,4.000000,4.0,2.5,4.000000,4.500000,4.000000,4.000000,4.500000,4.000000
314,USA,111,1,2.000000,1.0,3.000000,2.000000,1.0,4.000000,2.000000,...,3.0,3.000000,3.0,3.0,3.000000,3.000000,3.000000,4.000000,3.000000,4.000000
327,SWI,1809,3,3.000000,2.0,3.000000,4.000000,3.0,5.000000,3.000000,...,4.0,4.000000,4.0,3.0,4.000000,4.000000,4.000000,5.000000,5.000000,5.000000


In [11]:
# Attach both High/Low label columns to the main frame; the Series are aligned
# to `data` on the row index.
data = data.assign(
    JIT=jit_practices['JIT'],
    Environmental=environmental_practices['Environmental Practices'],
)

In [12]:
# Display the frame again to check the newly added 'JIT' and 'Environmental' columns.
data

Unnamed: 0,COUNTRY,COMPANY CODE,INDUSTRY,ENVRTX21,ENVRTX37,ENVRTX02,ENVRTX22,ENVRTX39,ENVRTX23,ENVRTX18,...,OUTCMX10,OUTCMX11,OUTCMX12,OUTCMX13,OUTCMX14,OUTCMX15,OUTCMX16,OUTCMX17,JIT,Environmental
3,BRA,1704,3,2.000000,4.0,2.000000,2.000000,3.0,4.000000,4.000000,...,3.0,3.0,2.000000,2.000000,3.000000,3.000000,3.000000,3.000000,Low,Low
12,BRA,1713,1,3.000000,2.0,3.000000,1.000000,2.0,4.000000,2.000000,...,3.0,3.0,4.000000,1.000000,5.000000,4.000000,4.000000,4.000000,High,Low
16,BRA,1717,2,3.000000,4.0,5.000000,2.000000,4.0,5.000000,4.000000,...,5.0,3.0,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,Low,High
18,BRA,1719,3,4.000000,2.0,4.000000,3.000000,3.0,4.000000,3.000000,...,4.0,3.0,4.000000,5.000000,5.000000,5.000000,4.000000,4.000000,Low,Low
24,GER,401,2,4.666667,2.0,4.333333,4.333333,3.0,4.666667,3.333333,...,3.0,3.0,3.666667,3.666667,3.666667,3.666667,4.333333,4.333333,Low,Low
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312,USA,109,2,3.500000,2.5,3.500000,2.500000,3.5,4.000000,4.000000,...,4.5,3.0,4.000000,3.500000,4.500000,5.000000,4.000000,4.500000,Low,High
313,USA,110,2,2.000000,1.5,3.000000,2.500000,2.5,3.500000,3.500000,...,4.0,2.5,4.000000,4.500000,4.000000,4.000000,4.500000,4.000000,High,Low
314,USA,111,1,2.000000,1.0,3.000000,2.000000,1.0,4.000000,2.000000,...,3.0,3.0,3.000000,3.000000,3.000000,4.000000,3.000000,4.000000,High,Low
327,SWI,1809,3,3.000000,2.0,3.000000,4.000000,3.0,5.000000,3.000000,...,4.0,3.0,4.000000,4.000000,4.000000,5.000000,5.000000,5.000000,High,Low


In [13]:
# Labels for the four JIT x Environmental combinations
low_category = 'Low JIT & Environmental'
mainly_jit_category = 'Mainly JIT'
mainly_environmental_category = 'Mainly Environmental'
high_category = 'High JIT & Environmental'

# ((JIT level, Environmental level), label) pairs, checked in this order.
_CATEGORY_RULES = (
    (('Low', 'Low'), low_category),
    (('High', 'Low'), mainly_jit_category),
    (('Low', 'High'), mainly_environmental_category),
    (('High', 'High'), high_category),
)


def categorize(row):
    """Return the category label for one row.

    Parameters
    ----------
    row : mapping
        Anything indexable by the keys 'JIT' and 'Environmental'
        (for example a DataFrame row).

    Returns
    -------
    str
        The label whose (JIT, Environmental) pair equals the row's values,
        or 'Uncategorized' when no pair matches.
    """
    jit_level = row['JIT']
    environmental_level = row['Environmental']

    for (jit_expected, environmental_expected), label in _CATEGORY_RULES:
        if jit_level == jit_expected and environmental_level == environmental_expected:
            return label

    # Neither value pair matched one of the four known combinations.
    return 'Uncategorized'

# Build the 'Category' column by categorizing each row; only the two label
# columns that `categorize` reads are passed to it.
label_columns = data[['JIT', 'Environmental']]
data['Category'] = label_columns.apply(categorize, axis=1)

In [14]:
# Display the frame to check the newly added 'Category' column.
data

Unnamed: 0,COUNTRY,COMPANY CODE,INDUSTRY,ENVRTX21,ENVRTX37,ENVRTX02,ENVRTX22,ENVRTX39,ENVRTX23,ENVRTX18,...,OUTCMX11,OUTCMX12,OUTCMX13,OUTCMX14,OUTCMX15,OUTCMX16,OUTCMX17,JIT,Environmental,Category
3,BRA,1704,3,2.000000,4.0,2.000000,2.000000,3.0,4.000000,4.000000,...,3.0,2.000000,2.000000,3.000000,3.000000,3.000000,3.000000,Low,Low,Low JIT & Environmental
12,BRA,1713,1,3.000000,2.0,3.000000,1.000000,2.0,4.000000,2.000000,...,3.0,4.000000,1.000000,5.000000,4.000000,4.000000,4.000000,High,Low,Mainly JIT
16,BRA,1717,2,3.000000,4.0,5.000000,2.000000,4.0,5.000000,4.000000,...,3.0,5.000000,5.000000,5.000000,5.000000,5.000000,5.000000,Low,High,Mainly Environmental
18,BRA,1719,3,4.000000,2.0,4.000000,3.000000,3.0,4.000000,3.000000,...,3.0,4.000000,5.000000,5.000000,5.000000,4.000000,4.000000,Low,Low,Low JIT & Environmental
24,GER,401,2,4.666667,2.0,4.333333,4.333333,3.0,4.666667,3.333333,...,3.0,3.666667,3.666667,3.666667,3.666667,4.333333,4.333333,Low,Low,Low JIT & Environmental
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312,USA,109,2,3.500000,2.5,3.500000,2.500000,3.5,4.000000,4.000000,...,3.0,4.000000,3.500000,4.500000,5.000000,4.000000,4.500000,Low,High,Mainly Environmental
313,USA,110,2,2.000000,1.5,3.000000,2.500000,2.5,3.500000,3.500000,...,2.5,4.000000,4.500000,4.000000,4.000000,4.500000,4.000000,High,Low,Mainly JIT
314,USA,111,1,2.000000,1.0,3.000000,2.000000,1.0,4.000000,2.000000,...,3.0,3.000000,3.000000,3.000000,4.000000,3.000000,4.000000,High,Low,Mainly JIT
327,SWI,1809,3,3.000000,2.0,3.000000,4.000000,3.0,5.000000,3.000000,...,3.0,4.000000,4.000000,4.000000,5.000000,5.000000,5.000000,High,Low,Mainly JIT


In [15]:
# Save the categorized frame as CSV (row index not written). The target
# directory is created first so a fresh checkout does not fail on the write,
# and the path is joined with os.path.join like the other project paths.
os.makedirs(PROCESSED_DATA_PATH, exist_ok=True)
data.to_csv(os.path.join(PROCESSED_DATA_PATH, 'dummy.csv'), index=False)

In [16]:
# Count the rows in each category.
category_column = data.loc[:, 'Category']
category_counts = category_column.value_counts()


In [17]:
# Display the per-category row counts.
category_counts

Category
High JIT & Environmental    57
Low JIT & Environmental     40
Mainly JIT                  40
Mainly Environmental        38
Name: count, dtype: int64