<a href="https://colab.research.google.com/github/henryonomakpo/The-Impact-of-ESG-Ratings-on-EV-Manufacturing-Industry/blob/main/Data_Envelopment_Analysis_Airlines.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Envelopment Analysis for Airlines and Structural Equation Modelling for Companies Leveraging Metaverse Technology

Data Envelopment Analysis for Airlines

In [None]:
pip install pulp

Collecting pulp
  Downloading PuLP-2.9.0-py3-none-any.whl.metadata (5.4 kB)
Downloading PuLP-2.9.0-py3-none-any.whl (17.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.7/17.7 MB[0m [31m79.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pulp
Successfully installed pulp-2.9.0


In [None]:
from pulp import LpProblem, LpMinimize, LpVariable, lpSum, value
import csv
import pandas as pd


# One path for both the pandas preview and the csv.DictReader import below.
# (The preview previously pointed at the directory '/root', which cannot be
# parsed as a CSV file.)
AIRLINES_CSV = 'airlines_data.csv'

airlines_dea = pd.read_csv(AIRLINES_CSV)
# A bare `.head()` only renders when it is the last expression of a cell, so
# print the preview explicitly.
print(airlines_dea.head())

# Set building
K = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"]  # DMU labels, matched to CSV rows in file order
I = ["Aircraft", "Fuel", "Employee"]  # input columns read from the CSV
J = ["Passenger", "Freight"]          # output columns read from the CSV

# Parameters building: X[input][dmu] and Y[output][dmu], initialised to 0
X = {i: dict.fromkeys(K, 0) for i in I}
Y = {j: dict.fromkeys(K, 0) for j in J}

# Import CSV data: the k-th data row supplies the values of DMU K[k].
# A file with more rows than len(K) raises IndexError on K[k].
with open(AIRLINES_CSV, newline='') as csvfile:
    for k, row in enumerate(csv.DictReader(csvfile)):
        dmu = K[k]
        for i in I:
            X[i][dmu] = float(row[i])
        for j in J:
            Y[j][dmu] = float(row[j])

# CRS_DEA_Model
def getOverallEfficiency(r):
    """Solve the CRS (constant returns to scale) DEA model for one DMU.

    Minimises ``theta_r`` subject to, for non-negative weights ``lambda_k``:

    * for every input ``i`` in ``I``:
      ``sum_k lambda_k * X[i][k] <= theta_r * X[i][K[r]]``
    * for every output ``j`` in ``J``:
      ``sum_k lambda_k * Y[j][k] >= Y[j][K[r]]``

    Reads the module-level sets ``K``, ``I``, ``J`` and parameter dicts
    ``X``, ``Y``.

    Args:
        r: Position in ``K`` of the DMU being evaluated.

    Returns:
        A 2-tuple ``(label, objective)``: a text line containing ``K[r]`` and
        the objective rounded to 3 decimals, and the unrounded objective value.
    """
    # Model building
    model = LpProblem('CRS_model', LpMinimize)

    # Decision variables. The index set is passed positionally because the
    # keyword for it was renamed from `indexs` to `indices` across PuLP
    # releases; the positional form is accepted either way.
    theta_r = LpVariable('theta_r')
    lambda_k = LpVariable.dicts('lambda_k', K, lowBound=0)

    # Objective function
    model += theta_r

    # Input constraints
    for i in I:
        model += lpSum(lambda_k[k] * X[i][k] for k in K) <= theta_r * float(X[i][K[r]])
    # Output constraints
    for j in J:
        model += lpSum(lambda_k[k] * Y[j][k] for k in K) >= float(Y[j][K[r]])

    # Model solving
    model.solve()

    efficiency = value(model.objective)
    return f'{K[r]}：{round(efficiency, 3)}\n', efficiency

# VRS_DEA_Model
def getTechnicalEfficiency(r):
    """Solve the VRS (variable returns to scale) DEA model for one DMU.

    Minimises ``theta_r`` subject to, for non-negative weights ``lambda_k``:

    * for every input ``i`` in ``I``:
      ``sum_k lambda_k * X[i][k] <= theta_r * X[i][K[r]]``
    * for every output ``j`` in ``J``:
      ``sum_k lambda_k * Y[j][k] >= Y[j][K[r]]``
    * convexity: ``sum_k lambda_k == 1``

    Reads the module-level sets ``K``, ``I``, ``J`` and parameter dicts
    ``X``, ``Y``.

    Args:
        r: Position in ``K`` of the DMU being evaluated.

    Returns:
        A 2-tuple ``(label, objective)``: a text line containing ``K[r]`` and
        the objective rounded to 3 decimals, and the unrounded objective value.
    """
    # Model building
    model = LpProblem('VRS_model', LpMinimize)

    # Decision variables. The index set is passed positionally because the
    # keyword for it was renamed from `indexs` to `indices` across PuLP
    # releases; the positional form is accepted either way.
    theta_r = LpVariable('theta_r')
    lambda_k = LpVariable.dicts('lambda_k', K, lowBound=0)

    # Objective function
    model += theta_r

    # Input constraints
    for i in I:
        model += lpSum(lambda_k[k] * X[i][k] for k in K) <= theta_r * float(X[i][K[r]])
    # Output constraints
    for j in J:
        model += lpSum(lambda_k[k] * Y[j][k] for k in K) >= float(Y[j][K[r]])
    # Convexity constraint: the only difference from the CRS model
    model += lpSum(lambda_k[k] for k in K) == 1

    # Model solving
    model.solve()

    efficiency = value(model.objective)
    return f'{K[r]}：{round(efficiency, 3)}\n', efficiency


In [None]:
# Mount Google Drive at /content/drive (Colab-only: requires the google.colab package).
# NOTE(review): no path under /content/drive is referenced in the cells visible
# here — confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

# Key Steps:
##### Stock Data Download: The yfinance package is used to download historical stock data for the specified companies from 2014-09-01 to 2023-09-01 at a monthly interval.
##### Fama-French 5-Factor Data: The script downloads the Fama-French 5-factor data and merges it with the stock data.
##### Missing Value Imputation: Missing values are handled using median imputation for numerical columns.
##### Multicollinearity: Highly correlated variables are identified and removed based on a correlation threshold (0.9).
##### Excess Return Calculation: The risk-free rate is subtracted from stock returns to calculate the excess return for each stock.
##### Save Data: The final dataset is saved as a CSV file.
##### CAPM, APT, and Fama-French Analysis: Linear regression is performed for each stock's excess return against the Fama-French factors.
##### SEM & CFA: The script includes a model for CFA/SEM analysis and generates a path diagram.

In [None]:
!pip install yfinance



In [None]:
!pip install factor_analyzer

Collecting factor_analyzer
  Downloading factor_analyzer-0.5.1.tar.gz (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: factor_analyzer
  Building wheel for factor_analyzer (pyproject.toml) ... [?25l[?25hdone
  Created wheel for factor_analyzer: filename=factor_analyzer-0.5.1-py2.py3-none-any.whl size=42564 sha256=48aec8905df4f63d7dcaf70aed62099cc720ff6ee9371909d55bf34c1304b9c4
  Stored in directory: /root/.cache/pip/wheels/24/59/82/6493618e30ed1cb7a013b9e1b0c9e17de80b04dfcef4ba8a4d
Successfully built factor_analyzer
Installing collected packages: factor_analyzer
Successfully instal

In [None]:
pip install semopy

Collecting semopy
  Downloading semopy-2.3.11.tar.gz (1.6 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.7/1.6 MB[0m [31m20.9 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.6/1.6 MB[0m [31m31.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting numdifftools (from semopy)
  Downloading numdifftools-0.9.41-py2.py3-none-any.whl.metadata (39 kB)
Downloading numdifftools-0.9.41-py2.py3-none-any.whl (100 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.2/100.2 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: semopy
  Building wheel for semopy (setup.py

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import requests
import io
import statsmodels.api as sm
from sklearn.decomposition import PCA
from factor_analyzer import FactorAnalyzer
from semopy import Model, ModelMeans, semplot

# Define the list of companies
companies = ["LOW", "ANSS", "PTC", "GE", "IBM", "SIE.DE", "BMW.DE", "ADS.DE", "ADSK",
             "NKE", "NVDA", "BABA", "ADBE", "AAPL", "AVGO", "PYPL", "MSFT", "META", "AMZN"]

# Download monthly historical stock prices
start_date = "2014-09-01"
end_date = "2023-09-01"

# auto_adjust=False requests a separate 'Adj Close' field, which is the one
# selected below.
stock_data = yf.download(companies, start=start_date, end=end_date,
                         interval="1mo", auto_adjust=False)
# Tickers with no data at all would poison the median imputation; drop them.
stock_prices = stock_data['Adj Close'].dropna(axis=1, how='all')
# Label every row with a tz-naive month start so it lines up with the '%Y%m'
# dates parsed from the factor file, and collapse duplicate month labels if any.
stock_prices.index = (pd.to_datetime(stock_prices.index)
                      .tz_localize(None)
                      .to_period('M')
                      .to_timestamp())
stock_prices = stock_prices.groupby(level=0).last()

# Load the Fama-French 5-factor file: use a local copy when present, otherwise
# download it. (Retrying the same missing path with another alias of the same
# encoding cannot succeed, so a missing file is the only case handled here.)
FF5_ARCHIVE = "F-F_Research_Data_5_Factors_2x3_CSV.zip"
FF5_URL = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/" + FF5_ARCHIVE
FACTOR_COLUMNS = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']

try:
    ff_factors = pd.read_csv(FF5_ARCHIVE,
                             skiprows=3,
                             encoding='latin1',
                             index_col=0)
except FileNotFoundError:
    print(f"{FF5_ARCHIVE} not found locally; downloading {FF5_URL}")
    response = requests.get(FF5_URL, timeout=60)
    response.raise_for_status()
    # A BytesIO has no file name, so the compression must be stated explicitly.
    ff_factors = pd.read_csv(io.BytesIO(response.content),
                             compression='zip',
                             skiprows=3,
                             encoding='latin1',
                             index_col=0)

# Keep only the monthly rows (six-digit YYYYMM labels). The rows after them
# (annual section and its text header) are what made the columns non-numeric
# in the logged TypeError.
ff_dates = pd.Index(ff_factors.index.astype(str).str.strip())
is_monthly = ff_dates.str.match(r'^\d{6}$')
ff5_data = ff_factors.loc[is_monthly].copy()
ff5_data.columns = ff5_data.columns.str.strip()
ff5_data.index = pd.to_datetime(ff_dates[is_monthly], format='%Y%m')
# NOTE(review): assumes the file reports percentages (e.g. 0.04 = 0.04 %), hence
# the division by 100 to match the decimal stock returns — confirm against the
# data library description.
ff5_data = ff5_data.apply(
    lambda col: pd.to_numeric(col.astype(str).str.strip(), errors='coerce')
) / 100.0
ff5_data = ff5_data.loc[start_date:end_date]

# Calculate monthly returns from prices only; the factors are already returns,
# so they must not go through pct_change.
stock_returns = stock_prices.pct_change(fill_method=None).iloc[1:]
stock_columns = list(stock_returns.columns)

# Merge datasets on the months present in both sources
merged_data = pd.merge(stock_returns, ff5_data, left_index=True, right_index=True, how='inner')

# Address NAs by median imputation (applied to returns, not to price levels)
merged_data = merged_data.fillna(merged_data.median())
returns = merged_data.copy()

# PCA on the stock returns. The fitted object is kept for inspection; nothing
# below consumes it.
pca = PCA(n_components=0.95)
pca.fit(returns[stock_columns])

# Calculate excess returns and add the '_ER' suffix
risk_free_rate = returns['RF']
excess_returns = returns[stock_columns].sub(risk_free_rate, axis=0).add_suffix('_ER')

# Combine excess returns with Fama-French factors
final_data = pd.concat([excess_returns, returns[FACTOR_COLUMNS]], axis=1)

# Save as CSV next to the notebook (the previous absolute path only existed on
# one local machine)
final_data.to_csv("Metav_ER_ff5_esg.csv")

# Perform CAPM, APT, and Fama-French for each stock.
# The design matrices are loop-invariant, and deliberately NOT named `X`: that
# name holds the DEA input parameters used by the DEA model functions.
capm_exog = sm.add_constant(final_data['Mkt-RF'])
ff5_exog = sm.add_constant(final_data[FACTOR_COLUMNS])

models = {}
for stock in excess_returns.columns:
    stock_excess = final_data[stock]
    # APT here uses the five Fama-French factors as proxies, which makes it the
    # same specification as FF5; fit once and expose it under both keys.
    ff5_fit = sm.OLS(stock_excess, ff5_exog).fit()
    models[stock] = {
        'CAPM': sm.OLS(stock_excess, capm_exog).fit(),
        'APT': ff5_fit,
        'FF5': ff5_fit,
    }

# Factor analysis of the five factor series
# NOTE(review): n_factors equals the number of observed variables — confirm this
# is the intended specification.
fa = FactorAnalyzer(n_factors=5, rotation=None)
fa.fit(final_data[FACTOR_COLUMNS])

# Perform SEM. 'Mkt-RF' is renamed because a hyphen is not an identifier-safe
# character in the model description.
sem_data = final_data.rename(columns={'Mkt-RF': 'Mkt_RF'})
sem_model = """
# Measurement model
Market =~ Mkt_RF
Size =~ SMB
Value =~ HML
Profitability =~ RMW
Investment =~ CMA

# Structural model
AAPL_ER ~ Market + Size + Value + Profitability + Investment
"""

sem = Model(sem_model)
sem.fit(sem_data)

# Draw path diagram
semplot(sem, "sem_path_diagram.png")

print("Analysis complete. Results saved in Metav_ER_ff5_esg.csv and sem_path_diagram.png")

[*********************100%***********************]  19 of 19 completed


Error reading file with latin1 encoding: [Errno 2] No such file or directory: 'F-F_Research_Data_5_Factors_2x3_CSV.zip'
Error reading file with iso-8859-1 encoding: [Errno 2] No such file or directory: 'F-F_Research_Data_5_Factors_2x3_CSV.zip'


TypeError: Cannot convert [[nan nan nan ... '2021   ' '2022  ' '2023   ']
 [nan nan nan ... '16.77   ' '-18.61   ' '13.37   ']
 [nan nan nan ... '-1.18    ' '-7.48   ' '-8.19    ']
 ...
 [nan nan nan ... '9.41   ' '-3.41   ' '0.29    ']
 [nan nan nan ... '-5.10    ' '11.80    ' '5.71    ']
 [nan nan nan ... '0.04' '1.42' '4.95']] to numeric

# Version 2

# New Section

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns

# Define stock tickers
# (passed to download_data, then reused as the stock list for the excess-return
# and regression loops)
companies = ["LOW", "ANSS", "PTC", "GE", "IBM", "SIE.DE", "BMW.DE", "ADS.DE",
             "ADSK", "NKE", "NVDA", "BABA", "ADBE", "AAPL", "AVGO", "PYPL",
             "MSFT", "META", "AMZN"]

# Date window handed to download_data as its start/end arguments
start_date = "2014-09-01"
end_date = "2023-09-01"

# Download stock data with retry logic for failed downloads
def download_data(tickers, start, end):
    """Download monthly adjusted closes, retrying tickers that came back empty.

    Args:
        tickers: List of ticker symbols.
        start: Start date passed to ``yf.download``.
        end: End date passed to ``yf.download``.

    Returns:
        DataFrame of 'Adj Close' prices with one column per ticker. A ticker
        whose retry also yields no data keeps whatever the batch download
        returned for it (an all-NaN column, or no column at all).
    """
    # auto_adjust=False requests a separate 'Adj Close' field.
    stock_data = yf.download(tickers, start=start, end=end, interval='1mo',
                             auto_adjust=False)
    adj_close = stock_data['Adj Close']
    if isinstance(adj_close, pd.Series):
        # Flat columns (single-ticker result): normalise to one column per ticker.
        adj_close = adj_close.to_frame(name=tickers[0])
    else:
        adj_close = adj_close.copy()

    for ticker in tickers:
        # A failed ticker can still appear as an all-NaN column, so test for
        # actual values rather than for the column label.
        if ticker in adj_close.columns and adj_close[ticker].notna().any():
            continue
        print(f"Retrying for {ticker}...")
        data_retry = yf.download(ticker, start=start, end=end, interval='1mo',
                                 auto_adjust=False)
        if data_retry.empty or 'Adj Close' not in data_retry.columns:
            continue
        retry_close = data_retry['Adj Close']
        if isinstance(retry_close, pd.DataFrame):
            # Single-ticker download with multi-level columns: take its only column.
            retry_close = retry_close.iloc[:, 0]
        # Write into the price table that is returned; assigning to the raw
        # multi-level frame would put the series outside 'Adj Close'.
        adj_close[ticker] = retry_close
    return adj_close

# Retry downloading failed stock data
stock_data = download_data(companies, start_date, end_date)

# Tickers with no data at all cannot be imputed; drop them. Then label every row
# with a tz-naive month start so it matches the '%Y%m' factor dates.
stock_prices = stock_data.dropna(axis=1, how='all')
stock_prices.index = (pd.to_datetime(stock_prices.index)
                      .tz_localize(None)
                      .to_period('M')
                      .to_timestamp())
stock_prices = stock_prices.groupby(level=0).last()

# Monthly simple returns. Excess returns are returns minus RF, so prices must be
# converted before RF is subtracted.
stock_returns = stock_prices.pct_change(fill_method=None).iloc[1:]

# Download Fama-French 5-factor data
# NOTE(review): this is the Europe factor file while most tickers are US-listed —
# confirm the intended region.
ff5_url = 'https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/Europe_5_Factors_CSV.zip'
factor_columns = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
value_columns = factor_columns + ['RF']

# Read every row as text under explicit column names. The header row and the
# non-monthly rows are removed by the YYYYMM filter below, so the first column
# is reliably called 'Date' (the logged KeyError came from using that name
# before it had been assigned).
ff5_raw = pd.read_csv(ff5_url, skiprows=3, header=None,
                      names=['Date'] + value_columns, dtype=str)

# Cleaning Fama-French 5-factor data: keep the monthly rows only
date_text = ff5_raw['Date'].str.strip()
is_monthly = date_text.str.match(r'^\d{6}$', na=False)
ff5_data = ff5_raw.loc[is_monthly].copy()
ff5_data['Date'] = pd.to_datetime(date_text[is_monthly], format='%Y%m')
# NOTE(review): assumes the file reports percentages, hence the division by 100
# to match the decimal stock returns — confirm against the data library notes.
ff5_data[value_columns] = ff5_data[value_columns].apply(
    lambda col: pd.to_numeric(col.str.strip(), errors='coerce')
) / 100.0

# Merge stock returns and Fama-French data on the month
data = pd.merge(stock_returns, ff5_data, left_index=True, right_on='Date')
if data.empty:
    raise ValueError("No overlapping months between stock returns and factor data.")
# Columns with no observations cannot be median-imputed.
data = data.dropna(axis=1, how='all')

# Handle missing values (median imputation, numeric columns only — the datetime
# 'Date' column is excluded)
numeric_columns = [col for col in data.columns if col != 'Date']
imputer = SimpleImputer(strategy='median')
data[numeric_columns] = imputer.fit_transform(data[numeric_columns])

# Check and address multicollinearity among the regressors. For each pair of
# factors with |correlation| > 0.9 the later-listed one is dropped; comparing
# only the strict upper triangle keeps the diagonal (always 1) from flagging
# every column.
corr_matrix = data[numeric_columns].corr()
present_factors = [col for col in factor_columns if col in data.columns]
factor_corr = corr_matrix.loc[present_factors, present_factors].abs()
strict_upper = np.triu(np.ones(factor_corr.shape, dtype=bool), k=1)
pairwise_corr = factor_corr.where(strict_upper)
high_corr = [col for col in pairwise_corr.columns if (pairwise_corr[col] > 0.9).any()]
data = data.drop(columns=high_corr)

# Calculate Excess Return for each stock and add "_ER" column
rf = data['RF']
available_stocks = [stock for stock in companies if stock in data.columns]
excess_returns = data[available_stocks].sub(rf, axis=0).add_suffix('_ER')
data = pd.concat([data, excess_returns], axis=1)

# Save dataset to CSV next to the notebook (the previous absolute path only
# existed on one local machine)
output_file = 'Metav_ER_ff5_esg.csv'
data.to_csv(output_file, index=False)

# Function to perform CAPM, Fama-French, APT
def perform_regression(stock_er, factors):
    """Fit an OLS regression of ``stock_er`` on ``factors`` plus an intercept.

    Prints the statsmodels summary and returns the fitted results object.
    """
    X = sm.add_constant(factors)
    model = sm.OLS(stock_er, X).fit()
    print(model.summary())
    return model

# CAPM, APT, and Fama-French models (on the factors that survived the
# multicollinearity filter)
factors = data[[col for col in factor_columns if col in data.columns]]
for stock in available_stocks:
    stock_er = data[stock + '_ER']
    print(f"Results for {stock}:")
    perform_regression(stock_er, factors)
# Structural Equation Modeling (SEM) - using 'lavaan' equivalent in Python
# semopy's plotting entry point is `semplot`.
from semopy import Model, semplot

# CFA & SEM model structure
sem_model = """
    ESG =~ Env_Risk + Soc_Risk + Gov_Risk
    ER ~ ESG
"""

# Prepare data for SEM. The ESG indicators and the 'ER' column are not created
# anywhere in the visible cells, so verify they exist instead of failing with a
# bare KeyError.
esg_columns = ['Env_Risk', 'Soc_Risk', 'Gov_Risk']
sem_columns = esg_columns + ['ER']
missing_columns = [col for col in sem_columns if col not in data.columns]

if missing_columns:
    print(f"Skipping SEM: `data` is missing required column(s) {missing_columns}. "
          "Merge the ESG risk scores and an 'ER' series into `data` first.")
else:
    sem_data = data[sem_columns].dropna()

    # Build and fit SEM model
    model = Model(sem_model)
    model.fit(sem_data)

    # Path Diagram
    semplot(model, 'sem_path_diagram.png')

    # Show the fitted SEM model summary
    print(model.inspect())


[*********************100%***********************]  19 of 19 completed


KeyError: ['Date']

In [None]:
# Structural Equation Modeling (SEM) - using 'lavaan' equivalent in Python
# semopy's plotting entry point is `semplot`.
from semopy import Model, semplot

# CFA & SEM model structure
sem_model = """
    ESG =~ Env_Risk + Soc_Risk + Gov_Risk
    ER ~ ESG
"""

# Prepare data for SEM. This cell relies on `data` from an earlier cell; the ESG
# indicators and the 'ER' column are not created anywhere in the visible cells,
# so verify they exist instead of failing with a bare KeyError.
esg_columns = ['Env_Risk', 'Soc_Risk', 'Gov_Risk']
sem_columns = esg_columns + ['ER']
missing_columns = [col for col in sem_columns if col not in data.columns]

if missing_columns:
    print(f"Skipping SEM: `data` is missing required column(s) {missing_columns}. "
          "Merge the ESG risk scores and an 'ER' series into `data` first.")
else:
    sem_data = data[sem_columns].dropna()

    # Build and fit SEM model
    model = Model(sem_model)
    model.fit(sem_data)

    # Path Diagram
    semplot(model, 'sem_path_diagram.png')

    # Show the fitted SEM model summary
    print(model.inspect())
