In [1]:
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import yfinance as yf
from scipy import stats
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split

In [2]:
# Build one row of financial ratios per energy company and collect them in df1.
# NOTE(review): the saved output of this cell is an HTTPError 404 from Yahoo's
# quoteSummary endpoint for OXY; presumably the installed yfinance release is
# outdated -- confirm by upgrading before re-running.
energy_symbols = ['OXY', 'XOM', 'BP', 'SNPMF', 'COP', 'E', 'TTE', 'PCCYF', 'SHEL']

# Balance-sheet column (fiscal year end) the debt and asset figures are read from.
energy_balance_date = '2022-12-31'

data_list = []

for symbol in energy_symbols:
    ticker = yf.Ticker(symbol)
    info = ticker.get_info()
    balance = ticker.get_balance_sheet()

    # Missing ratios become NaN rather than the string 'N/A', so the columns stay
    # numeric and incomplete rows can be found with isna()/dropna() before modelling.
    current_ratio = info.get('currentRatio', np.nan)
    roa = info.get('returnOnAssets', np.nan)
    roe = info.get('returnOnEquity', np.nan)

    # Scalar lookups; a missing row label or date column raises KeyError.
    long_term_debt = balance.at['LongTermDebt', energy_balance_date]
    total_assets = balance.at['TotalAssets', energy_balance_date]

    # Leverage ratio: share of total assets financed by long-term debt.
    long_assets = long_term_debt / total_assets

    data_list.append({
        'Symbol': symbol,
        'Current Ratio': current_ratio,
        'ROA': roa,
        'ROE': roe,
        'Long Term Debt': long_term_debt,
        'Total Assets': total_assets,
        'Long Term Debt / Total Assets': long_assets
    })

df1 = pd.DataFrame(data_list)

HTTPError: 404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v6/finance/quoteSummary/OXY?modules=financialData&modules=quoteType&modules=defaultKeyStatistics&modules=assetProfile&modules=summaryDetail&ssl=true

In [None]:
# Build one row of financial ratios per tech company whose fiscal year ends on
# 31 December and collect them in df2.
tech_symbols = ['META','AMZN','GOOG', 'NFLX', 'IBM']

# Balance-sheet column (fiscal year end) the debt and asset figures are read from.
tech_balance_date = '2022-12-31'

data_list = []

for symbol in tech_symbols:
    ticker = yf.Ticker(symbol)
    info = ticker.get_info()
    balance = ticker.get_balance_sheet()

    # Missing ratios become NaN rather than the string 'N/A', so the columns stay
    # numeric and incomplete rows can be found with isna()/dropna() before modelling.
    current_ratio = info.get('currentRatio', np.nan)
    roa = info.get('returnOnAssets', np.nan)
    roe = info.get('returnOnEquity', np.nan)

    # .at is the scalar accessor used by the other fetch cells in this notebook;
    # a missing row label or date column raises KeyError.
    long_term_debt = balance.at['LongTermDebt', tech_balance_date]
    total_assets = balance.at['TotalAssets', tech_balance_date]

    # Leverage ratio: share of total assets financed by long-term debt.
    long_assets = long_term_debt / total_assets

    data_list.append({
        'Symbol': symbol,
        'Current Ratio': current_ratio,
        'ROA': roa,
        'ROE': roe,
        'Long Term Debt': long_term_debt,
        'Total Assets': total_assets,
        'Long Term Debt / Total Assets': long_assets
    })

df2 = pd.DataFrame(data_list)

In [None]:
# AAPL is fetched separately because its balance sheet is read at a different
# date (2022-09-30) from the other tech tickers; the result is stored in df3.
tech_symbols1 = ['AAPL']

# Balance-sheet column (fiscal year end) the debt and asset figures are read from.
aapl_balance_date = '2022-09-30'

data_list = []

for symbol in tech_symbols1:
    ticker = yf.Ticker(symbol)
    info = ticker.get_info()
    balance = ticker.get_balance_sheet()

    # Missing ratios become NaN rather than the string 'N/A', so the columns stay
    # numeric and incomplete rows can be found with isna()/dropna() before modelling.
    current_ratio = info.get('currentRatio', np.nan)
    roa = info.get('returnOnAssets', np.nan)
    roe = info.get('returnOnEquity', np.nan)

    # Scalar lookups; a missing row label or date column raises KeyError.
    long_term_debt = balance.at['LongTermDebt', aapl_balance_date]
    total_assets = balance.at['TotalAssets', aapl_balance_date]

    # Leverage ratio: share of total assets financed by long-term debt.
    long_assets = long_term_debt / total_assets

    data_list.append({
        'Symbol': symbol,
        'Current Ratio': current_ratio,
        'ROA': roa,
        'ROE': roe,
        'Long Term Debt': long_term_debt,
        'Total Assets': total_assets,
        'Long Term Debt / Total Assets': long_assets
    })

df3 = pd.DataFrame(data_list)

In [None]:
# MSFT is fetched separately because its balance sheet is read at a different
# date (2023-06-30) from the other tech tickers; the result is stored in df4.
tech_symbols1 = ['MSFT']

# Balance-sheet column (fiscal year end) the debt and asset figures are read from.
msft_balance_date = '2023-06-30'

data_list = []

for symbol in tech_symbols1:
    ticker = yf.Ticker(symbol)
    info = ticker.get_info()
    balance = ticker.get_balance_sheet()

    # Missing ratios become NaN rather than the string 'N/A', so the columns stay
    # numeric and incomplete rows can be found with isna()/dropna() before modelling.
    current_ratio = info.get('currentRatio', np.nan)
    roa = info.get('returnOnAssets', np.nan)
    roe = info.get('returnOnEquity', np.nan)

    # Scalar lookups; a missing row label or date column raises KeyError.
    long_term_debt = balance.at['LongTermDebt', msft_balance_date]
    total_assets = balance.at['TotalAssets', msft_balance_date]

    # Leverage ratio: share of total assets financed by long-term debt.
    long_assets = long_term_debt / total_assets

    data_list.append({
        'Symbol': symbol,
        'Current Ratio': current_ratio,
        'ROA': roa,
        'ROE': roe,
        'Long Term Debt': long_term_debt,
        'Total Assets': total_assets,
        'Long Term Debt / Total Assets': long_assets
    })

df4 = pd.DataFrame(data_list)

In [None]:
# Stack the energy frame (df1) and the three tech frames (df2, df3, df4) into one
# table and label every row with its sector.
# The labels are derived from the frame lengths instead of hard-coded counts, so
# they stay aligned when a ticker list changes. ignore_index=True gives the rows
# unique 0..n-1 labels; without it each source frame contributes its own 0-based
# labels and the combined index contains duplicates.
tech_frames = [df2, df3, df4]
combined_listings = pd.concat([df1, *tech_frames], ignore_index=True)
combined_listings['Sector'] = (
    ['Energy'] * len(df1) + ['Tech'] * sum(len(frame) for frame in tech_frames)
)
print(combined_listings)

In [None]:
# Predictors are the four financial ratios; the target is the sector label.
X = combined_listings.loc[:, ['Current Ratio', 'ROA', 'ROE', 'Long Term Debt / Total Assets']]
y = combined_listings.loc[:, 'Sector']

In [None]:
# Fit a linear discriminant model on all four ratios.
lda = LinearDiscriminantAnalysis()
lda.fit(X, y)
# fit() returns the estimator itself, so `model` is another name for `lda`.
model = lda

In [None]:
# Score the model on the same X and y it was fitted on.
accuracy = lda.score(X, y)
print(f'Accuracy: {accuracy}')

# Parameters of the fitted linear decision function.
coefficients, intercept = lda.coef_, lda.intercept_
print(f'Coefficients: {coefficients}')
print(f'Intercept: {intercept}')

# Share of variance attributed to each discriminant component.
explained_variance_ratios = lda.explained_variance_ratio_
print("Explained Variance Ratios:", explained_variance_ratios)

# The discriminant analysis model has an accuracy of 81.25%, meaning it correctly classifies 81.25% of the samples in the dataset (13 of 16); note that this is in-sample accuracy, because the model is scored on the same data it was fitted on. Variables with larger absolute coefficients contribute more to the discriminant function, so, in absolute value, ROA has the highest discriminative ability, followed by long-term debt/total assets (the coefficients are for unstandardized variables, so this ranking also reflects each variable's scale). The explained variance ratio is 1, which means the first discriminant explains all of the between-group variance. This is expected: with only two groups there is at most one discriminant function, so it necessarily captures all the information used to separate them.


In [None]:
# Search every two-factor subset of the four ratios for the pair whose LDA fit
# classifies the data best. Each candidate is scored on the same rows it was
# fitted on, so the reported accuracy is in-sample.
factor_combinations = list(combinations(['Current Ratio', 'ROA', 'ROE', 'Long Term Debt / Total Assets'], 2))

best_accuracy = 0
best_factors = None

for factors in factor_combinations:
    # Pair-specific names keep the four-factor X / lda / accuracy from the
    # earlier cells intact, so re-running those cells still reports that model.
    X_pair = combined_listings[list(factors)]
    pair_lda = LinearDiscriminantAnalysis().fit(X_pair, y)
    pair_accuracy = pair_lda.score(X_pair, y)

    # Strict '>' keeps the first pair encountered when several pairs tie.
    if pair_accuracy > best_accuracy:
        best_accuracy = pair_accuracy
        best_factors = factors

print(f"The best pair of factors is {best_factors} with an accuracy of {best_accuracy}")

In [None]:
# Rank the two-factor subsets (factor_combinations, built in the previous cell)
# by the summed absolute value of their fitted LDA coefficients and keep the
# largest.
# NOTE(review): coefficient magnitudes depend on each ratio's scale, so this
# ranking is not scale-invariant -- consider standardizing the ratios first.
best_coefficients = None
best_factors = None

for factors in factor_combinations:
    # Pair-specific names keep the four-factor X / lda from the earlier cells intact.
    X_pair = combined_listings[list(factors)]
    pair_lda = LinearDiscriminantAnalysis().fit(X_pair, y)
    pair_coefficients = pair_lda.coef_[0]

    # The first pair seeds the search; later pairs must be strictly larger.
    if best_coefficients is None or abs(pair_coefficients).sum() > abs(best_coefficients).sum():
        best_coefficients = pair_coefficients
        best_factors = factors

print(f"The best pair of factors is {best_factors} with coefficients {best_coefficients}")