In [15]:

import requests
import pandas as pd
import sqlite3
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

In [16]:
import sys
sys.path.append('../keys')  

from config import ALPHA_VANTAGE_API_KEY

In [30]:
# Connect to DB and import data into a pandas dataframe
connection = sqlite3.connect('../data/db2.sqlite')
averages = pd.read_sql_query("SELECT * FROM averages", connection)
averagesJunk = pd.read_sql_query("SELECT * FROM averagesJunk", connection)
averagesInvestment = pd.read_sql_query("SELECT * FROM averagesInvestment", connection)

connection.close()

In [18]:
def getData(ticker, ALPHA_VANTAGE_API_KEY):
    # Pulling from two parts of the API
    # See Documentation Here: https://www.alphavantage.co/documentation/
    income_statement_url = f'https://www.alphavantage.co/query?function=INCOME_STATEMENT&symbol={ticker}&apikey={ALPHA_VANTAGE_API_KEY}'
    balance_sheet_url = f'https://www.alphavantage.co/query?function=BALANCE_SHEET&symbol={ticker}&apikey={ALPHA_VANTAGE_API_KEY}'
    cash_flow_url = f'https://www.alphavantage.co/query?function=CASH_FLOW&symbol={ticker}&apikey={ALPHA_VANTAGE_API_KEY}'

    try:
        # Fetching from multiple sources:
        # Income statement
        income_statement_response = requests.get(income_statement_url)
        income_statement_data = income_statement_response.json()
        
        # Balance sheet
        balance_sheet_response = requests.get(balance_sheet_url)
        balance_sheet_data = balance_sheet_response.json()

        # Cash_flow
        cash_flow_response = requests.get(cash_flow_url)
        cash_flow_data = cash_flow_response.json()

        # Check if data is valid before creating DataFrames
        if 'annualReports' in income_statement_data and 'annualReports' in balance_sheet_data and 'annualReports' in cash_flow_data:
            income_statement_df = pd.DataFrame(income_statement_data['annualReports'])
            balance_sheet_df = pd.DataFrame(balance_sheet_data['annualReports'])
            cash_flow_df = pd.DataFrame(cash_flow_data['annualReports'])

            return income_statement_df, balance_sheet_df, cash_flow_df
        else:
            raise ValueError("Invalid data format")
    except (KeyError, IndexError, requests.exceptions.RequestException, ValueError) as e:
        print(f"Error fetching financial data: {e}")
        return None, None, None

ticker = 'F'
income_statement_df, balance_sheet_df, cash_flow_df = getData(ticker, ALPHA_VANTAGE_API_KEY)

if income_statement_df is not None:
    print("Income Statement DataFrame:")
    print(income_statement_df.head())  
    
    print("\nBalance Sheet DataFrame:")
    print(balance_sheet_df.head())
    
    print("\nCash Flow DataFrame:")
    print(cash_flow_df.head())
else:
    print("Failed to fetch financial data.")

Income Statement DataFrame:
  fiscalDateEnding reportedCurrency  grossProfit  totalRevenue costOfRevenue  \
0       2023-12-31              USD  13823000000  176191000000  162368000000   
1       2022-12-31              USD   8502000000  158057000000  149555000000   
2       2021-12-31              USD   1008000000  136341000000  135333000000   
3       2020-12-31              USD   -800000000  127144000000  127944000000   
4       2019-12-31              USD   3763000000  155900000000  152137000000   

  costofGoodsAndServicesSold operatingIncome sellingGeneralAndAdministrative  \
0               150550000000      5458000000                     10702000000   
1               134397000000      6276000000                     10888000000   
2               114651000000      4523000000                     11915000000   
3               112752000000     -4408000000                     10193000000   
4               134693000000       574000000                     11161000000   

  research

In [19]:
income_statement_df.head()

Unnamed: 0,fiscalDateEnding,reportedCurrency,grossProfit,totalRevenue,costOfRevenue,costofGoodsAndServicesSold,operatingIncome,sellingGeneralAndAdministrative,researchAndDevelopment,operatingExpenses,...,depreciation,depreciationAndAmortization,incomeBeforeTax,incomeTaxExpense,interestAndDebtExpense,netIncomeFromContinuingOperations,comprehensiveIncomeNetOfTax,ebit,ebitda,netIncome
0,2023-12-31,USD,13823000000,176191000000,162368000000,150550000000,5458000000,10702000000,8200000000,22012000000,...,7690000000,3105000000,3985000000,-362000000,7613000000,4329000000,4644000000,11598000000,14703000000,4347000000
1,2022-12-31,USD,8502000000,158057000000,149555000000,134397000000,6276000000,10888000000,7800000000,21723000000,...,7642000000,2938000000,-2845000000,-864000000,4714000000,-2152000000,-2981000000,6276000000,4686000000,-1981000000
2,2021-12-31,USD,1008000000,136341000000,135333000000,114651000000,4523000000,11915000000,7600000000,22215000000,...,7318000000,3020000000,17807000000,-130000000,6295000000,17910000000,17892000000,4523000000,25420000000,17937000000
3,2020-12-31,USD,-800000000,127144000000,127944000000,112752000000,-4408000000,10193000000,7100000000,17848000000,...,8751000000,2819000000,-1119000000,160000000,5052000000,-1276000000,-1845000000,-4408000000,6751000000,-1279000000
4,2019-12-31,USD,3763000000,155900000000,152137000000,134693000000,574000000,11161000000,7400000000,22800000000,...,9689000000,3464000000,-677000000,-724000000,5464000000,84000000,-315000000,574000000,8196000000,47000000


In [20]:
num_cols = len(balance_sheet_df.columns) - 2
cols = list(balance_sheet_df.columns[2:]) 
balance_sheet_transformed_df = balance_sheet_df[cols]
balance_sheet_transformed_df = balance_sheet_transformed_df.apply(pd.to_numeric, errors='coerce', axis=1)

num_cols = len(income_statement_df.columns) - 2
cols = list(income_statement_df.columns[2:]) 
income_statement_transformed_df = income_statement_df[cols]
income_statement_transformed_df = income_statement_transformed_df.apply(pd.to_numeric, errors='coerce', axis=1)

num_cols = len(cash_flow_df.columns) - 2
cols = list(cash_flow_df.columns[2:]) 
cash_flow_transformed_df = cash_flow_df[cols]
cash_flow_transformed_df = cash_flow_transformed_df.apply(pd.to_numeric, errors='coerce', axis=1)

In [21]:
# Extracting needed variables

# Current ratio = totalCurrentAssets / totalCurrentLiabilities
    # Reference: https://www.investopedia.com/terms/c/currentratio.asp 
totalCurrentAssets = balance_sheet_transformed_df['totalCurrentAssets'].iloc[0]
totalCurrentLiabilities = balance_sheet_transformed_df['totalCurrentLiabilities'].iloc[0]
currentRatio = totalCurrentAssets / totalCurrentLiabilities

# Long-term Debt / Capital = longTermDebt / (longTermDebt + totalShareholderEquity)
    # Reference: https://www.investopedia.com/terms/l/longtermdebt-capitalization.asp *Preferred stock doesn't appear to be available on Alpha Vantage and isn't always issued
longTermDebt = balance_sheet_transformed_df['longTermDebt'].iloc[0]
totalShareholderEquity = balance_sheet_transformed_df['totalShareholderEquity'].iloc[0]
longTermDebtCapital = longTermDebt / (longTermDebt + totalShareholderEquity)

# Debt/Equity Ratio = totalLiabilities / totalShareholderEquity
    # Reference: https://www.investopedia.com/terms/d/debtequityratio.asp
totalLiabilities = balance_sheet_transformed_df['totalLiabilities'].iloc[0]
totalShareholderEquity = balance_sheet_transformed_df['totalShareholderEquity'].iloc[0]
debtEquityRatio= totalLiabilities / totalShareholderEquity

# Gross Margin = 100 × (totalRevenue - costofGoodsAndServicesSold) / totalRevenue
    # Reference: https://www.investopedia.com/terms/g/grossmargin.asp, https://www.omnicalculator.com/finance/margin#gross-margin-formula
totalRevenue = income_statement_transformed_df['totalRevenue'].iloc[0]
costofGoodsAndServicesSold = income_statement_transformed_df['costofGoodsAndServicesSold'].iloc[0]
grossMargin = ((totalRevenue - costofGoodsAndServicesSold) / totalRevenue) * 100

# Operating Margin = operatingIncome / totalRevenue
    # Reference: https://www.investopedia.com/terms/o/operatingmargin.asp
operatingIncome = income_statement_transformed_df['operatingIncome'].iloc[0]
operatingMargin = operatingIncome / totalRevenue

# EBIT Margin: ((totalRevenue - costofGoodsAndServicesSold - operatingExpenses) / totalRevenue) * 100
    # Reference: https://www.investopedia.com/terms/e/ebit.asp
operatingExpenses = income_statement_transformed_df['operatingExpenses'].iloc[0]
ebitMargin = ((totalRevenue - costofGoodsAndServicesSold - operatingExpenses) / totalRevenue) * 100

# EBITDA Margin: (incomeBeforeTax + depreciationAndAmortization) / totalRevenue
    # Reference: https://www.investopedia.com/terms/e/ebitda-margin.asp
incomeBeforeTax = income_statement_transformed_df['incomeBeforeTax'].iloc[0]
depreciationAndAmortization = income_statement_transformed_df['depreciationAndAmortization'].iloc[0]
ebitdaMargin = (incomeBeforeTax + depreciationAndAmortization) / totalRevenue

# Pre-Tax Profit Margin: (incomeBeforeTax / totalRevenue) * 100
    # Reference: https://www.investopedia.com/terms/p/pretax-margin.asp
preTaxProfitMargin = (incomeBeforeTax / totalRevenue) * 100

# Net Profit Margin: (netIncome / totalRevenue) * 100
    # Reference: https://www.investopedia.com/terms/n/net_margin.asp
netIncome = income_statement_transformed_df['netIncome'].iloc[0]
netProfitMargin = (netIncome / totalRevenue) * 100

# Asset Turnover Ratio: totalRevenue / ((totalAssets + totalAssetsPrevious) / 2)
    # totalAssetsPrevious: totalAssets(from preYear) [1] = previous year
totalAssets = balance_sheet_transformed_df['totalAssets'].iloc[0]
totalAssetsPrevious = balance_sheet_transformed_df['totalAssets'].iloc[1]
assetTurnoverRatio = totalRevenue / ((totalAssets + totalAssetsPrevious) / 2)

# ROE - Return On Equity: netIncome / ((totalShareholderEquity(curYear) + totalShareholderEquity(preYear)) / 2)
    # Reference: https://www.investopedia.com/terms/r/returnonequity.asp
totalShareholderEquityPrevious = balance_sheet_transformed_df['totalShareholderEquity'].iloc[1]
returnOnEquity = netIncome / ((totalShareholderEquity + totalShareholderEquityPrevious) / 2)

# Return On Tangible Equity: netIncome / (avgShareholderEquity - intangibleAssets)
    # Reference: https://www.wallstreetprep.com/knowledge/return-on-tangible-equity-rote/
totalShareholderEquity = balance_sheet_transformed_df['totalShareholderEquity'].iloc[0]
totalShareholderEquityPrevious = balance_sheet_transformed_df['totalShareholderEquity'].iloc[1]
avgShareholderEquity = (totalShareholderEquity + totalShareholderEquityPrevious) / 2
intangibleAssets = balance_sheet_transformed_df['intangibleAssets'].iloc[0]
returnOnTangibleEquity = netIncome / (avgShareholderEquity - intangibleAssets)

# ROA - Return On Assets: netIncome / totalAssets
    # Reference: https://www.investopedia.com/terms/r/returnonassets.asp
returnOnAssets = netIncome / totalAssets

# ROI - Return On Investment: (netIncome / ((totalShareholderEquity(curYear) + totalShareholderEquity(preYear)) / 2)) * 100 
    # Reference: https://www.wallstreetprep.com/knowledge/return-on-equity-roe/, https://www.investopedia.com/terms/r/returnoninvestment.asp
returnOnInvestment = (netIncome / ((totalShareholderEquity + totalShareholderEquityPrevious) / 2)) * 100

# Operating Cash Flow Per Share: operatingCashflow / commonStockSharesOutstanding
    # Reference: https://www.investopedia.com/terms/c/cashflowpershare.asp, https://www.wallstreetprep.com/knowledge/cash-flow-per-share/
operatingCashflow = cash_flow_transformed_df['operatingCashflow'].iloc[0]
commonStockSharesOutstanding = balance_sheet_transformed_df['commonStockSharesOutstanding'].iloc[0]
operatingCashFlowPerShare = operatingCashflow / commonStockSharesOutstanding

# Free Cash Flow Per Share: (operatingCashflow - capitalExpenditures) / commonStockSharesOutstanding
    # Reference: https://www.investopedia.com/terms/f/freecashflowpershare.asp
capitalExpenditures = cash_flow_transformed_df['capitalExpenditures'].iloc[0]
freeCashFlowPerShare = (operatingCashflow - capitalExpenditures) / commonStockSharesOutstanding



        # Create a DataFrame to store the results
metrics_df = pd.DataFrame({
    "Current Ratio": [currentRatio],
    "Long-term Debt / Capital": [longTermDebtCapital],
    "Debt/Equity Ratio": [debtEquityRatio],
    "Gross Margin": [grossMargin],
    "Operating Margin": [operatingMargin],
    "EBIT Margin": [ebitMargin],
    "EBITDA Margin": [ebitdaMargin],
    "Pre-Tax Profit Margin": [preTaxProfitMargin],
    "Net Profit Margin": [netProfitMargin],
    "Asset Turnover": [assetTurnoverRatio],
    "ROE - Return On Equity": [returnOnEquity],
    "Return On Tangible Equity": [returnOnTangibleEquity],
    "ROA - Return On Assets": [returnOnAssets],
    "ROI - Return On Investment": [returnOnInvestment],
    "Operating Cash Flow Per Share": [operatingCashFlowPerShare],
    "Free Cash Flow Per Share": [freeCashFlowPerShare]
})
metrics_df.head()

Unnamed: 0,Current Ratio,Long-term Debt / Capital,Debt/Equity Ratio,Gross Margin,Operating Margin,EBIT Margin,EBITDA Margin,Pre-Tax Profit Margin,Net Profit Margin,Asset Turnover,ROE - Return On Equity,Return On Tangible Equity,ROA - Return On Assets,ROI - Return On Investment,Operating Cash Flow Per Share,Free Cash Flow Per Share
0,1.196492,0.699491,5.389194,14.552957,0.030978,2.059697,0.04024,2.26175,2.467209,0.665884,0.101075,0.102901,0.015905,10.107539,3.734168,1.672591


In [22]:
for column in metrics_df.columns:
    # Step 2: Check for NaN values in each column
    nan_indices = metrics_df[column].isnull()
    
    # Step 3: Replace NaN values with corresponding averages
    metrics_df.loc[nan_indices, column] = averages.loc[0, column]
metrics_df.head()

Unnamed: 0,Current Ratio,Long-term Debt / Capital,Debt/Equity Ratio,Gross Margin,Operating Margin,EBIT Margin,EBITDA Margin,Pre-Tax Profit Margin,Net Profit Margin,Asset Turnover,ROE - Return On Equity,Return On Tangible Equity,ROA - Return On Assets,ROI - Return On Investment,Operating Cash Flow Per Share,Free Cash Flow Per Share
0,1.196492,0.699491,5.389194,14.552957,0.030978,2.059697,0.04024,2.26175,2.467209,0.665884,0.101075,0.102901,0.015905,10.107539,3.734168,1.672591


In [23]:
# for c in metrics_df.columns:
#     metrics_df[c].iloc[0] = .1

# metrics_df


In [31]:
# Load the model
model = 5
loaded_model = joblib.load(f'../models/random_forest/model{model}.joblib')
data_frame = averagesJunk

# Define features set for new data
X_new = data_frame.copy()

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_new)

# Scale the new data
X_new_scaled = X_scaler.transform(X_new)

# Make predictions using the loaded model
predictions_new = loaded_model.predict(X_new_scaled)

In [32]:
predictions_new[0]

1

In [26]:
metrics_df.insert(loc=0, column='Binary Rating', value=predictions_new[0])
metrics_df.insert(loc=0, column='Ticker', value=ticker)
metrics_df


Unnamed: 0,Ticker,Binary Rating,Current Ratio,Long-term Debt / Capital,Debt/Equity Ratio,Gross Margin,Operating Margin,EBIT Margin,EBITDA Margin,Pre-Tax Profit Margin,Net Profit Margin,Asset Turnover,ROE - Return On Equity,Return On Tangible Equity,ROA - Return On Assets,ROI - Return On Investment,Operating Cash Flow Per Share,Free Cash Flow Per Share
0,F,1,1.196492,0.699491,5.389194,14.552957,0.030978,2.059697,0.04024,2.26175,2.467209,0.665884,0.101075,0.102901,0.015905,10.107539,3.734168,1.672591


In [27]:
averages

Unnamed: 0,Current Ratio,Long-term Debt / Capital,Debt/Equity Ratio,Gross Margin,Operating Margin,EBIT Margin,EBITDA Margin,Pre-Tax Profit Margin,Net Profit Margin,Asset Turnover,ROE - Return On Equity,Return On Tangible Equity,ROA - Return On Assets,ROI - Return On Investment,Operating Cash Flow Per Share,Free Cash Flow Per Share
0,1.926313,0.452804,0.177349,42.433909,11.531612,11.582683,20.071493,8.659944,5.995566,0.849843,15.950507,21.481589,4.575705,7.076084,0.479756,0.119491


In [28]:
conn = sqlite3.connect('../data/db2.sqlite')
metrics_df.to_sql('query', conn, index=False, if_exists='replace', dtype={'id': 'INTEGER PRIMARY KEY'})

conn.close()