# Data Preparation

### Imports

In [6]:
import investpy
import yfinance as yf
import pandas as pd
import requests 

## US companies ticker extraction

In [None]:
# Fetch the listing of United States stocks from investpy, keep a CSV copy
# on disk, and show the first rows as a quick sanity check.
us_stocks = investpy.stocks.get_stocks(country="united states")
us_stocks.to_csv("companies/us_stocks.csv", index=False)
preview = us_stocks.head()
print(preview)

## Stock price extraction from Yahoo Finance

In [None]:
tickers = us_stocks['symbol'].tolist()

# Download the full price history of every listed company from Yahoo Finance
# and store one CSV per company under companies/price/.
for _, company in us_stocks.iterrows():
    ticker = company["symbol"]
    try:
        hist = yf.Ticker(ticker).history(period="max")
        if hist.empty:
            # Nothing came back for this symbol; writing a CSV with no rows
            # would only produce a useless file.
            print(f"{ticker}: no price history returned")
            continue
        # history() carries the dates in the frame's index, so the index must
        # be written out (as a "Date" column) -- the merge step that reads
        # these files looks up stocks['Date'].
        # Forward slashes keep the path portable and consistent with the
        # companies/price/ path the files are read back from.
        hist.to_csv(f"companies/price/{company['name']}-{ticker}.csv", index_label="Date")
        print(ticker)
    except Exception as e:
        # Best-effort download: report which ticker failed and move on.
        print(f"{ticker}: {e}")

## Debt ratio extraction from FMP

In [None]:
# Base endpoint of the FMP ratios API; the ticker symbol is appended per request.
URL = "https://financialmodelingprep.com/api/v3/ratios"

params = {
    'limit': '200',
    'apikey': '*'  # NOTE(review): presumably a redacted placeholder -- supply a real key
}

# Fetch the debt-ratio history of every listed company and store one CSV per
# company under companies/debt/ with the columns Date and DebtRatio.
for _, company in us_stocks.iterrows():
    ticker = company["symbol"]

    try:
        # Query the endpoint for THIS ticker; a fixed symbol in the URL would
        # store the same company's ratios under every file name.
        response = requests.get(f"{URL}/{ticker}", params=params, timeout=30)
        response.raise_for_status()
        json_res = response.json()
        if not isinstance(json_res, list):
            # Anything other than a list of yearly records cannot be parsed below.
            print(f"{ticker}: unexpected response {json_res}")
            continue

        company_debtR_history = {"Date": [], "DebtRatio": []}
        for year_data in json_res:
            ratio = year_data.get('debtRatio')
            # Skip missing and zero ratios; neither is a usable data point.
            if ratio is not None and ratio != 0:
                company_debtR_history["Date"].append(year_data['date'])
                company_debtR_history["DebtRatio"].append(ratio)

        company_df = pd.DataFrame(company_debtR_history)
        company_df.to_csv(f"companies/debt/{company['name']}-{ticker}.csv", index=False)
        print(ticker)
    except Exception as e:
        # Best-effort download: report which ticker failed and move on.
        print(f"{ticker}: {e}")

## Merge the close price of each stock with its debt ratio

In [None]:
# For every listed company, load its saved price and debt-ratio CSVs, join
# them on the calendar date, and save the Date / Close / DebtRatio columns
# under companies/combined/ when at least one row survives the join.
for _, row in us_stocks.iterrows():
    ticker = row["symbol"]

    try:
        name = row['name']
        price_df = pd.read_csv(f"companies/price/{name}-{ticker}.csv")
        debt_df = pd.read_csv(f"companies/debt/{name}-{ticker}.csv")

        # Reduce both date columns to plain dates so the join keys are comparable.
        price_df['Date'] = pd.to_datetime(price_df['Date'], utc=True).dt.date
        debt_df['Date'] = pd.to_datetime(debt_df['Date']).dt.date

        # Inner join on the date, then drop every row that has a missing value.
        merged = price_df.merge(debt_df, on='Date', how='inner').dropna()
        close_and_debt = merged[['Date', 'Close', 'DebtRatio']]

        if close_and_debt.empty:
            # No overlapping dates: nothing to save for this company.
            continue

        print(ticker)
        close_and_debt.to_csv(f"companies/combined/{name}-{ticker}.csv", index=False)
    except Exception as e:
        print(e)

## Add Extra Information (percent)