<a href="https://colab.research.google.com/github/javier-jaime/Stock_Price_Forecasting/blob/main/Financial_Data_from_yfinance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Importing all the necessary Libraries

In [1]:
!pip install yfinance
import yfinance as yf
import pandas as pd
import numpy as np
import plotly.express as px
from prophet import Prophet
import plotly.io as pio
pio.renderers.default='colab'
from google.colab import files

## Importing the Datasets

In [2]:
# Stock Ticker Input
stock = input('Enter the stock ticker: ')
period = input('Enter the period (1y,2y,3y...max): ')
print(f'You entered {stock} and period {period}')

ticker = yf.Ticker(stock)

Enter the stock ticker: TSLA
Enter the period (1y,2y,3y...max): max
You entered TSLA and period max


In [3]:
# get stock info
print(f'This is the info available from {stock}:\n')
print(ticker.info)

# get historical market data
print('\n This is the historical market data from the last 10 days:\n')
hist = ticker.history(period)
print(hist.tail(10))

# show actions (dividends, splits)
# ticker.actions

# show dividends if any
# ticker.dividends

# show splits if any
# ticker.splits

# show major holders
print('\n These are the Major holders:\n')
print(ticker.major_holders)

# show institutional holders
print('\n These are the institutional holders:\n')
print(ticker.institutional_holders)

# show earnings
#print('\n These are the Annual Earnings:\n')
#print(ticker.earnings)
#print('\n These are the Quarterly Earnings:\n')
#print(ticker.quarterly_earnings)

# show sustainability
#print('\n This is the Reported Sustainability:\n')
#print(ticker.sustainability)

# show analysts recommendations
# ticker.recommendations

# show next event (earnings, etc)
# ticker.calendar

# show ISIN code - *experimental*
# ISIN = International Securities Identification Number
print('\n ISIN:',ticker.isin)

# show options expirations
# ticker.options

# show news
# ticker.news

# get option chain for specific expiration
# opt = ticker.option_chain('YYYY-MM-DD')
# data available via: opt.calls, opt.puts

This is the info available from TSLA:

{'address1': '1 Tesla Road', 'city': 'Austin', 'state': 'TX', 'zip': '78725', 'country': 'United States', 'phone': '512 516 8177', 'website': 'https://www.tesla.com', 'industry': 'Auto Manufacturers', 'industryDisp': 'Auto Manufacturers', 'sector': 'Consumer Cyclical', 'sectorDisp': 'Consumer Cyclical', 'longBusinessSummary': 'Tesla, Inc. designs, develops, manufactures, leases, and sells electric vehicles, and energy generation and storage systems in the United States, China, and internationally. It operates in two segments, Automotive, and Energy Generation and Storage. The Automotive segment offers electric vehicles, as well as sells automotive regulatory credits; and non-warranty after-sales vehicle, used vehicles, retail merchandise, and vehicle insurance services. This segment also provides sedans and sport utility vehicles through direct and used vehicle sales, a network of Tesla Superchargers, and in-app upgrades; purchase financing and le

In [4]:
# show financials
print('\n These are the Annual Income Statements:\n')
print(ticker.income_stmt)
print('\n These are the Quarterly Income Statements:\n')
print(ticker.quarterly_income_stmt)

# show balance sheet
print('\n This is the Annual Balance Sheet:\n')
print(ticker.balance_sheet)
print('\n This is the Quarterly Balance Sheet:\n')
print(ticker.quarterly_balance_sheet)

# show cashflow
print('\n This is the Annual Cashflow:\n')
print(ticker.cashflow)
print('\n This is the Quarterly Cashflow:\n')
print(ticker.quarterly_cashflow)


 These are the Annual Income Statements:

                                                       2022-12-31  \
Tax Effect Of Unusual Items                           -14080000.0   
Tax Rate For Calcs                                           0.08   
Normalized EBITDA                                   17833000000.0   
Total Unusual Items                                  -176000000.0   
Total Unusual Items Excluding Goodwill               -176000000.0   
Net Income From Continuing Operation Net Minori...  12583000000.0   
Reconciled Depreciation                              3747000000.0   
Reconciled Cost Of Revenue                          60609000000.0   
EBIT                                                13910000000.0   
Net Interest Income                                   106000000.0   
Interest Expense                                      191000000.0   
Interest Income                                       297000000.0   
Normalized Income                                   12744920

In [5]:
# Fill Empty/NA cells with zeros, transpose it and rename index
financials = ticker.financials.fillna(0).transpose()
financials.index.rename('Date', inplace=True)
financials = financials.assign(**{'symbol': stock})

balancesheet = ticker.balance_sheet.fillna(0).transpose()
balancesheet.index.rename('Date', inplace=True)
balancesheet = balancesheet.assign(**{'symbol': stock})

cashflow = ticker.cashflow.fillna(0).transpose()
cashflow.index.rename('Date', inplace=True)
cashflow = cashflow.assign(**{'symbol': stock})

info = pd.DataFrame.from_dict(ticker.info, orient ='index').transpose()
info.set_index('symbol', inplace=True)

In [6]:
# Download Financial CSV Files
financials.to_csv(f'{stock}_income_statement.csv')
files.download(f'{stock}_income_statement.csv')
balancesheet.to_csv(f'{stock}_balance_sheet.csv')
files.download(f'{stock}_balance_sheet.csv')
cashflow.to_csv(f'{stock}_cashflow.csv')
files.download(f'{stock}_cashflow.csv')
info.to_csv(f'{stock}_info.csv')
files.download(f'{stock}_info.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [7]:
# Multiple Tickers Data Test
TickersData = yf.download(
        # tickers list or string as well
        tickers = "SPY AAPL MSFT",
        # use "period" instead of start/end
        # valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
        # (optional, default is '1mo')
        period = "5d",
        # fetch data by interval (including intraday if period < 60 days)
        # valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
        # (optional, default is '1d')
        interval = "1m",
        # group by ticker (to access via data['SPY'])
        # (optional, default is 'column')
        group_by = 'ticker',
        # adjust all OHLC automatically
        # (optional, default is False)
        auto_adjust = True,
        # download pre/post regular market hours data
        # (optional, default is False)
        prepost = True,
        # use threads for mass downloading? (True/False/Integer)
        # (optional, default is True)
        threads = True,
        # proxy URL scheme use use when downloading?
        # (optional, default is None)
        proxy = None
    )
TickersData.head()

[*********************100%%**********************]  3 of 3 completed


Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,SPY,SPY,SPY,SPY,SPY,MSFT,MSFT,MSFT,MSFT,MSFT
Unnamed: 0_level_1,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
2023-08-15 04:00:00-04:00,179.4,179.4,178.63,178.8,0.0,446.09,448.0,445.77,445.86,0.0,323.13,324.68,322.55,322.77,0.0
2023-08-15 04:01:00-04:00,178.8,178.8,178.65,178.76,0.0,445.82,445.82,445.72,445.72,0.0,322.56,323.11,322.56,322.92,0.0
2023-08-15 04:02:00-04:00,178.76,178.76,178.7,178.7,0.0,445.76,445.88,445.73,445.73,0.0,322.8,322.8,322.6,322.6,0.0
2023-08-15 04:03:00-04:00,178.7,178.78,178.64,178.73,0.0,445.75,445.92,445.71,445.9,0.0,322.6,322.98,322.6,322.98,0.0
2023-08-15 04:04:00-04:00,178.76,178.79,178.75,178.79,0.0,445.91,445.94,445.87,445.87,0.0,322.93,322.93,322.88,322.88,0.0


# Data Visualization (plotly express) - Visualizing the historical performance

In [8]:
data = hist.reset_index()

In [9]:
#Line graph (Price), Area graph (Volume) , box plot (Analyzing price and volume)

px.line(data, x="Date", y="Close", title=f'{stock} Stock Close Price')

In [10]:
px.bar(data, x="Date", y="Volume", title=f'{stock} Stock Volume')

In [11]:
px.box(data, y="Close", title=f'{stock} box plot', height=500, width=500)

In [12]:
# Hurst Exponent >0.5 expanding, 0.5 random walking, <0.5 mean reverting (Lags in Days)
def get_hurst_exponent(time_series, max_lag=20):
    """Returns the Hurst Exponent of the time series"""
    lags = range(2, max_lag)
    # variances of the lagged differences
    tau = [np.std(np.subtract(time_series[lag:], time_series[:-lag])) for lag in lags]
    # calculate the slope of the log plot -> the Hurst Exponent
    reg = np.polyfit(np.log(lags), np.log(tau), 1)
    return reg[0]

hurst = pd.DataFrame(columns =["hurst_exp","lag"])
lags = range(4, 121, 2)
for lag in lags:
    hurst_exp = get_hurst_exponent(data["Close"].values, lag)
    hurst = hurst.append({"hurst_exp":hurst_exp,"lag":lag},ignore_index=True)
px.line(hurst, x="hurst_exp", y="lag", title=f'{stock} Hurst Exp')


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated a

## Data Preparation for Facebook Prophet

In [13]:
columns=['Date','Close']
prophet_df=pd.DataFrame(data,columns=columns)
prophet_df= prophet_df.rename(columns={'Date':'ds','Close':'y'})
prophet_df.tail()

Unnamed: 0,ds,y
3304,2023-08-15 00:00:00-04:00,232.960007
3305,2023-08-16 00:00:00-04:00,225.600006
3306,2023-08-17 00:00:00-04:00,219.220001
3307,2023-08-18 00:00:00-04:00,215.490005
3308,2023-08-21 00:00:00-04:00,228.509995
