# Stock Flex
### Technical and Fundamental Portfolio Builder

This Jupyter Notebook outlines the exploration, development and clean-up of the application code.

### Table of Contents
- Data connectors
- Data query and clean-up
- Analytical methods
    - Technical
    - Fundamental
    - Traditional
    
---

- Visualization
- Dashboard (GUI)
    - GUI
    - Data filtration methods

### Dependencies

In [23]:
# Data analytics
import pandas as pd
import numpy as np
import panel as pn

# Database tools
import sqlite3

# Visualization
# pn.extension('plotly')
# import plotly.express as px
# import hvplot.pandas
import matplotlib.pyplot as plt

# System
import os
import time, sys
from pathlib import Path
from dotenv import load_dotenv
import requests
from datetime import date, datetime, timedelta
from IPython.display import clear_output
from ast import literal_eval as make_tuple

# Finance
import alpaca_trade_api as tradeapi
import quandl as ql
import finnhub

import warnings
warnings.filterwarnings('ignore')

# Local dependencies
import fundamentals as fun

## Data Connections
- Static Data Connections
- Dynamic Data Connections

### Static Data Connections

#### Stock Ticker Lists

In [2]:
# Tickers within the S&P500 index, sorted by the "Symbol" column
sp500_tickers_path = Path('resources/sp500_tickers.csv')
sp500_tickers = pd.read_csv(sp500_tickers_path)
sp500_tickers = sp500_tickers.sort_values(by="Symbol")

# Test tickers within the S&P500 index (kept in file order)
sp500_tickers_test_path = Path('resources/sp500_tickers_test.csv')
sp500_tickers_test = pd.read_csv(sp500_tickers_test_path)

In [3]:
# Fundamental data previously saved to csv
stock_fundamentals_data_path = Path("resources/fundamental_data.csv")
stock_fundamentals_df = pd.read_csv(
    stock_fundamentals_data_path,
)

#### Database Builder

In [4]:
def create_db():
    '''Create (or open) the SQLite database that stores daily ticker prices.

    The database exists to work around the Alpaca API request limits, which
    can greatly slow down large data requests. The user is asked to confirm
    on standard input; only an answer of exactly 'y' proceeds.

    NOTE(review): the prompt says stored values are deleted, but this
    function only opens 'resources/stock_prices.db' and never drops a
    table - confirm the intended behaviour.
    '''

    print("This will restore the 'stock_prices' database and delete all of stored values, are you sure? y/[n]")
    confirm = input()

    if confirm != 'y':
        print("Databse creation aborted")
        return

    # Connecting is the only database action taken here; close the handle
    # straight away so it is not leaked.
    conn = sqlite3.connect('resources/stock_prices.db')
    conn.close()

### Dynamic Data Connections

#### Stock Price Data

In [5]:
# Alpaca API connector: load the key file into the environment
load_dotenv("../resources/api_keys.env")

# Alpaca API key and secret, read from the environment
alpaca_api_key = os.getenv("ALPACA_API_KEY")
alpaca_secret_key = os.getenv("ALPACA_SECRET_KEY")

# Alpaca REST client (API version "v2")
api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version="v2")

#### Fundamental Stock Data

In [6]:
# FinnHub API connector: load the key file into the environment
load_dotenv("../resources/api_keys.env")

# FinnHub API key, read from the environment
finnhub_api_key = os.getenv("FINNHUB_API_KEY")

# FinnHub client object used by the fundamental-data helpers
finnhub_client = finnhub.Client(
    api_key=finnhub_api_key,
)

# Method to obtain basic financials for one ticker from the FinnHub API
def finnhub_data(ticker):
    '''Returns pd.DataFrame built from FinnHub's basic-financials response

    ...

    Parameters
    ----------
    ticker : str() - ticker symbol passed to the FinnHub client

    Returns
    -------
    pd.DataFrame constructed from the response of
        finnhub_client.company_basic_financials(ticker, "")
    '''

    response = finnhub_client.company_basic_financials(ticker, "")
    frame = pd.DataFrame(response)

    # Pause for one second after every request
    # NOTE(review): presumably to stay under the FinnHub rate limit - confirm
    time.sleep(1)

    return frame

#### Bond Data

In [7]:
# Treasury bonds
def treasury_data():
    '''Returns the result of the Quandl query for the "USTREASURY/YIELD" dataset'''
    treasury_yields = ql.get("USTREASURY/YIELD")
    return treasury_yields

#### Crypto Data

In [8]:
# Alternative.me ticker endpoints for Ethereum and Bitcoin (convert=USD)
eth_url = "https://api.alternative.me/v2/ticker/Ethereum/?convert=USD"
btc_url = "https://api.alternative.me/v2/ticker/Bitcoin/?convert=USD"

# TODO: build out the crypto connector here

## Data Parsing

### Stock Data

In [52]:
# Get prices for tickers within a given index or sector
def stock_prices_from_api(tickers_df, start_date, end_date, tickers_per_request=50):
    '''Returns pd.DataFrame with prices for the given tickers

    ...

    Parameters
    ----------
    tickers_df : pd.DataFrame - contains tickers for given index or sector under
        the "Symbol" column; a single row (pd.Series) whose "Symbol" entry is
        one ticker string is accepted as well
    start_date : str() - string with date in following format YYYY-MM-DD
    end_date: str() - string with date in following format YYYY-MM-DD
    tickers_per_request : int() - number of tickers sent to the Alpaca API in
        one request, default = 50


    Returns
    -------
    result_df : pd.DataFrame with securities price data; the frames returned
        for each batch are joined column-wise (outer join). Empty when no
        tickers are given.

    Notes
    -----
    Each request is made with limit = 1000; date ranges that need more rows
    per ticker are not handled yet.
    '''

    symbols = tickers_df["Symbol"]

    # A single row yields one ticker string here. Wrap it so it is requested
    # as one symbol instead of relying on string slicing to return the whole
    # ticker.
    if isinstance(symbols, str):
        symbols = [symbols]
    else:
        symbols = list(symbols)

    # Parse start and end dates
    start_date = pd.Timestamp(start_date, tz="America/New_York").isoformat()
    end_date = pd.Timestamp(end_date, tz="America/New_York").isoformat()

    # The original author notes that Alpaca answers with a 422 Client Error
    # when more than 100 tickers are passed, so the tickers are requested in
    # batches of `tickers_per_request`.
    result_df = pd.DataFrame()

    for i in range(0, len(symbols), tickers_per_request):
        batch = symbols[i:i + tickers_per_request]

        temp_df = api.get_barset(
            batch,
            timeframe = "1D",
            start = start_date,
            end = end_date,
            limit = 1000).df

        # Append this batch's columns to result_df
        result_df = pd.concat([result_df, temp_df], axis = "columns", join = "outer")

        # Short pause between consecutive requests
        time.sleep(0.1)

    return result_df



# Seed stock price database
def seed_stock_prices_database():
    '''Seeds stock_prices database that houses all prices for tickers within an index or sector

    Prices for every ticker in `sp500_tickers` between 2015-01-01 and
    2018-01-01 are requested through `stock_prices_from_api` and appended to
    one table per ticker in 'resources/stock_prices.db'. Table names are the
    ticker prefixed with "_" and with "." replaced by "_".
    '''

    # Get data for dates indicated
    data = stock_prices_from_api(sp500_tickers, "2015-01-01", "2018-01-01")

    # Transpose so the tickers (first column level) can be grouped as rows
    data = data.transpose()

    # Connect to database
    conn = sqlite3.connect('resources/stock_prices.db')

    try:
        # One table per ticker. if_exists='append' covers both a missing and
        # an existing table, so no separate existence check is needed.
        for ticker, sub_frame in data.groupby(level=0):
            table_name = "_" + ticker.replace('.', '_')
            sub_frame.transpose().to_sql(table_name, conn, if_exists='append')

        conn.commit()
    finally:
        # Release the connection even when a write fails
        conn.close()

    

# Update stock price database
def update_stock_prices_database(tickers_df):
    '''Updates stock_prices database that houses all prices for tickers within an index or sector

    ...

    Parameters
    ----------
    tickers_df : pd.DataFrame - contains tickers under the "Symbol" column

    For every ticker:
      - when its table does not exist, the table is created and seeded with
        prices between 2015-01-01 and 2018-01-01;
      - otherwise prices from the day after the last stored date up to today
        are appended. Failures in this step are reported and the ticker is
        skipped (best effort).
    '''

    # Get today's date
    today_date = date.today().strftime("%Y-%m-%d")

    # Connect to database
    conn = sqlite3.connect('resources/stock_prices.db')
    c = conn.cursor()

    total = len(tickers_df.index)

    try:
        for ticker_count, (_, ticker_data) in enumerate(tickers_df.iterrows(), start=1):

            ticker = "_" + ticker_data["Symbol"].replace('.', '_')

            # Check whether a table for this ticker already exists
            table_exists = c.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?;",
                (ticker,),
            ).fetchall()

            if not table_exists:
                # Table does not exist: create and seed it
                data = stock_prices_from_api(ticker_data, "2015-01-01", "2018-01-01")
                data.to_sql(ticker, conn, if_exists='append')

            else:
                try:
                    # Get last date from the table
                    rows = c.execute(
                        f'SELECT time FROM "{ticker}" ORDER BY time DESC LIMIT 1'
                    ).fetchall()
                    if not rows:
                        raise LookupError("table has no rows")
                    last_date = rows[0][0].split(" ")[0]

                    # Start from the day AFTER the last stored one so that
                    # the last stored row is not appended a second time
                    next_date = datetime.strptime(last_date, '%Y-%m-%d') + timedelta(days=1)
                    next_date = next_date.strftime("%Y-%m-%d")

                    # YYYY-MM-DD strings order chronologically; nothing to
                    # request when the table is already up to date
                    if next_date <= today_date:
                        data = stock_prices_from_api(ticker_data, next_date, today_date)
                        data.to_sql(ticker, conn, if_exists='append')
                except Exception as err:
                    # Best effort: report the failure and move on
                    print(f"Could not update {ticker}: {err}")

            print(f"Processing {ticker} | {ticker_count}/{total}")

        conn.commit()
    finally:
        # Release the connection even when a request or write fails
        conn.close()
    
    
    
# Get stock prices from database
def stock_prices_from_db(ticker_df, start_date, end_date):
    '''Returns a pd.DataFrame with stock price data for tickers passed in ticker_df and
    filtered by dates passed in start_date and end_date (both inclusive)

    ...

    Parameters
    ----------
    ticker_df : pd.DataFrame - contains tickers for given index or sector under
        the "Symbol" column
    start_date : str() - string with date in following format YYYY-MM-DD
    end_date: str() - string with date in following format YYYY-MM-DD


    Returns
    -------
    result_df : pd.DataFrame with securities price data, indexed by the stored
        "time" values, with ('Symbol', 'Data') MultiIndex columns. An empty
        pd.DataFrame is returned when no ticker could be loaded.
    '''
    # Date bounds as timezone-aware timestamps (midnight, New York time)
    start_ts = pd.Timestamp(start_date, tz="America/New_York")
    end_ts = pd.Timestamp(end_date, tz="America/New_York")

    # Declare result_df
    result_df = pd.DataFrame()

    # Connect to database
    conn = sqlite3.connect('resources/stock_prices.db')

    try:
        # Iterate over tickers
        for ticker in list(ticker_df["Symbol"]):

            try:
                # Parse ticker name into its table name
                _ticker = "_" + ticker.replace('.', '_')

                # Query database
                data = pd.read_sql_query(f'SELECT * FROM "{_ticker}"', conn)

                # Filter by dates. The stored values are compared as
                # timestamps, not as text, so rows on the start date are
                # kept regardless of how the stored string is formatted.
                row_times = pd.to_datetime(data['time'], utc=True)
                in_range = (row_times >= start_ts) & (row_times <= end_ts)
                data = data[in_range].set_index(['time'])

                # Concatenate dataframes
                result_df = pd.concat([result_df, data], axis = "columns", join = "outer")

            except pd.io.sql.DatabaseError:
                print(f"Ticker {ticker} not found in database.")
            except Exception as err:
                # Best effort: report the failure and continue
                print(f"Ticker {ticker} could not be loaded from database: {err}")
    finally:
        # Release the connection even when a query fails
        conn.close()

    # Nothing was loaded: a MultiIndex cannot be built from an empty list
    if result_df.columns.empty:
        return result_df

    # Create a multi-index
    ## Transpose the df so the column labels become the index
    result_df = result_df.transpose()

    ## The labels are string representations of (symbol, field) tuples
    keys = [make_tuple(key) for key in result_df.index]

    ## Build an index
    index = pd.MultiIndex.from_tuples(keys, names=('Symbol', 'Data'))

    ## Set index and transpose back
    result_df = result_df.set_index(index).transpose()

    return result_df



# Get stock fundamental data from FinnHub
def generate_stock_fundamentals(tickers_df):
    '''Returns pd.DataFrame with fundamentals of tickers within tickers_df

    ...
    Parameters
    ----------
    tickers_df : pd.DataFrame - contains tickers for given index or sector under
        the "Symbol" column

    Returns
    -------
    result_df : pd.DataFrame - securities fundamental data indexed by 'symbol'
        with the columns 'metric_type', 'metric' and 'series'; empty when
        tickers_df holds no tickers
    '''

    # One request (and one frame) per ticker
    frames = [finnhub_data(ticker) for ticker in tickers_df['Symbol']]

    if not frames:
        # No tickers: return an empty frame with the documented layout
        return pd.DataFrame(
            columns=['metric_type', 'metric', 'series'],
            index=pd.Index([], name='symbol'),
        )

    result_df = pd.concat(frames, axis=0, join="outer")

    # Parse the dataframe: the old index becomes a regular column and
    # 'symbol' becomes the index
    result_df = result_df.reset_index().set_index('symbol')
    result_df = result_df.drop(columns='metricType')

    # Remaining columns are renamed by position
    result_df.columns = ['metric_type', 'metric', 'series']

    return result_df



# Refresh stock fundamental data csv
def refresh_fundamentals_csv(tickers_df):
    '''Regenerates the fundamentals dataset for the tickers within tickers_df
    and writes it to "resources/fundamental_data.csv"

    ...
    Parameters
    ----------
    tickers_df : pd.DataFrame - contains tickers for given index or sector under
        the "Symbol" column

    Returns
    -------
    None - the refreshed data is only written to the csv file
    '''

    result_df = generate_stock_fundamentals(tickers_df)
    result_df.to_csv("resources/fundamental_data.csv")

In [55]:
# Disabled debug snippet: print every row stored in the _AAPL table
# conn = sqlite3.connect('resources/stock_prices.db') 
# c = conn.cursor()
# data = c.execute("SELECT * FROM _AAPL")
# for row in data:
#     print(row)
# conn.commit()
# conn.close()

# Load prices for the test tickers from the database
data = stock_prices_from_db(
    sp500_tickers_test,
    "2020-01-01",
    "2020-02-01",
)

data.head()

Symbol,MMM,MMM,MMM,MMM,MMM,ABT,ABT,ABT,ABT,ABT,...,ABMD,ABMD,ABMD,ABMD,ABMD,ACN,ACN,ACN,ACN,ACN
Data,open,high,low,close,volume,open,high,low,close,volume,...,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2020-01-02 00:00:00-05:00,177.68,180.01,177.1356,180.0,2699844.0,86.06,86.96,85.8,86.94,3511742.0,...,172.51,173.86,167.14,168.81,652882.0,210.86,211.92,208.81,210.15,1953325.0
2020-01-03 00:00:00-05:00,177.02,178.66,175.63,178.39,1874415.0,85.75,86.67,85.59,85.84,3539144.0,...,166.83,169.3174,165.24,166.75,711463.0,208.5,210.625,207.0,209.76,1283366.0
2020-01-06 00:00:00-05:00,177.15,178.71,176.35,178.62,1611440.0,85.62,86.36,85.205,86.33,4397100.0,...,166.53,179.42,166.02,179.07,1428348.0,208.27,208.79,206.81,208.43,1698710.0
2020-01-07 00:00:00-05:00,178.28,178.51,176.82,177.86,1624671.0,85.77,86.3,85.54,85.87,2656902.0,...,178.83,182.44,177.19,180.36,934233.0,203.2,206.26,203.19,203.92,2585064.0
2020-01-08 00:00:00-05:00,178.0,181.5,177.78,180.63,2353019.0,86.24,86.66,85.9285,86.21,4454212.0,...,178.38,180.73,174.29,178.72,1077586.0,204.0,205.45,203.53,204.37,1917073.0


## Computational Methods

### Technical

In [11]:
# please start with RSI and MACD
# your method must take entire dataframe and add a RSI or MACD column to it
# please keep in mind that the dataframe will contain multiple tickers
# follow specs for method writing outlined below, feel free to expand and improve
# document your method with docstring
# document theory for your method in readme (see the README.md for example)


def rsi(df, days):
    '''Placeholder for the RSI calculation; currently returns an empty pd.DataFrame

    ...

    Parameters
    ----------
    df : pd.DataFrame - dataframe to be processed (not used yet)
    days : int() - number of days for the RSI calculation (not used yet)

    Returns
    -------
    pd.DataFrame - always empty for now; intended to become the input
        dataframe with an RSI column appended
    '''

    # TODO: RSI calculation is not implemented yet
    return pd.DataFrame()

def macd(df, short_ema, long_ema):
    '''Placeholder for the MACD calculation; currently does nothing

    ...

    Parameters
    ----------
    df : pd.DataFrame - dataframe to be processed (not used yet)
    short_ema : int() - short-term EMA for MACD calculation (not used yet)
    long_ema : int() - long-term EMA for MACD calculation (not used yet)

    Returns
    -------
    None for now; intended to become the input dataframe with a MACD
        column appended
    '''
    # TODO: MACD calculation is not implemented yet
    return None

# Method test

### Fundamental

In [12]:
# Fundamental data filter
def fundamental_data_query(tickers_df, fundamental_indicator_keys, lower_bound = -1000000, upper_bound = 1000000):
    '''Returns a pd.DataFrame of fundamental data for the requested indicators

    ...

    Parameters
    ----------
    tickers_df : pd.DataFrame - dataframe to be processed, contains tickers
        under the "Symbol" column
    fundamental_indicator_keys : list of str() - keywords for the fundamental
        indicators requested; a single str() is treated as a one-item list

        Fundamental indicator keys ->

        P/E Ratio : [pe_ratio]
        EPS (Earnings per Share) : [eps]
        Annual Dividend : [dividend]
        Beta (vs. S&P 500) : [beta]
        EBIDT : [ebidt]
        Quick Ratio : [quick_ratio]
        3 Year Revenue Growth : [rev_growth]
        Free Cash Flow : [free_cash_flow] (alias: [cash_flow])

    lower_bound : int() or float() - lower bound for fundamental value filter, default = -1000000
    upper_bound : int() or float() - upper bound for fundamental value filter, default = 1000000

        NOTE(review): lower_bound and upper_bound are accepted but not
        applied - no rows are filtered by value. Confirm the intended filter
        semantics before implementing it.


    Returns
    -------
    result_df : pd.DataFrame - indexed by symbol, one numeric column per
        requested indicator key

    Raises
    ------
    KeyError - when a requested key is not one of the keys listed above
    '''

    fund_indicators_dict = {
        'pe_ratio' : 'peNormalizedAnnual',
        'eps' : 'epsNormalizedAnnual',
        'dividend' : 'dividendsPerShareTTM',
        'beta' : 'beta',
        'ebidt' : 'ebitdPerShareTTM',
        'quick_ratio' : 'quickRatioAnnual',
        'rev_growth' : 'revenueGrowth3Y',
        'free_cash_flow' : 'freeCashFlowAnnual',
        # 'cash_flow' is accepted as an alias of 'free_cash_flow'
        'cash_flow' : 'freeCashFlowAnnual',
    }

    # A bare string would otherwise be iterated character by character
    if isinstance(fundamental_indicator_keys, str):
        fundamental_indicator_keys = [fundamental_indicator_keys]

    # Declare result_df
    result_df = pd.DataFrame()

    # Declare tickers list
    tickers_list = tickers_df['Symbol']

    # Declare fundamental data df and filter by ticker df
    data_df = stock_fundamentals_df[stock_fundamentals_df.symbol.isin(tickers_list)]
    data_df = data_df.set_index(['symbol'])

    # Extract requested fundamental data, one column per indicator
    for ind in fundamental_indicator_keys:
        df = data_df[data_df['metric_type'] == fund_indicators_dict[ind]]
        result_df = pd.concat([result_df, df], axis = 1, join = 'outer')

        # Keep only the value column and name it after the indicator key
        result_df = result_df.drop(columns = ['metric_type', 'series'])
        result_df = result_df.rename(columns = {'metric' : ind})

        # Convert all df values to numeric
        result_df[ind] = result_df[ind].apply(pd.to_numeric)

    return result_df


# Method test: P/E ratio and EPS for the S&P 500 tickers
print(
    fundamental_data_query(sp500_tickers, ['pe_ratio','eps'], 0, 1)
)
# help(fundamental_data_query)

        pe_ratio      eps
symbol                   
MMM     22.15725  9.09138
ABT     47.07946  2.49663
ABBV    31.12677  3.65923
ABMD    60.91690  4.93771
ACN     36.93617  7.89470
...          ...      ...
YUM     29.98900  3.98046
ZBRA    48.65101  9.91593
ZBH     66.38869  2.61415
ZION    18.36350  3.17042
ZTS     49.31394  3.52375

[504 rows x 2 columns]


### Traditional

In [13]:
# Please start with calculating the Sharpe Ratio Calculation
# your method must take entire dataframe and return a dataframe with ticker and sharpe ratio for the given time
# please keep in mind that the dataframe will contain multiple tickers
# follow specs for method writing outlined below, feel free to expand and improve
# document your method with docstring
# document theory for your method in readme (see the README.md for example)

# Method test

### Portfolio Methods

In [14]:
# Portfolio weighting
def fundamental_portfolio_builder(tickers_df, start_date, end_date):
    '''Placeholder for the equal weight portfolio builder; currently does nothing

    ...

    Parameters
    ----------
    tickers_df : pd.DataFrame - dataframe to be processed (not used yet)
    start_date : str() - string with date in following format YYYY-MM-DD (not used yet)
    end_date: str() - string with date in following format YYYY-MM-DD (not used yet)

    Returns
    -------
    None for now; intended to become a pd.DataFrame with time series price
        data of the equal weight portfolio
    '''
    # TODO: portfolio construction is not implemented yet
    return None
    

## Visualization

In [None]:
# your code here

## Dashboard

In [None]:
# your code here