##### Imports

In [2]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
import mplfinance as mpf

##### Function that saves stock data to csv

In [5]:
def save_to_csv_from_yahoo(ticker, syear, smonth, sday, eyear, emonth, eday):
    """Download price history for ``ticker`` and store it as ``<ticker>.csv``.

    Args:
        ticker: Symbol passed to ``yf.download``; also used as the CSV file name.
        syear, smonth, sday: Year, month and day of the start date.
        eyear, emonth, eday: Year, month and day of the end date.

    Returns:
        The object returned by ``yf.download``, after it has been written to
        the project folder with ``to_csv``.

    Raises:
        ValueError: If either date is not a valid calendar date
            (raised by ``dt.datetime``).
    """
    # Build both bounds up front so an invalid date fails before any download.
    window = {
        "start": dt.datetime(syear, smonth, sday),
        "end": dt.datetime(eyear, emonth, eday),
    }
    prices = yf.download(ticker, **window)

    csv_path = "/home/johnadi/Desktop/projects/analytics-projects/python-for-finance-derek-banas/" + ticker + ".csv"
    prices.to_csv(csv_path)
    return prices

##### Function that returns a DataFrame from a CSV

In [7]:
def get_dataframe_from_csv(ticker):
    """Load ``<ticker>.csv`` from the project folder.

    Args:
        ticker: Symbol whose CSV file should be read.

    Returns:
        The DataFrame produced by ``pd.read_csv``, or ``None`` (after printing
        a message) when the file does not exist.
    """
    csv_path = "/home/johnadi/Desktop/projects/analytics-projects/python-for-finance-derek-banas/" + ticker + ".csv"
    try:
        return pd.read_csv(csv_path)
    except FileNotFoundError:
        print("File does not exist!")
        return None

##### Add Daily Return to DataFrame

In [10]:
# A daily percentage rate of return makes different investments comparable.
# Simple rate of return = (End Price - Beginning Price) / Beginning Price,
# which is the same as (End Price / Beginning Price) - 1.
def add_daily_return_to_df(df, ticker):
    """Add a ``daily_return`` column to ``df`` and rewrite ``<ticker>.csv``.

    Args:
        df: DataFrame with an ``'Adj Close'`` column. It is modified in place.
        ticker: Symbol used to build the CSV file name.

    Returns:
        The same ``df`` object, now carrying the ``daily_return`` column.
    """
    adj_close = df['Adj Close']
    # Each row is divided by the previous row; the first row has no
    # predecessor, so its return is NaN.
    df['daily_return'] = adj_close / adj_close.shift(1) - 1

    csv_path = "/home/johnadi/Desktop/projects/analytics-projects/python-for-finance-derek-banas/" + ticker + ".csv"
    # Written with pandas' default settings, i.e. the index is included.
    df.to_csv(csv_path)
    return df

##### Returns Total Return Over Time

In [16]:
def get_return_defined_time(df, syear, smonth, sday, eyear, emonth, eday):
    """Approximate the total return between two calendar dates (inclusive).

    The result is the mean of ``daily_return`` over the selected rows
    multiplied by the number of selected rows.

    Args:
        df: DataFrame with a ``'Date'`` column (date strings or datetimes) and
            a ``'daily_return'`` column.
        syear, smonth, sday: Year, month and day of the first date to include.
        eyear, emonth, eday: Year, month and day of the last date to include.

    Returns:
        ``number_of_rows * mean(daily_return)`` for the rows whose date lies in
        the range; NaN when no row falls inside the range.
    """
    start = pd.Timestamp(year=syear, month=smonth, day=sday)
    end = pd.Timestamp(year=eyear, month=emonth, day=eday)

    # Compare real timestamps rather than text. A 'Date' column read from CSV
    # holds strings such as "2020-01-02", and comparing those with an unpadded
    # bound such as "2020-1-1" is lexicographic, which wrongly excludes rows.
    dates = pd.to_datetime(df['Date'])
    window = df.loc[(dates >= start) & (dates <= end)]

    days = window.shape[0]
    return days * window['daily_return'].mean()

##### Matplotlib Finance

In [19]:
def mplfinance_plot(ticker, chart_type, syear, smonth, sday, eyear, emonth, eday):
    """Draw several mplfinance charts for ``ticker`` over a date range.

    Reads ``<ticker>.csv`` from the project folder, restricts it to the
    requested dates and draws, in order: a candlestick chart, a line chart, an
    OHLC chart with a 4-period moving average, and finally a two-panel figure
    (price on top, volume below) using ``chart_type`` with 3/5/7-period moving
    averages. Prints a message and returns ``None`` when the CSV is missing.

    Args:
        ticker: Symbol whose CSV file should be plotted.
        chart_type: mplfinance chart type used for the final two-panel figure.
        syear, smonth, sday: Year, month and day of the start date.
        eyear, emonth, eday: Year, month and day of the end date.
    """
    start = f"{syear}-{smonth}-{sday}"
    end = f"{eyear}-{emonth}-{eday}"
    try:
        df = pd.read_csv("/home/johnadi/Desktop/projects/analytics-projects/python-for-finance-derek-banas/" + ticker + ".csv")
    except FileNotFoundError:
        print("File does not exist!")
    else:
        # mplfinance needs a DatetimeIndex; it also enables label-based date slicing.
        df.index = pd.DatetimeIndex(df['Date'])
        df_sub = df.loc[start:end]
        mpf.plot(df_sub, type='candle')
        mpf.plot(df_sub, type="line")
        mpf.plot(df_sub, type="ohlc", mav=4)

        s = mpf.make_mpf_style(base_mpf_style="charles", rc={'font.size':8})
        fig = mpf.figure(figsize=(12,8), style=s)
        # Price goes in the top panel and volume in the bottom one. Creating
        # both axes at position 2 would stack them on top of each other.
        ax = fig.add_subplot(2, 1, 1)
        av = fig.add_subplot(2, 1, 2, sharex=ax)
        mpf.plot(df_sub, type=chart_type, mav=(3,5,7), ax=ax, volume=av, show_nontrading=True)

##### Simple Price Plot

In [28]:
def price_plot(ticker, syear, smonth, sday, eyear, emonth, eday):
    """Plot the adjusted closing price of ``ticker`` over a date range.

    Reads ``<ticker>.csv`` from the project folder, restricts it to the
    requested dates and draws ``'Adj Close'`` against the ``'Date'`` values.
    Prints a message and returns ``None`` when the CSV is missing.

    Args:
        ticker: Symbol whose CSV file should be plotted.
        syear, smonth, sday: Year, month and day of the start date.
        eyear, emonth, eday: Year, month and day of the end date.
    """
    start = f"{syear}-{smonth}-{sday}"
    end = f"{eyear}-{emonth}-{eday}"
    try:
        df = pd.read_csv("/home/johnadi/Desktop/projects/analytics-projects/python-for-finance-derek-banas/" + ticker + ".csv")
    except FileNotFoundError:
        print("File does not exist!")
    else:
        # A DatetimeIndex enables label-based slicing by date.
        df.index = pd.DatetimeIndex(df['Date'])
        df_sub = df.loc[start:end]
        # Select columns by name, not position: positions shift when the CSV
        # carries an extra leading index column, so fixed offsets can silently
        # pick the wrong series.
        np_adj_close = df_sub['Adj Close'].to_numpy()
        date_arr = df_sub['Date'].to_numpy()
        fig = plt.figure(figsize=(12,6), dpi=100)
        axes = fig.add_axes([0,0,1,1])
        axes.plot(date_arr, np_adj_close, color='navy')
        # MaxNLocator(8) keeps the number of major x ticks small enough to read.
        axes.xaxis.set_major_locator(plt.MaxNLocator(8))
        axes.grid(True, color='0.6', dashes=(5, 2, 1, 2))
        axes.set_facecolor('#FAEBD7')

##### Download Multiple Stocks

In [29]:
def download_multiple_stocks(syear, smonth, sday, eyear, emonth, eday, *args):
    """Download and save a CSV for every ticker in ``args``.

    Args:
        syear, smonth, sday: Year, month and day of the start date.
        eyear, emonth, eday: Year, month and day of the end date.
        *args: Ticker symbols; each is passed to ``save_to_csv_from_yahoo``
            together with the same date range.
    """
    date_parts = (syear, smonth, sday, eyear, emonth, eday)
    for symbol in args:
        save_to_csv_from_yahoo(symbol, *date_parts)

##### Merge Multiple Stocks in One DataFrame by Column Name

In [32]:
def merge_df_by_column_name(col_name, syear, smonth, sday, eyear, emonth, eday, *tickers):
    """Build one DataFrame holding ``col_name`` for every ticker.

    Args:
        col_name: Column to take from each downloaded frame (e.g. ``'Adj Close'``).
        syear, smonth, sday: Year, month and day of the start date.
        eyear, emonth, eday: Year, month and day of the end date.
        *tickers: Symbols to download; each becomes one column of the result.

    Returns:
        A DataFrame with one column per ticker, named after the ticker. It is
        empty when no tickers are given.
    """
    window = {
        "start": f"{syear}-{smonth}-{sday}",
        "end": f"{eyear}-{emonth}-{eday}",
    }
    combined = pd.DataFrame()
    for symbol in tickers:
        downloaded = yf.download(symbol, **window)
        combined[symbol] = downloaded[col_name]
    return combined

##### Get Changing Value of Investment using Multiple Stocks

In [33]:
def plot_return_multiple_stocks(investment, stock_df):
    """Plot how ``investment`` would have evolved in each column of ``stock_df``.

    Every column is divided by its first row and scaled by ``investment``, so
    all lines start at ``investment``.

    Args:
        investment: Amount notionally invested in each stock on the first row.
        stock_df: DataFrame of prices, one column per stock.
    """
    first_row = stock_df.iloc[0]
    value_over_time = stock_df / first_row * investment
    value_over_time.plot(figsize=(15,6))

##### Get Standard Deviation for Multiple Stocks

Risk is a measure of the variability of return. Variance and Standard Deviation allow us to quantify risk. 

Variance is a measure of how spread out a data set is. The sample variance equals the sum of the squared differences between each data point and the mean of the data set, divided by the number of data points minus 1 (roughly the average squared deviation from the mean).

Standard deviation is a measure of the amount of variation within a set of values. A low standard deviation indicates that the values tend to be close to the mean. A high standard deviation means values are more spread out. Standard deviation is the square root of the variance. 

In [36]:
def get_stock_mean_and_std(stock_df, ticker):
    """Return the mean and standard deviation of one column.

    Args:
        stock_df: DataFrame with one column per stock.
        ticker: Name of the column to summarise.

    Returns:
        A ``(mean, standard_deviation)`` tuple computed with the column's
        ``mean()`` and ``std()`` methods.
    """
    series = stock_df[ticker]
    return series.mean(), series.std()

In [37]:
def get_mult_stock_mean_and_std(stock_df):
    """Print mean, standard deviation and coefficient of variation per column.

    Args:
        stock_df: DataFrame with one column per stock.
    """
    for symbol in stock_df.columns:
        average, std_dev = get_stock_mean_and_std(stock_df, symbol)
        # Coefficient of variation: standard deviation relative to the mean.
        coeff_of_variation = std_dev / average
        print("Stock: {:4} Mean: {:7.2f} Standard Deviation: {:2.2f}".format(symbol, average, std_dev))
        print("Coefficient of Variation: {}\n".format(coeff_of_variation))

##### Test Functions

In [25]:
# save_to_csv_from_yahoo("AMZN", 2020, 1 ,1, 2021, 1, 1)

In [24]:
# AMZN = get_dataframe_from_csv('AMZN')
# AMZN

In [23]:
# add_daily_return_to_df(AMZN, 'AMZN')

In [26]:
# total_ret = get_return_defined_time(AMZN, 2020, 1 ,1, 2021, 1, 1)
# print("Total Return: ", total_ret)

In [22]:
# mplfinance_plot('AMZN', 'ohlc', 2020, 1 ,1, 2021, 1, 1)

In [30]:
# price_plot('AMZN', 2020, 1 ,1, 2021, 1, 1)

In [34]:
# tickers = ['META', "AAPL", "NFLX", "GOOG"]

# download_multiple_stocks(2020, 1 ,1, 2021, 1, 1, *tickers)

In [40]:
# Symbols to combine into a single adjusted-close table.
tickers = ["META", "AAPL", "NFLX", "GOOG", "AMZN"]

# One 'Adj Close' column per ticker for the 2020-01-01 to 2021-01-01 window.
multi_df = merge_df_by_column_name("Adj Close", 2020, 1, 1, 2021, 1, 1, *tickers)
multi_df

# plot_return_multiple_stocks(100, multi_df)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,META,AAPL,NFLX,GOOG,AMZN
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-02,209.557648,73.059418,329.809998,68.368500,94.900497
2020-01-03,208.448837,72.349129,325.899994,68.032997,93.748497
2020-01-06,212.374680,72.925636,335.829987,69.710503,95.143997
2020-01-07,212.834183,72.582664,330.750000,69.667000,95.343002
2020-01-08,214.991898,73.750229,339.260010,70.216003,94.598503
...,...,...,...,...,...
2020-12-24,267.116577,129.514481,513.969971,86.942497,158.634506
2020-12-28,276.706421,134.146652,519.119995,88.804497,164.197998
2020-12-29,276.486633,132.360504,530.869995,87.935997,166.100006
2020-12-30,271.581848,131.231918,524.590027,86.975998,164.292496


In [39]:
# Print the mean, standard deviation and coefficient of variation of each column of multi_df.
get_mult_stock_mean_and_std(multi_df)

Stock: META Mean:  234.30 Standard Deviation: 38.52
Coefficient of Variation: 0.16442381513470283

Stock: AAPL Mean:   93.28 Standard Deviation: 21.55
Coefficient of Variation: 0.23099016066777844

Stock: NFLX Mean:  446.83 Standard Deviation: 65.61
Coefficient of Variation: 0.14684116488739776

Stock: GOOG Mean:   74.07 Standard Deviation: 8.80
Coefficient of Variation: 0.11878452781797685

Stock: AMZN Mean:  134.04 Standard Deviation: 27.29
Coefficient of Variation: 0.2035772250662167

