In [4]:
import numpy as np
import pandas as pd
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline

import datetime as dt
import mplfinance as mpf
import time
import yfinance as yf

import os
from os import listdir
from os.path import isfile, join

import statsmodels.api as sum
import seaborn as sns

In [7]:
# Default values shared by the helper functions in this notebook.

# Directory that holds one "<ticker>.csv" file per stock. The trailing slash
# matters: file names are appended with plain string concatenation.
path = "/workspaces/Quantitative-Finance/Data/"

# Start of the analysis window, as separate parts, a datetime and a string.
S_year, S_month, S_day = 2020, 1, 1
S_date_datetime = dt.datetime(S_year, S_month, S_day)
# The string form is NOT zero-padded (e.g. "2020-1-1").
S_date_str = "-".join(str(part) for part in (S_year, S_month, S_day))

# End of the analysis window, same three representations.
E_year, E_month, E_day = 2024, 5, 31
E_date_datetime = dt.datetime(E_year, E_month, E_day)
E_date_str = "-".join(str(part) for part in (E_year, E_month, E_day))


In [6]:
# # get stock file names in a list
# files = [x for x in listdir(path) if isfile(join(path,x))]
# tickers = [os.path.splitext(x)[0] for x in files]
# # create dataframe from our list
# stock_df = pd.DataFrame(tickers,columns=["tickers"])

# Load a ticker's saved CSV into a DataFrame
def get_df_from_csv(ticker):
    """Read ``path + ticker + ".csv"`` with ``pd.read_csv`` and return the frame.

    If the file does not exist, a message is printed and ``None`` is
    returned, so callers must be prepared for a ``None`` result.
    """
    csv_file = path + ticker + ".csv"
    try:
        return pd.read_csv(csv_file)
    except FileNotFoundError:
        print("File doesn't exist")
        return None
    

# Write a ticker's DataFrame out as a CSV file
def save_dataframe_to_csv(df,ticker):
    """Save ``df`` to ``path + ticker + ".csv"`` using ``DataFrame.to_csv`` defaults."""
    destination = path + ticker + ".csv"
    df.to_csv(destination)

# return on investment over time
def get_roi(df, sdate=None, edate=None):
    """Return the return on investment of 'Adj Close' between two dates.

    Parameters
    ----------
    df : DataFrame
        Must contain 'Date' and 'Adj Close' columns. The 'Date' column is
        converted with ``pd.to_datetime`` and written back, so the caller's
        frame is modified.
    sdate, edate : optional
        Start and end dates to look up. When omitted they default to the
        notebook-level ``S_date_str`` and ``E_date_str``.

    Returns
    -------
    ``(end_val - start_val) / start_val`` as a fraction (0.5 means +50%).

    Raises
    ------
    KeyError
        If ``df`` has no row for the start date or for the end date.
    """
    if sdate is None:
        sdate = S_date_str
    if edate is None:
        edate = E_date_str

    df['Date'] = pd.to_datetime(df['Date'])
    start_rows = df.loc[df['Date'] == sdate, 'Adj Close']
    end_rows = df.loc[df['Date'] == edate, 'Adj Close']
    if start_rows.empty:
        raise KeyError(f"No 'Adj Close' row for start date {sdate}")
    if end_rows.empty:
        raise KeyError(f"No 'Adj Close' row for end date {edate}")

    # Take the first matching row by POSITION. Indexing the filtered Series
    # with [0] looks up the index label 0, which only exists when the match
    # happens to be the very first row of the frame.
    start_val = start_rows.iloc[0]
    end_val = end_rows.iloc[0]
    print("Initial Price:",start_val)
    print("Final Price:",end_val)
    roi = (end_val - start_val) / start_val
    return roi

# get coefficient of variation
def get_cov(stock_df):
    """Return the coefficient of variation of the 'Adj Close' column.

    Computed as ``std / mean`` of ``stock_df['Adj Close']``, where ``std`` is
    the pandas ``Series.std`` default.

    Raises KeyError if ``stock_df`` has no 'Adj Close' column.
    """
    adj_close = stock_df['Adj Close']
    return adj_close.std() / adj_close.mean()
    

# Merge multiple stocks into one df
def merge_df_by_column_name(col_name,*tickers):
    """Collect ``col_name`` from each ticker's CSV into one DataFrame.

    Each ticker becomes a column named after the ticker. Tickers whose CSV
    cannot be loaded (``get_df_from_csv`` returns ``None`` after printing its
    message) are skipped.

    NOTE: a later definition with the same name, taking ``sdate`` and
    ``edate`` as well, appears further down in this cell and replaces this
    one once the cell has run.
    """
    mult_df = pd.DataFrame()

    for x in tickers:
        df = get_df_from_csv(x)
        if df is None:
            # Missing file: already reported by get_df_from_csv.
            continue
        mult_df[x] = df[col_name]
    return mult_df

def get_valid_dates(df, sdate, edate):
    """Return the first and last 'Date' values that fall in ``(sdate, edate]``.

    The lower bound is exclusive and the upper bound inclusive. Both results
    are returned as 'YYYY-MM-DD' strings with month and day padded to two
    characters.

    The 'Date' column may hold strings (as read from CSV) or datetime values
    (after ``pd.to_datetime``); both are supported.

    Returns
    -------
    ``(first_date, last_date)`` on success. On any failure, including no rows
    in the range, prints "Date Corrupted" and returns ``None``.
    """
    def _as_padded_str(value):
        # String dates such as '2020-1-2': left-pad every part shorter than
        # two characters with a single '0'.
        if isinstance(value, str):
            return '-'.join(('0' if len(x)<2 else '')+x for x in value.split('-'))
        # Datetime-like values (column already converted with pd.to_datetime).
        return value.strftime('%Y-%m-%d')

    try:
        mask = (df['Date'] > sdate) & (df['Date'] <= edate)
        sm_df = df.loc[mask].set_index(['Date'])
        date_leading = _as_padded_str(sm_df.index.min())
        date_ending = _as_padded_str(sm_df.index.max())
    except Exception:
        # Best-effort contract kept from the original: report and return None.
        print("Date Corrupted")
        return None
    return date_leading, date_ending
    
def roi_between_dates(df, sdate, edate):
    """Return the fractional change in 'Adj Close' from ``sdate`` to ``edate``.

    ``sdate`` and ``edate`` are used as row labels (``df.loc[label, 'Adj Close']``).
    If the lookup or the arithmetic raises, "Data Corrupted" is printed and
    ``None`` is returned.
    """
    try:
        opening, closing = (df.loc[day, 'Adj Close'] for day in (sdate, edate))
        return (closing - opening) / opening
    except Exception:
        print("Data Corrupted")
        return None
    
def merge_df_by_column_name(col_name, sdate, edate, *tickers):
    """Collect ``col_name`` for several tickers, limited to ``[sdate, edate]``.

    For each ticker the CSV is loaded, its 'Date' column is converted with
    ``pd.to_datetime``, and the rows with ``sdate <= Date <= edate`` are
    selected. The selected ``col_name`` values become a column named after
    the ticker.

    Tickers whose CSV cannot be loaded (``get_df_from_csv`` returns ``None``
    after printing its message) are skipped.

    NOTE(review): columns are aligned on each file's row labels, not on
    'Date'. Rows line up across tickers only when their CSVs share the same
    row layout -- confirm this holds for the data in use.
    """
    # Will hold data for all dataframes with the same column name
    mult_df = pd.DataFrame()

    for x in tickers:
        df = get_df_from_csv(x)
        if df is None:
            # Missing file: already reported by get_df_from_csv.
            continue
        df['Date'] = pd.to_datetime(df['Date'])
        # Use a mask to grab data between defined dates (both ends inclusive)
        mask = (df['Date'] >= sdate) & (df['Date'] <= edate)
        mult_df[x] = df.loc[mask, col_name]

    return mult_df

23
