In [10]:
import numpy as np
import pandas as pd
from pandas_datareader import data as web
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline

import datetime as dt
import mplfinance as mpf
import time
import yfinance as yf

import os
from os import listdir
from os.path import isfile, join

import statsmodels.api as sum
import seaborn as sns
from statsmodels.tsa.ar_model import AutoReg, ar_select_order

In [36]:
#default Values
path = "D:\\Personal\\Quantitative-Finance\\stocks\\"
S_year = 2023
S_month = 1
S_day = 1
S_date_str = f"{S_year}-{S_month}-{S_day}"
S_date_datetime = dt.datetime(S_year,S_month,S_day)

E_year = 2024
E_month = 5
E_day = 31
E_date_str = f"{E_year}-{E_month}-{E_day}"
E_date_datetime = dt.datetime(E_year,E_month,E_day)


In [37]:
# # get stock file names in a list
files = [x for x in listdir(path) if isfile(join(path,x))]
tickers = [os.path.splitext(x)[0] for x in files]
tickers.sort()
# # create dataframe from our list
# stock_df = pd.DataFrame(tickers,columns=["tickers"])

# return dataframe from csv
def get_df_from_csv(ticker):
    try:
        df = pd.read_csv(path+ticker+".csv",index_col='Date',parse_dates=True)
    except FileNotFoundError:
        print("File doesn't exist")
    else:
        return df
    

# Save dataframe to csv
def save_dataframe_to_csv(df,ticker):
    df.to_csv(path+ticker+".csv")

# return on investment over time
def get_roi(df):
    df['Date'] = pd.to_datetime(df['Date'])
    start_val = df[df['Date'] == S_date_str]['Adj Close'][0]
    end_val = df[df['Date'] == E_date_str]['Adj Close'][0]
    print("Initial Price:",start_val)
    print("Final Price:",end_val)
    roi = (end_val - start_val) / start_val
    return roi

# get coefficient of variance
def get_cov(stock_df):
    for stock in stock_df:
        mean,sd = stock_df['Adj Close'].mean(),stock_df['Adj Close'].std()
        cov = sd / mean
        return cov
    

# Merge Multiple stock in on df
def merge_df_by_column_name(col_name,*tickers):
    
    mult_df = pd.DataFrame()
    start = S_date_datetime
    end = E_date_datetime
    
    for x in tickers:
        mult_df[x] = get_df_from_csv(x)[col_name]
    return mult_df

def get_valid_dates(df, sdate, edate):  
    try:
        mask = (df['Date'] > sdate) & (df['Date'] <= edate) 
        sm_df = df.loc[mask]
        sm_df = sm_df.set_index(['Date'])
        sm_date = sm_df.index.min()
        last_date = sm_df.index.max()
        date_leading = '-'.join(('0' if len(x)<2 else '')+x for x in sm_date.split('-'))
        date_ending = '-'.join(('0' if len(x)<2 else '')+x for x in last_date.split('-'))
    except Exception:
        print("Date Corrupted")
    else:
        return date_leading, date_ending
    
def roi_between_dates(df, sdate, edate):
    try: 
        start_val = df.loc[sdate,'Adj Close'] 
        end_val = df.loc[edate,'Adj Close']
        roi = ((end_val - start_val) / start_val)
    except Exception:
        print("Data Corrupted")
    else:
        return roi
    
def merge_df_by_column_name(col_name, sdate, edate, *tickers):
    # Will hold data for all dataframes with the same column name
    mult_df = pd.DataFrame()
    
    for x in tickers:
        df = get_df_from_csv(x)
        df['Date'] = pd.to_datetime(df['Date'])
        # Use a mask to grab data between defined dates
        mask = (df['Date'] >= sdate) & (df['Date'] <= edate)
        mult_df[x] = df.loc[mask][col_name]
        
    return mult_df

In [38]:
def calc_projected_roi(ticker):
    
    # Load and preprocess data
    a_df = get_df_from_csv(ticker)
    a_df = a_df.asfreq('d') # change frequency to day
    a_df = a_df.ffill()
    a_df.drop(a_df.columns[a_df.columns.str.contains('unnamed', case=False)], axis=1, inplace=True)
    a_df = a_df.drop(['daily_return'], axis=1)


    # Determine optimal lags and fit model
    lags = ar_select_order(a_df, maxlag=30)
    model = AutoReg(a_df['Adj Close'], lags.ar_lags)
    model_fit = model.fit()

    # Split data into training and testing sets
    n = int(len(a_df) * 0.8)
    train_df = a_df.iloc[50:n]
    test_df = a_df.iloc[n:]

    # Fit the model on the training data
    train_model = AutoReg(a_df['Adj Close'], 600).fit(cov_type="HC0")
    start = len(train_df)
    end = len(train_df) + len(test_df) - 1

    # Make predictions
    prediction = train_model.predict(start=start, end=end, dynamic=True)

    # Forecast future values
    forecast = train_model.predict(start=end, end=end+160, dynamic=True)

    s_price = forecast.head(1).iloc[0]

    # Get the last price of prediction
    e_price = forecast.iloc[-1]

    # Get return over prediction
    return (e_price - s_price) / s_price

In [43]:
calc_projected_roi('GOOG')

-0.11376061299483518

In [39]:
def get_proj_rois():
    ticker = []
    roi = []

    for x in tickers:
        print("working on : ",x)
        try:
            the_roi = calc_projected_roi(x)
        except Exception as ex:
            print("Stock Data Corrupted")
        else:
            ticker.append(x)
            print("ROI :",the_roi)
            roi.append(the_roi)
    return pd.DataFrame({'Ticker':ticker, 'ROI':roi})

In [40]:
proj_roi_df = get_proj_rois()
proj_roi_df

working on :  A
ROI : -0.11376061299483518
working on :  AA
ROI : -0.11376061299483518
working on :  AAL
ROI : -0.11376061299483518
working on :  AAME
ROI : -0.11376061299483518
working on :  AAN
ROI : -0.11376061299483518
working on :  AAOI
ROI : -0.11376061299483518
working on :  AAON
ROI : -0.11376061299483518
working on :  AAP
ROI : -0.11376061299483518
working on :  AAPL
ROI : -0.11376061299483518
working on :  AAT
ROI : -0.11376061299483518
working on :  AAWW
ROI : -0.11376061299483518
working on :  AAXN
ROI : -0.11376061299483518
working on :  ABBV
ROI : -0.11376061299483518
working on :  ABC
ROI : -0.11376061299483518
working on :  ABCB
ROI : -0.11376061299483518
working on :  ABEO
ROI : -0.11376061299483518
working on :  ABG
ROI : -0.11376061299483518
working on :  ABIO
ROI : -0.11376061299483518
working on :  ABM
ROI : -0.11376061299483518
working on :  ABMD
ROI : -0.11376061299483518
working on :  ABR
ROI : -0.11376061299483518
working on :  ABT
ROI : -0.11376061299483518
wo

Unnamed: 0,Ticker,ROI
0,A,-0.113761
1,AA,-0.113761
2,AAL,-0.113761
3,AAME,-0.113761
4,AAN,-0.113761
...,...,...
3256,ZUMZ,-0.113761
3257,ZUO,-0.113761
3258,ZVO,-0.113761
3259,ZYNE,-0.113761


In [41]:
proj_roi_df.sort_values(by=['ROI'], ascending=False)[:20]

Unnamed: 0,Ticker,ROI
0,A,-0.113761
2178,OTEL,-0.113761
2168,ORLY,-0.113761
2169,ORN,-0.113761
2170,ORRF,-0.113761
2171,OSBC,-0.113761
2172,OSG,-0.113761
2173,OSIS,-0.113761
2174,OSK,-0.113761
2175,OSPN,-0.113761
