In [2]:
import numpy as np 
import pandas as pd 
import os
import re
import math
import calendar

In [3]:
# Data directory layout, resolved relative to the current working directory.
path = os.path.join(os.getcwd(), "Data")
stockspath, corporateactionspath = (
    os.path.join(path, folder) for folder in ("Stock", "CorporateActions")
)
# print(corporateactionspath)
# print(stockspath)
# print(path)

In [2]:
def create_index():
    """Derive return columns for the index and save them as ``modIndex.csv``.

    Reads ``Index.csv`` from the data directory (module-level ``path``), adds

    * ``% Return`` - percentage change of ``Close`` relative to the previous row,
    * ``% YTD``    - percentage change from each row's ``Close`` to the ``Close``
      of the last row in the file,

    and writes the result next to the input as ``modIndex.csv`` (no index column).
    Returns nothing.
    """
    index_frame = pd.read_csv(os.path.join(path, "Index.csv"))
    close = index_frame["Close"]
    previous_close = close.shift(1)
    # The last row of the file is the reference point for the "% YTD" column.
    reference_close = close.iloc[-1]

    index_frame["% Return"] = (close / previous_close - 1) * 100
    index_frame["% YTD"] = (reference_close / close - 1) * 100

    index_frame.to_csv(os.path.join(path, "modIndex.csv"), index=None)

In [5]:
def calculate_beta(stock):
    """Attach index returns and a per-row beta to a stock price frame.

    Parameters
    ----------
    stock : pandas.DataFrame
        Must contain ``Date`` and ``Close Price``. The frame is modified in
        place (``Date`` is converted to datetime and ``% Return of Company`` is
        added) before being merged, matching the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        ``stock`` left-joined on ``Date`` with ``Close Price of SP500`` and
        ``% Return of SP500`` from ``modIndex.csv`` (read from the module-level
        ``path``), plus a ``Beta`` column.

    Notes
    -----
    ``Beta`` on row ``i`` is ``Cov(company, index) / Var(index)`` computed over
    rows ``i`` to the end of the frame, using only rows where both returns are
    present. Both statistics are sample statistics (``ddof=1``). Rows with fewer
    than two usable pairs, or a zero index variance, get ``NaN``.

    NOTE(review): the suffix window means "all older history" only if rows are
    ordered newest-first - confirm against the input files.
    """
    # Normalise dates first so every later comparison works on datetimes.
    stock["Date"] = pd.to_datetime(stock["Date"])
    stock["% Return of Company"] = ((stock["Close Price"] / stock["Close Price"].shift(1)) - 1) * 100

    ind = pd.read_csv(os.path.join(path, "modIndex.csv"))
    ind["Date"] = pd.to_datetime(ind["Date"])
    # min/max makes the window independent of the row order of ``stock``.
    in_window = ind["Date"].between(stock["Date"].min(), stock["Date"].max())
    ind = (
        ind.loc[in_window]
        .rename(columns={'Close': 'Close Price of SP500', '% Return': '% Return of SP500'})
        .drop(columns=['Open', 'High', 'Low', '% YTD'])
    )
    stock = pd.merge(stock, ind, on="Date", how="left")

    company = stock["% Return of Company"].to_numpy(dtype=float)
    market = stock["% Return of SP500"].to_numpy(dtype=float)
    # A pair is usable only when both returns are known for that row.
    usable = ~(np.isnan(company) | np.isnan(market))

    betas = []
    for start in range(len(stock)):
        keep = usable[start:]
        company_window = company[start:][keep]
        market_window = market[start:][keep]
        if company_window.size < 2:
            betas.append(np.nan)
            continue
        market_variance = market_window.var(ddof=1)
        if market_variance == 0:
            betas.append(np.nan)
            continue
        covariance = np.cov(company_window, market_window)[0, 1]
        betas.append(covariance / market_variance)
    stock["Beta"] = betas
    return stock

In [39]:
def calculate_alpha(stock):
    """Attach the index ``% YTD`` and an ``Alpha`` column to a stock frame.

    Parameters
    ----------
    stock : pandas.DataFrame
        Must contain ``Date``, ``Close Price``, ``Rate`` and ``Beta``. The frame
        is modified in place (``Date`` converted to datetime, ``% YTD of
        Company`` added) before being merged, matching the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        ``stock`` left-joined on ``Date`` with ``% YTD of SP500`` from
        ``modIndex.csv`` (read from the module-level ``path``), with ``Beta``
        coerced to numeric (unparseable values become ``NaN``) and

        ``Alpha = % YTD of Company - (Rate + Beta * (% YTD of SP500 - Rate))``.

    Notes
    -----
    ``% YTD of Company`` is the percentage change from each row's close to the
    close of the last row of ``stock``.
    """
    # Normalise dates first so the index window below compares datetimes.
    stock["Date"] = pd.to_datetime(stock["Date"])
    closes = stock["Close Price"]
    # An empty frame has no reference close; NaN keeps the arithmetic valid.
    reference_close = closes.iloc[-1] if len(closes) else np.nan
    stock["% YTD of Company"] = ((reference_close / closes) - 1) * 100

    ind = pd.read_csv(os.path.join(path, "modIndex.csv"))
    ind["Date"] = pd.to_datetime(ind["Date"])
    # min/max makes the window independent of the row order of ``stock``.
    in_window = ind["Date"].between(stock["Date"].min(), stock["Date"].max())
    ind = (
        ind.loc[in_window]
        .drop(columns=['Open', 'High', 'Low', "Close", "% Return"])
        .rename(columns={'% YTD': '% YTD of SP500'})
    )
    stock = pd.merge(stock, ind, on="Date", how="left")

    stock["Beta"] = pd.to_numeric(stock["Beta"], errors='coerce')
    expected_return = stock["Rate"] + (stock["Beta"] * (stock["% YTD of SP500"] - stock["Rate"]))
    stock["Alpha"] = stock["% YTD of Company"] - expected_return
    return stock

In [None]:
def add_risk_free_column(stock):
    """Left-join the risk-free rate onto ``stock`` by ``Date``.

    Rates are read from ``RiskFreeRate.csv`` in the data directory (module-level
    ``path``); its ``Date`` column is parsed as datetime and ``Rate`` as numeric.
    ``stock["Date"]`` is converted to datetime in place. Rows of ``stock`` with
    no matching rate keep ``NaN`` in the new ``Rate`` column.
    """
    risk_free = pd.read_csv(os.path.join(path, "RiskFreeRate.csv"))
    risk_free = risk_free.assign(Date=pd.to_datetime(risk_free["Date"]))
    stock["Date"] = pd.to_datetime(stock["Date"])
    risk_free = risk_free.assign(Rate=pd.to_numeric(risk_free["Rate"]))
    return stock.merge(risk_free, how="left", on="Date")

In [None]:
def create_lower_upper_bands(stock):
    """Add ``Upper Band``, ``Lower Band`` and ``Band Area`` columns to ``stock``.

    A two-column copy of ``stock`` (``Date``, ``Close Price``) is sorted by
    ascending close price, ties broken by most recent date first, and handed to
    ``create_upper_band`` and ``create_lower_band``; ``calculate_band_area`` then
    derives the area column. ``stock["Date"]`` is converted to datetime in place.
    """
    price_history = stock[["Date", "Close Price"]].copy()
    price_history["Date"] = pd.to_datetime(price_history["Date"])
    price_history = price_history.sort_values(
        by=["Close Price", "Date"], ascending=[True, False]
    )

    stock["Date"] = pd.to_datetime(stock["Date"])

    with_upper = create_upper_band(stock, price_history)
    with_both = create_lower_band(with_upper, price_history)
    return calculate_band_area(with_both)


In [None]:
def create_upper_band(stock,sorted_data):
    """Fill ``stock["Upper Band"]`` from the price/date pairs in ``sorted_data``.

    Parameters
    ----------
    stock : pandas.DataFrame
        Needs a datetime ``Date`` column; modified in place and returned.
    sorted_data : pandas.DataFrame
        ``Date`` / ``Close Price`` pairs, visited in the order given.

    Notes
    -----
    Each pair writes its ``Close Price`` to every row of ``stock`` dated between
    the pair's ``Date`` and the date of the first row of ``stock`` (inclusive).
    Later pairs overwrite earlier ones. With the ascending-price ordering built
    by ``create_lower_upper_bands``, a row therefore ends up holding the highest
    close on or before its own date.

    Rows never covered by any window keep the ``""`` placeholder, so the column
    stays object-typed as before.
    """
    stock["Upper Band"] = ""
    if stock.empty:
        # No first row to anchor the window on; nothing to fill.
        return stock
    end_date = stock["Date"].iloc[0]
    for start_date, close_price in zip(sorted_data["Date"], sorted_data["Close Price"]):
        # One masked assignment per pair replaces the former per-cell inner loop.
        in_window = stock["Date"].between(start_date, end_date)
        stock.loc[in_window, "Upper Band"] = close_price
    return stock

In [None]:
def create_lower_band(stock,sorted_data):
    """Fill ``stock["Lower Band"]`` from the price/date pairs in ``sorted_data``.

    Parameters
    ----------
    stock : pandas.DataFrame
        Needs a datetime ``Date`` column; modified in place and returned.
    sorted_data : pandas.DataFrame
        ``Date`` / ``Close Price`` pairs, visited in the order given.

    Notes
    -----
    Each pair writes its ``Close Price`` to every row of ``stock`` dated between
    the date of the last row of ``stock`` and the pair's ``Date`` (inclusive).
    Later pairs overwrite earlier ones. With the ascending-price ordering built
    by ``create_lower_upper_bands``, a row therefore ends up holding the highest
    close on or after its own date.

    NOTE(review): despite the name this is a running maximum, not a minimum -
    confirm that is the intended definition of the lower band.

    Rows never covered by any window keep the ``""`` placeholder, so the column
    stays object-typed as before.
    """
    stock["Lower Band"] = ""
    if stock.empty:
        # No last row to anchor the window on; nothing to fill.
        return stock
    start_date = stock["Date"].iloc[-1]
    for end_date, close_price in zip(sorted_data["Date"], sorted_data["Close Price"]):
        # One masked assignment per pair replaces the former per-cell inner loop.
        in_window = stock["Date"].between(start_date, end_date)
        stock.loc[in_window, "Lower Band"] = close_price
    return stock

In [None]:
def calculate_band_area(stock):
    """Convert both band columns to numeric and add ``Band Area``.

    ``Band Area`` is ``Lower Band - Upper Band``. The frame is modified in place
    and returned.
    """
    for band in ("Upper Band", "Lower Band"):
        stock[band] = pd.to_numeric(stock[band])
    stock["Band Area"] = stock["Lower Band"].sub(stock["Upper Band"])
    return stock

In [None]:
# for name in os.listdir(stockspath):
#     stock = pd.read_csv(os.path.join(stockspath,name))
#     stock = add_risk_free_column(stock)
#     stock = calculate_beta(stock)
#     stock = calculate_alpha(stock)
#     stock = create_lower_upper_bands(stock)
#     stock = calculate_band_area(stock)
#     stock.to_csv(os.path.join(stockspath,"luab"+name),index=None)

In [None]:
# name = "sample.csv"
# stock = pd.read_csv(os.path.join(stockspath,name))
# stock = add_risk_free_column(stock)
# stock = calculate_beta(stock)
# stock = calculate_alpha(stock)
# stock.to_csv(os.path.join(stockspath,"ab"+name),index=None)

In [None]:
# stock = create_lower_upper_bands(stock)
# stock = calculate_band_area(stock)
# stock.to_csv(os.path.join(stockspath,"luab"+name),index=None)

In [None]:
def create_dividend(corporate,stock):
    """Add a ``Dividend Value`` column holding the quarter's average dividend.

    Parameters
    ----------
    corporate : pandas.DataFrame
        Corporate actions with ``Ex Date`` and ``Purpose`` columns. ``Ex Date``
        is converted to datetime in place (unparseable values become ``NaT``).
    stock : pandas.DataFrame
        Price rows with a ``Date`` column, converted to datetime in place
        (unparseable values become ``NaT``).

    Returns
    -------
    pandas.DataFrame
        ``stock`` with ``Dividend Value``: the mean of the dividend amounts whose
        ``Ex Date`` falls in the same calendar year and quarter as the row's
        ``Date``, or ``0`` when there is none.

    Notes
    -----
    A corporate action counts as a dividend when ``Purpose`` contains
    ``"Dividend"``; the amount is the first number found in ``Purpose``. Actions
    with a missing ``Purpose``, no number in it, or an unparseable ``Ex Date``
    are skipped instead of raising or landing in a bogus bucket.
    """
    corporate['Ex Date'] = pd.to_datetime(corporate['Ex Date'], errors='coerce')
    stock['Date'] = pd.to_datetime(stock['Date'], errors='coerce')

    def quarter_key(date):
        # Calendar quarter: Jan-Mar -> 1, Apr-Jun -> 2, Jul-Sep -> 3, Oct-Dec -> 4.
        return (date.year, (date.month - 1) // 3 + 1)

    # na=False: a missing Purpose is simply "not a dividend".
    is_dividend = corporate['Purpose'].str.contains("Dividend", na=False)
    dividend = corporate.loc[is_dividend]

    payouts = {}
    for ex_date, purpose in zip(dividend['Ex Date'], dividend['Purpose']):
        # Escaped decimal point: only "12" or "12.5" style numbers are accepted,
        # so trailing punctuation such as "4/" is no longer captured.
        amount = re.search(r"\d+(?:\.\d+)?", purpose)
        if amount is None or pd.isna(ex_date):
            continue
        payouts.setdefault(quarter_key(ex_date), []).append(float(amount.group()))

    quarterly_average = {
        key: sum(amounts) / len(amounts) for key, amounts in payouts.items()
    }

    stock["Dividend Value"] = [
        0 if pd.isna(date) else quarterly_average.get(quarter_key(date), 0)
        for date in stock["Date"]
    ]
    return stock

In [4]:
def add_next_day_columns(stock):
    """Add ``Next Day <X> Price`` columns taken from the following row.

    For each of ``Open Price``, ``High Price``, ``Low Price`` and ``Close
    Price`` a ``Next Day ...`` column is added containing that column shifted up
    by one row, so the last row gets ``NaN``. The frame is modified in place and
    returned.

    NOTE(review): "next day" is only accurate if rows are ordered oldest-first -
    confirm against the input files.
    """
    new_columns = ["Next Day Open Price","Next Day High Price","Next Day Low Price","Next Day Close Price"]
    columns = ["Open Price","High Price","Low Price","Close Price"]
    # Assign column by column: no placeholder frame is needed, and this works
    # whether or not the target columns already exist.
    for new_column, column in zip(new_columns, columns):
        stock[new_column] = stock[column].shift(-1)
    return stock

In [None]:
# for name in os.listdir(stockspath):
#     if name.startswith("ar") and "sample" not in name:
#         corporate = pd.read_csv(os.path.join(corporateactionspath,name[2:]))
#         stock = pd.read_csv(os.path.join(stockspath,name))
#         stock = create_dividend(corporate,stock)
#         stock.to_csv(os.path.join(stockspath,"dv"+name),index=None)