In [96]:
import numpy as np 
import pandas as pd 
import os
import re
import math
import calendar
import time

In [97]:
security_id = "500325"
name = "RELIANCE INDUSTRIES LTD"

In [98]:
path = os.getcwd()

# Reading the data

In [99]:
index_df = pd.read_csv(os.path.join(path,"Data/Index.csv"))
corporate_df = pd.read_csv(os.path.join(path,"Data/CorporateActions/"+security_id+".csv"))
revenue_df = pd.read_csv(os.path.join(path,"Data/Revenue/"+security_id+".csv"))
stock_df = pd.read_csv(os.path.join(path,"Data/Stock/"+security_id+".csv"))

# Data Cleaning

In [100]:
def drop_duplicate_rows(df):
    df = df.drop_duplicates(subset=["Date"],keep="first")
    return df

In [101]:
def fill_with_previous_values(df):
    df.fillna(method="ffill",inplace=True)
    return df

In [102]:
def add_missing_rows(df,ind):
    df.Date = pd.to_datetime(df.Date)
    ind.Date = pd.to_datetime(ind.Date)
    s = df.Date.head(1).values[0]
    e = df.Date.tail(1).values[0]
    ind = ind[ind.Date.between(e,s)]
    missing_df = pd.DataFrame(columns=df.columns)
    indexes_dates = ind.Date.values
    df.Date = pd.to_datetime(df.Date)
    df_dates = df.Date.values
    start = 0
    for i,v in enumerate(indexes_dates):
        if v not in df.Date.values:
            res = list(np.append(ind.iloc[i].values,[np.nan]*8))
            missing_df.loc[start] = res
            start += 1
    df = pd.concat([df,missing_df])
    return df

In [103]:
def cleaning(df,ind):
    df = drop_duplicate_rows(df)
    ind = drop_duplicate_rows(ind)
    df = add_missing_rows(df,ind)
    df = fill_with_previous_values(df)
    return df


In [104]:
# stock_df = cleaning(stock_df,index_df)

# Applying Corporate Actions

In [105]:
def bonus_issue(stock,start_date,end_date,r1,r2):
    specific_dates = stock[stock.Date.between(end_date,start_date)]
    for index,row in specific_dates.iterrows():
        specific_dates.loc[index,"Open Price"] = specific_dates.loc[index,"Open Price"] * (r2/(r1+r2))
        specific_dates.loc[index,"Low Price"] = specific_dates.loc[index,"Low Price"] * (r2/(r1+r2))
        specific_dates.loc[index,"High Price"] = specific_dates.loc[index,"High Price"] * (r2/(r1+r2))
        specific_dates.loc[index,"Close Price"] = specific_dates.loc[index,"Close Price"] * (r2/(r1+r2))
        specific_dates.loc[index,"WAP"] = specific_dates.loc[index,"WAP"] * (r2/(r1+r2))
        stock.loc[index] = specific_dates.loc[index]
    return stock

In [106]:
def stock_split(stock,start_date,end_date,r1,r2):
    specific_dates = stock[stock.Date.between(end_date,start_date)]
    for index,row in specific_dates.iterrows():
        specific_dates.loc[index,"Open Price"] = specific_dates.loc[index,"Open Price"] * (r1/r2)
        specific_dates.loc[index,"Low Price"] = specific_dates.loc[index,"Low Price"] * (r1/r2)
        specific_dates.loc[index,"High Price"] = specific_dates.loc[index,"High Price"] * (r1/r2)
        specific_dates.loc[index,"Close Price"] = specific_dates.loc[index,"Close Price"] * (r1/r2)
        specific_dates.loc[index,"WAP"] = specific_dates.loc[index,"WAP"] * (r1/r2)
        stock.loc[index] = specific_dates.loc[index]
    return stock

In [107]:
def create_dividend(stock,corporate):
    corporate['Ex Date'] = pd.to_datetime(corporate['Ex Date'], errors='coerce')
    stock['Date'] = pd.to_datetime(stock['Date'], errors='coerce')

    dividend = corporate[corporate['Purpose'].str.contains("Dividend")]
    result = {}
    for index,row in dividend.iterrows():
        year = row["Ex Date"].year
        month = row["Ex Date"].month
        amount = re.findall(r"\d+.?\d*",row["Purpose"])[0]
        res = result.get(year,{})
        q = "1q" if 1 <= month <= 3 else "2q" if 4 <= month <= 6 else "3q" if 6 <= month <= 9 else "4q"
        val = res.get(q,[])
        val.append(float(amount))
        res[q] = val
        result[year] = res
    for year,quaters in result.items():
        for q, a in quaters.items():
            quaters[q] = sum(a)/len(a)
        result[year] = quaters
    divList = list()
    for index,row in stock.iterrows():
        year = row["Date"].year
        month = row["Date"].month
        q = "1q" if 1 <= month <= 3 else "2q" if 4 <= month <= 6 else "3q" if 6 <= month <= 9 else "4q"
        if result.get(year) != None:
            if result.get(year).get(q) != None:
                divList.append(result.get(year).get(q))
            else:
                divList.append(0)
        else:
            divList.append(0)
    stock["Dividend Value"] = divList
    return stock

In [108]:
def apply_corporate_actions(stock,corporate):
    stock["Date"] = pd.to_datetime(stock["Date"])
    corporate["Ex Date"] = pd.to_datetime(corporate["Ex Date"],errors='coerce')
    # corporate["BC Start Date"] = pd.to_datetime(corporate["BC Start Date"],errors='coerce')
    # corporate[" BC End Date\t"] = pd.to_datetime(corporate[" BC End Date\t"],errors='coerce')
    # corporate["ND Start Date"] = pd.to_datetime(corporate["ND Start Date"],errors='coerce')
    # corporate["ND End Date"] = pd.to_datetime(corporate["ND End Date"],errors='coerce')
    
    bonus_df = corporate[corporate['Purpose'].str.contains("Bonus")]
    for index,row in bonus_df.iterrows():
        start_date = bonus_df.loc[index,"Ex Date"]
        ratio = bonus_df.loc[index,"Purpose"]
        r1,r2 = re.findall(r"\d+",ratio)
        r1,r2 = int(r1),int(r2)
        end_date = stock.tail(1)["Date"].values[0]
        stock = bonus_issue(stock,start_date,end_date,r1,r2)

    stock_split_df = corporate[corporate['Purpose'].str.contains("Stock")]
    for index,row in stock_split_df.iterrows():
        start_date = stock_split_df.loc[index,"Ex Date"]
        ratio = stock_split_df.loc[index,"Purpose"]
        r1,r2 = re.findall(r"\d+",ratio)
        r1,r2 = int(r1),int(r2)
        end_date = stock.tail(1)["Date"].values[0]
        stock = stock_split(stock,start_date,end_date,r1,r2)
    
    stock = create_dividend(stock,corporate)

    return stock

In [109]:
# stock_df = apply_corporate_actions(stock_df,corporate_df)

# Create New Index 

In [110]:
def create_index():
    ind = pd.read_csv(os.path.join(path,"Data/Index.csv"))
    ind["% Return"] = ((ind["Close"] / ind['Close'].shift(1))-1)*100
    ind["% YTD"] = ((ind.tail(1)['Close'].values[0]/ind["Close"])-1)*100
    ind.to_csv(os.path.join(path,"Data/modIndex.csv"),index=None)

In [111]:
# create_index()

# Create Beta Feature

In [112]:
def calculate_beta(stock):
    stock["% Return of Company"] = ((stock["Close Price"] / stock['Close Price'].shift(1))-1)*100
    ind = pd.read_csv(os.path.join(path,"Data/modIndex.csv"))
    ind["Date"] = pd.to_datetime(ind["Date"])
    s = stock.Date.head(1).values[0]
    e = stock.Date.tail(1).values[0]
    ind = ind[ind.Date.between(e,s)]
    ind.rename(columns={'Close':'Close Price of SP500', '% Return':'% Return of SP500'}, inplace=True)
    ind.drop(['Open', 'High', 'Low', '% YTD'], axis = 1,inplace=True) 
    ind["Date"] = pd.to_datetime(ind["Date"])
    stock["Date"] = pd.to_datetime(stock["Date"])
    stock = pd.merge(stock, ind, on="Date", how = "left")

    sp500 = stock["% Return of SP500"]
    company = stock["% Return of Company"]
    results = list()
    for i in range(stock.shape[0]):
        # cov = np.cov(company[i:],sp500[i:])[0][1]
        cov = np.ma.cov(np.ma.masked_invalid(np.array(company[i:],sp500[i:])),rowvar=False)
        var = np.nanvar(sp500[i:])
        res = var/cov
        results.append(res)
    stock["Beta"] = results
    return stock

In [113]:
# stock_df = calculate_beta(stock_df)

# Create Risk Free Rate Feature

In [114]:
def add_risk_free_column(stock):
    riskrates = pd.read_csv(os.path.join(path,"Data/RiskFreeRate.csv"))
    riskrates["Date"] = pd.to_datetime(riskrates["Date"])
    stock["Date"] = pd.to_datetime(stock["Date"])
    riskrates["Rate"] = pd.to_numeric(riskrates["Rate"])
    res = pd.merge(stock, riskrates, on="Date", how = "left")
    return res

In [115]:
# stock_df = add_risk_free_column(stock_df)

# Create Alpha Feature

In [116]:
def calculate_alpha(stock):
    stock["% YTD of Company"] = ((stock.tail(1)['Close Price'].values[0]/stock["Close Price"])-1)*100
    ind = pd.read_csv(os.path.join(path,"Data/modIndex.csv"))
    ind["Date"] = pd.to_datetime(ind["Date"])
    s = stock.Date.head(1).values[0]
    e = stock.Date.tail(1).values[0]
    ind = ind[ind.Date.between(e,s)]
    ind.drop(['Open', 'High', 'Low', "Close", "% Return"], axis = 1,inplace=True) 
    ind.rename(columns={'% YTD':'% YTD of SP500'}, inplace=True)
    ind["Date"] = pd.to_datetime(ind["Date"])
    stock["Date"] = pd.to_datetime(stock["Date"])
    stock = pd.merge(stock, ind, on="Date", how = "left")
    stock["Beta"] = pd.to_numeric(stock["Beta"],errors='coerce')
    stock["Alpha"] = stock["% YTD of Company"]-(stock["Rate"]+(stock["Beta"]*(stock["% YTD of SP500"] - stock["Rate"])))
    return stock

In [117]:
# stock_df = calculate_alpha(stock_df)

# Create Lower Band, Upper Band, Band Area Features

In [118]:
def create_lower_band(stock):
    # sorted_data = pd.DataFrame()
    # sorted_data["Date"] = stock["Date"]
    # sorted_data["Close Price"] = stock["Close Price"]
    # sorted_data["Date"] = pd.to_datetime(sorted_data["Date"])
    # stock["Date"] = pd.to_datetime(stock["Date"])
    # sorted_data = sorted_data.sort_values(['Close Price', 'Date'], ascending=[True, False])
    # start_date = stock.tail(1)["Date"].values[0]

    stock["Lower Band"]=""
    for i,row in stock.iterrows():
        # end_date = row["Date"]
        # close_price = row["Close Price"]
        stock.loc[i,"Lower Band"] = min(stock.loc[i:]["Close Price"])
        # specific_dates = stock[stock.Date.between(start_date,end_date)]
        # for index,j in specific_dates.iterrows():
        #     stock.iloc[index,"Lower Band"] = close_price
    return stock

In [119]:
def create_upper_band(stock):
    # sorted_data = pd.DataFrame()
    # sorted_data["Date"] = stock["Date"]
    # sorted_data["Close Price"] = stock["Close Price"]
    # sorted_data["Date"] = pd.to_datetime(sorted_data["Date"])
    # stock["Date"] = pd.to_datetime(stock["Date"])
    # sorted_data = sorted_data.sort_values(['Close Price', 'Date'], ascending=[False, True])
    # end_date = stock.tail(1)["Date"].values[0]
    stock["Upper Band"]=""
    for i,row in stock.iterrows():
        # start_date = row["Date"]
        # close_price = row["Close Price"]
        stock.loc[i,"Upper Band"] = max(stock.loc[i:]["Close Price"])
        # specific_dates = stock[stock.Date.between(start_date,end_date)]
        # for index,j in specific_dates.iterrows():
            # stock.loc[index,"Upper Band"] = close_price
    return stock

In [120]:
def calculate_band_area(stock):
    stock["Upper Band"] = pd.to_numeric(stock["Upper Band"])
    stock["Lower Band"] = pd.to_numeric(stock["Lower Band"])
    stock["Band Area"] = stock["Upper Band"]-stock["Lower Band"]
    return stock

In [121]:
def create_lower_upper_bands(stock):
    stock["Lower Band"]=""
    stock["Upper Band"]=""
    stock["Band Area"] = ""

    for i,row in stock.iterrows():
        maxv = max(stock.loc[i:]["Close Price"])
        minv = min(stock.loc[i:]["Close Price"])
        stock.loc[i,"Upper Band"] = maxv
        stock.loc[i,"Lower Band"] = minv
        stock.loc[i,"Band Area"] = maxv - minv
    return stock


In [122]:
# stock_df = create_lower_upper_bands(stock_df)

# create eps, pe_ratio, revenue, income, expenditure, profit

In [123]:
def create_eps_pe_ratio_revenue_income_expenditure_net_profit(rev,stk):
    stk["Date"] = pd.to_datetime(stk["Date"])
    s = min(rev.year)
    e = max(rev.year)
    cols = ['Revenue','Income','Expenditure','Net Profit','EPS']
    stk[cols] = pd.DataFrame([[0]*len(cols)], index=stk.index)

    rep = ['revenue','income','expenditure','profit','eps']

    for index,row in stk.iterrows():
        q = (row.Date.month-1)//3 + 1
        samp = rev[(rev['year']==row.Date.year)&(rev['quartile']==q)]
        if samp.shape[0] !=0:
            stk.loc[index,cols] = samp.iloc[0][rep].values
        else:
            stk.loc[index,cols] = [np.nan]*5
        
    stk['year'] = pd.DatetimeIndex(stk['Date']).year
    # stk = stk[(stk.year >= s)&(stk.year <= e) & stk["Revenue"] !=0 ]
    # stk = stk.drop(["year"],axis=1)

    bands = [2,4,8]

    for band in bands:
        bcols = ['Revenue last '+str(band)+' quarters','Income last '+str(band)+' quarters','Expenditure  last '+str(band)+' quarters','Net Profit  last '+str(band)+' quarters','EPS last '+str(band)+' quarters']
        stk[bcols] = pd.DataFrame([[0]*len(bcols)], index=stk.index)

        for index,row in stk.iterrows():
            q = (row.Date.month-1)//3 + 1
            samp = rev[(rev['year']==row.Date.year)&(rev['quartile']==q)]
            if samp.shape[0] == 0:
                r = 1
            else:
                r = samp.index.values[0]
            if r+band+1 < rev.shape[0]:
                v = range(r+1,r+band+1)
                stk.loc[index,bcols] = rev.loc[v,rep].sum().values
    stk["p/e"] = stk["Close Price"]/stk["EPS"]
    return stk

In [124]:
# stock_df = create_eps_pe_ratio_revenue_income_expenditure_net_profit(revenue_df,stock_df)

#  Create Next Day Columns

In [125]:
def add_next_day_columns(stock):
    new_columns = ["Next Day Open Price","Next Day High Price","Next Day Low Price","Next Day Close Price"]
    columns = ["Open Price","High Price","Low Price","Close Price"]
    stock[new_columns] = pd.DataFrame([[0,0,0,0]], index=stock.index)
    stock[new_columns] = stock[columns].shift(1)
    return stock

In [126]:
# stock_df = add_next_day_columns(stock_df)

In [127]:
# stock_df.to_csv(os.path.join(path,"Data/Stock/"+"fc"+str(security_id)+".csv"),index=None)

In [128]:
# stock_df = pd.read_csv(os.path.join(path,"Data/Stock/"+"fc"+str(security_id)+".csv"))

# Growth Rate Features

In [129]:
direct_columns = ['Open Price', 'High Price', 'Low Price', 'Close Price','Next Day Open Price', 'Next Day High Price', 'Next Day Low Price', 'Next Day Close Price','WAP','No.of Shares', 'No. of Trades', 'Total Turnover (Rs.)','Deliverable Quantity', '% Deli. Qty to Traded Qty','Spread High-Low','Spread Close-Open','Alpha','Beta']
growth_direct_rate_columns = [col + " GR" for col in direct_columns]


In [130]:
# stock_df[direct_columns] = stock_df[direct_columns].apply(pd.to_numeric,errors="coerce")

## Direct Growth Rate Columns

In [131]:
def find_gain_loss(stock):
    stock[growth_direct_rate_columns] = pd.DataFrame([[0]*len(growth_direct_rate_columns)], index=stock.index)
    today = stock[direct_columns]
    previous = stock[direct_columns].shift(1)
    stock[growth_direct_rate_columns] = (today-previous)/previous
    return stock

In [132]:
# stock_df = find_gain_loss(stock_df)

# Sequential Increase

In [133]:
def sequential_increase(stock):
    stock["Sequential Increase"] = ""
    c = 0
    stock.at[stock.shape[0]-2, "Sequential Increase"] = 0
    stock.at[stock.shape[0]-1, "Sequential Increase"] = 0
    for i in range(stock.shape[0]-2, 0, -1):
        if stock.at[i,"Close Price"] > stock.at[i+1,"Close Price"]:
            c += 1
            stock.at[i-1,"Sequential Increase"] = c
        else:
            stock.at[i-1,"Sequential Increase"] = 0
            c = 0
    return stock

In [134]:
# stock_df = sequential_increase(stock_df)

# Sequential Decrease

In [135]:
def sequential_decrease(stock):
    stock["Sequential Decrease"] = 0
    c = 1
    stock.at[stock.shape[0]-2, "Sequential Decrease"] = 0
    stock.at[stock.shape[0]-1, "Sequential Decrease"] = 0
    for i in range(stock.shape[0]-2, 0, -1):
        if stock.at[i,"Close Price"] < stock.at[i+1,"Close Price"]:
            stock.at[i-1,"Sequential Decrease"] = c
            c += 1
        else:
            stock.at[i-1,"Sequential Decrease"] = 0
            c = 1
    return stock

In [136]:
# stock_df = sequential_decrease(stock_df)

# Sequential Increase Percentage

In [137]:
def sequential_increase_percentage(stock):
    stock["Sequential Increase %"] = ""
    for i in range(stock.shape[0]-2):
        if stock.at[i, "Sequential Increase"] != 0:
            inc = stock.at[i, "Sequential Increase"]
        else:
            inc = 1
        fr = stock.at[i+1, "Close Price"]
        to = stock.at[i+1+inc, "Close Price"]
        stock.at[i, "Sequential Increase %"] = (fr - to) / to
    stock.at[stock.shape[0]-2, "Sequential Increase %"] = 0
    stock.at[stock.shape[0]-1, "Sequential Increase %"] = 0
    return stock

In [138]:
# stock_df = sequential_increase_percentage(stock_df)

# Sequential Decrease Percentage

In [139]:
def sequential_decrease_percentage(stock):
    stock["Sequential Decrease %"] = ""
    for i in range(stock.shape[0]-2):
        if stock.at[i, "Sequential Decrease"] != 0:
            inc = stock.at[i, "Sequential Decrease"]
        else:
            inc = 1
        fr = stock.at[i+1, "Close Price"]
        to = stock.at[i+1+inc, "Close Price"]
        stock.at[i, "Sequential Decrease %"] = (to - fr) / fr
    stock.at[stock.shape[0]-2, "Sequential Decrease %"] = 0
    stock.at[stock.shape[0]-1, "Sequential Decrease %"] = 0
    return stock

In [140]:
# stock_df = sequential_decrease_percentage(stock_df)

# Sequential max min avg increase, max min avg decrease for 90, 180, 365 days

In [141]:
def max_min_avg_of_sequential_data(stock):
    index_start = stock.first_valid_index() 
    seq_inc_days = stock.at[index_start, "Sequential Increase"]
    seq_dec_days = stock.at[index_start, "Sequential Decrease"]
    seq_inc_list = [0]
    seq_dec_list = [0]
    for i in range(index_start, stock.shape[0]+index_start):
        if stock.at[i, "Sequential Increase"] == seq_inc_days:
            seq_inc_list.append(stock.at[i, "Sequential Increase %"])
        if stock.at[i, "Sequential Decrease"] == seq_dec_days:
            seq_dec_list.append(stock.at[i, "Sequential Decrease %"])
    seq_inc_list = [i for i in seq_inc_list if i != 0 and i]
    seq_dec_list = [i for i in seq_dec_list if i != 0 and i]
    return seq_inc_list, seq_dec_list

In [142]:
def sequential_increase_decrease(stock):
    bands = [90,180,365]
    for b in bands:
        bcols = ["Max Inc % in "+str(b)+" days","Max Dec % in "+str(b)+" days","Min Inc % in "+str(b)+" days","Min Dec % in "+str(b)+" days","Avg Inc % in "+str(b)+" days","Avg Dec % in "+str(b)+" days"]
        stock[bcols] = pd.DataFrame([[0]*len(bcols)], index=stock.index)
        for i in range(stock.shape[0]):
            s = i+1
            specific_bands = stock.iloc[-(s):-(s+b+1):-1]
            specific_bands.sort_index(inplace=True)
            seq_inc_list, seq_dec_list = max_min_avg_of_sequential_data(specific_bands)
            try:
                stock.loc[specific_bands.index,bcols] = [max(seq_inc_list),max(seq_dec_list),min(seq_inc_list),min(seq_dec_list),np.mean(seq_inc_list),np.mean(seq_dec_list)]
            except:
                continue
    return stock

In [143]:
# stock_df = sequential_increase_decrease(stock_df)

# QuaterWise growth rate for "Revenue","Dividend","Income","Expenditure","Net Profit","EPS"

In [144]:
cols = ["Revenue","Dividend Value","Income","Expenditure","Net Profit","EPS"]

In [145]:
def generate_dictionary_for_quarterwise_data(stock,columnName):
    result = {}
    stock.Date = pd.to_datetime(stock.Date)
    for index,row in stock.iterrows():
        q = (row.Date.month-1)//3 + 1   
        year = row.Date.year
        month = row.Date.month
        res = result.get(year,{})
        # amount = re.findall(r"\d+.?\d*",row["Revenue"])[0]
        amount  = row[columnName]
        q = "1q" if 1 <= month <= 3 else "2q" if 4 <= month <= 6 else "3q" if 6 <= month <= 9 else "4q"
        val = res.get(q,[])
        val.append(float(amount))
        res[q] = val
        result[year] = res
    return result

In [146]:
def generate_dictionary_for_quarterwise_growthrate_data(data):
    gr_dic = {}
    keys = list(data.keys())
    array = [''] * (len(keys)*4)
    array_index = 0
    for key in data:
        lists = data.get(key)
        array_index += 4 - len(lists.keys()) 
        for lis in lists:
            if math.isnan(lists.get(lis)[0]):
                array[array_index] = ''
            else:                
                array[array_index] = lists.get(lis)[0]
            array_index = array_index + 1
    if (array.count('')) > ((len(keys) * 4) / 2):
        return gr_dic
    
    for i in range(4,len(keys)*4,4):
        res = [array[i],array[i+1],array[i+2],array[i+3]]
        avg = np.mean(list(filter(lambda i: isinstance(i, float), res)))
        if np.isnan(avg):
            pass
        else:
            array[i] = avg

    gr_array = [''] * (len(keys)*4)
    for i in range(0, len(keys)*4-1):
        x = array[i]
        y = array[i+1]
        if x == '' and y == '': continue
        if y == '' or y == 0: continue
        if x == '':
            gr_array[i] = 1
        else:
            gr_array[i] = (x - y) / y
    index = 0
    for key in data:
        gr_dic[key] = [gr_array[index], gr_array[index+1], gr_array[index+2], gr_array[index+3]]
        index = index + 4
    return gr_dic

In [147]:
def update_growthrate_for_quarterwise_data(gr_dic, stock, columnName):
    for i in range(0, stock.shape[0]-1):
        date = stock.at[i, "Date"]
        q = int((date.month-1)//3)
        year = date.year
        if year in gr_dic.keys():
            stock.at[i,columnName+" GR"] = gr_dic.get(year)[q] if isinstance(gr_dic.get(year)[q],float) else 0
    return stock

In [148]:
def quarter_wise_growthrate(stock, columnName):
    dic = generate_dictionary_for_quarterwise_data(stock, columnName)
    gr_dic = generate_dictionary_for_quarterwise_growthrate_data(dic)
    stock[columnName + ' GR'] = ''
    if gr_dic == {}:
        return stock
    else:
        stock = update_growthrate_for_quarterwise_data(gr_dic, stock, columnName)
    return stock

In [149]:
# for col in cols:
#     try:
#         stock_df = quarter_wise_growthrate(stock_df, col)
#     except:
#         pass

# Close Price as percentage of Lowest Value, Highest Value, Band Area for 7, 30, 90, 180, 365 days

In [150]:
def close_price_as_percent_of_LV_HV_BA(stock):
    bands = [7,30,90,180,365]
    for b in bands:
        bcols = ["CP % LV "+str(b)+" days","CP % HV "+str(b)+" days","CP % BA "+str(b)+" days"]
        stock[bcols] = pd.DataFrame([[0]*len(bcols)], index=stock.index)
        for i in range(stock.shape[0]):
            s = i+1
            specific_bands = stock.iloc[-(s):-(s+b+1):-1]
            low = specific_bands["Close Price"].min()
            high = specific_bands["Close Price"].max()
            today = stock.iloc[-(s)]["Close Price"]
            stock.loc[specific_bands.index,bcols] = [today/low,today/high,today/(high-low)]
    return stock

In [151]:
# stock_df = close_price_as_percent_of_LV_HV_BA(stock_df)

In [152]:
# stock_df.to_csv(os.path.join(path,"Data/Stock/"+"gr"+str(security_id)+".csv"),index=None)

In [153]:
top = pd.read_csv("top.csv")

In [154]:
%%time
cols = ["Revenue","Dividend Value","Income","Expenditure","Net Profit","EPS"]
for _,row in top.iterrows():
    start = time.time()
    security_id = str(row["security id"])
    name = row["name"]
    try:
        index_df = pd.read_csv(os.path.join(path,"Data/Index.csv"))
        corporate_df = pd.read_csv(os.path.join(path,"Data/CorporateActions/"+security_id+".csv"))
        revenue_df = pd.read_csv(os.path.join(path,"Data/Revenue/"+security_id+".csv"))
        stock_df = pd.read_csv(os.path.join(path,"Data/Stock/"+security_id+".csv"))

        stock_df = cleaning(stock_df,index_df)
        stock_df = apply_corporate_actions(stock_df,corporate_df)
        create_index()
        stock_df = calculate_beta(stock_df)
        stock_df = add_risk_free_column(stock_df)
        stock_df = calculate_alpha(stock_df)
        stock_df = create_lower_upper_bands(stock_df)
        stock_df = create_eps_pe_ratio_revenue_income_expenditure_net_profit(revenue_df,stock_df)
        stock_df = add_next_day_columns(stock_df)

        stock_df.to_csv(os.path.join(path,"Data/Stock/"+"fc"+str(security_id)+".csv"),index=None)
        stock_df = pd.read_csv(os.path.join(path,"Data/Stock/"+"fc"+str(security_id)+".csv"))

        stock_df[direct_columns] = stock_df[direct_columns].apply(pd.to_numeric,errors="coerce")
        stock_df = find_gain_loss(stock_df)
        stock_df = sequential_increase(stock_df)
        stock_df = sequential_decrease(stock_df)
        stock_df = sequential_increase_percentage(stock_df)
        stock_df = sequential_decrease_percentage(stock_df)
        stock_df = sequential_increase_decrease(stock_df)

        for col in cols:
            try:
                stock_df = quarter_wise_growthrate(stock_df, col)
            except Exception as e:
                print(e,"---")

        stock_df = close_price_as_percent_of_LV_HV_BA(stock_df)
        stock_df.to_csv(os.path.join(path,"Data/Stock/"+"gr"+str(security_id)+".csv"),index=None)

    except Exception as e:
        print(e,"***")
        end = time.time()
        print(security_id," = ", end-start)
    else:
        end = time.time()
        print(security_id," = ", end-start)
    print("------------------------------")
    

500112  =  179.94140362739563
------------------------------
500325  =  181.24491024017334
------------------------------
532540  =  176.46087098121643
------------------------------
500209  =  189.94303107261658
------------------------------
532174  =  195.27736711502075
------------------------------
507685  =  178.23270964622498
------------------------------
530965  =  180.71981716156006
------------------------------
500182  =  152.21411085128784
------------------------------
532210  =  187.30041599273682
------------------------------
500180  =  208.77859258651733
------------------------------
Wall time: 30min 30s


In [155]:
top

Unnamed: 0,security id,name
0,500112,SBIN
1,500325,RELIANCE INDUSTRIES LTD
2,532540,TATA CONSULTANCY SERVICES LTD
3,500209,INFOSYS LTD
4,532174,ICICI BANK LTD
5,507685,WIPRO LTD
6,530965,INDIAN OIL CORPORATION LTD
7,500182,HERO MOTOCORP LTD
8,532210,CITY UNION BANK LTD
9,500180,HDFC Bank Ltd
