In [1]:
# Bring in all dependencies
import pandas as pd
import requests
import numpy as np
import scipy as sp
from statistics import mode

In [2]:
########## Read in data file ##########

stock_history_file = pd.read_csv('../resources/stock_history_data.csv',index_col = False)
stock_history_file

# Drop unnecessary columns 
stock_history_file = stock_history_file.drop("Unnamed: 0",axis=1)
stock_history_file = stock_history_file.drop(['Stock Splits'], axis=1)

In [3]:
########## Clean Data ##########

# creating bool series True for NaN values 
is_null = stock_history_file.isnull().values

# replacing nan values in pricing columns with zero
stock_history_file["Open"].fillna(0.00, inplace = True)
stock_history_file["Close"].fillna(0.00, inplace = True)
stock_history_file["High"].fillna(0.00, inplace = True)
stock_history_file["Low"].fillna(0.00, inplace = True)

# replacing nan values in volume with zero
stock_history_file["Volume"].fillna(0, inplace = True)

# replacing nan values in volume with zero
stock_history_file["Sector"].fillna("Undefined", inplace = True)
    
# filtering data to display any remaining NaN values
stock_history_file[is_null]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Symbol,Name,Sector
29196,2019-03-01,9.654213,9.984586,9.654213,9.859778,28900.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
29197,2019-03-04,9.852436,9.940535,9.250425,9.881803,110300.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
29198,2019-03-05,9.764335,9.843625,9.184349,9.657882,111700.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
29199,2019-03-06,9.485356,9.727629,9.433965,9.433965,72200.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
29200,2019-03-07,9.433966,9.543355,9.250426,9.382574,92700.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
...,...,...,...,...,...,...,...,...,...,...
485156,2021-05-25,15.510000,15.510000,15.420000,15.490000,3800.0,0.0,EIC,Eagle Point Income Company Inc. Common Stock,Undefined
485157,2021-05-26,15.480000,15.600000,15.360000,15.500000,13200.0,0.0,EIC,Eagle Point Income Company Inc. Common Stock,Undefined
485158,2021-05-27,15.470000,15.500000,15.400000,15.470000,30100.0,0.0,EIC,Eagle Point Income Company Inc. Common Stock,Undefined
485159,2021-05-28,15.470000,15.554000,15.400000,15.450000,17200.0,0.0,EIC,Eagle Point Income Company Inc. Common Stock,Undefined


In [5]:
# Company Name column contains descriptive text
# This will need to be removed at a future point

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Symbol,Name,Sector
29196,2019-03-01,9.654213,9.984586,9.654213,9.859778,28900.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
29197,2019-03-04,9.852436,9.940535,9.250425,9.881803,110300.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
29198,2019-03-05,9.764335,9.843625,9.184349,9.657882,111700.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
29199,2019-03-06,9.485356,9.727629,9.433965,9.433965,72200.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
29200,2019-03-07,9.433966,9.543355,9.250426,9.382574,92700.0,0.0,HGLB,Highland Global Allocation Fund Common Stock,Undefined
...,...,...,...,...,...,...,...,...,...,...
485156,2021-05-25,15.510000,15.510000,15.420000,15.490000,3800.0,0.0,EIC,Eagle Point Income Company Inc. Common Stock,Undefined
485157,2021-05-26,15.480000,15.600000,15.360000,15.500000,13200.0,0.0,EIC,Eagle Point Income Company Inc. Common Stock,Undefined
485158,2021-05-27,15.470000,15.500000,15.400000,15.470000,30100.0,0.0,EIC,Eagle Point Income Company Inc. Common Stock,Undefined
485159,2021-05-28,15.470000,15.554000,15.400000,15.450000,17200.0,0.0,EIC,Eagle Point Income Company Inc. Common Stock,Undefined


In [4]:
######### ADD CACLUATED COLUMNS #############

# Calculate daily price change
stock_history_file['Price Change'] = stock_history_file['Open'] - stock_history_file['Close']
# loop over df columns for change column
for i in range(1,len(stock_history_file)) :
    # Calculate volume changes from one day to next
    stock_history_file['Volume Change'] = stock_history_file.loc[(i-1), "Volume"] - stock_history_file.loc[i, "Volume"]
    # Calculate dividend changes from one day to next
    stock_history_file['Dividend Change'] = stock_history_file.loc[(i-1), "Dividends"] - stock_history_file.loc[i, "Dividends"]

######### ADD PRICE VOLITILITY COLUMN #########

# Use describe to get the Price Volititliy Score parameters
price_min = stock_history_file['Price Change'].min()
price_25_qrt = stock_history_file['Price Change'].describe()['25%']
price_50_qrt = stock_history_file['Price Change'].describe()['50%']
price_75_qrt = stock_history_file['Price Change'].describe()['75%']
price_max = stock_history_file['Price Change'].max()

# Use parameters to assign volitility scores

# Empty list to hold scores
price_volitility_list = []

# Assign Volitility score based on .describe values
for i in range(len(stock_history_file)) :
    if stock_history_file['Price Change'][i] <= price_25_qrt:
        price_volitility_list.append(1)
    elif stock_history_file['Price Change'][i] <= price_50_qrt:
        price_volitility_list.append(2)
    elif stock_history_file['Price Change'][i] <= price_75_qrt:
        price_volitility_list.append(3)
    else:
        price_volitility_list.append(4)
        
# Add list to DF
stock_history_file['Price Volitility'] = price_volitility_list

######### ADD VOLUME VOLITILITY COLUMN #########

# Use describe to get the Volume Volititliy Score parameters
volume_min = stock_history_file['Volume Change'].min()
volume_25_qrt = stock_history_file['Volume Change'].describe()['25%']
volume_50_qrt = stock_history_file['Volume Change'].describe()['50%']
volume_75_qrt = stock_history_file['Volume Change'].describe()['75%']
volume_max = stock_history_file['Volume Change'].max()

# Use parameters to assign volitility scores

# Empty list to hold scores
volume_volitility_list = []

# Assign Volitility score based on .describe values
for i in range(len(stock_history_file)) :
    if stock_history_file['Volume Change'][i] <= volume_25_qrt:
        volume_volitility_list.append(1)
    elif stock_history_file['Volume Change'][i] <= volume_50_qrt:
        volume_volitility_list.append(2)
    elif stock_history_file['Volume Change'][i] <= volume_75_qrt:
        volume_volitility_list.append(3)
    else:
        volume_volitility_list.append(4)
        
# Add list to DF
stock_history_file['Volume Volitility'] = volume_volitility_list

######### ADD DIVIDEND VOLITILITY COLUMN #########

# Use describe to get the Dividend Volititliy Score parameters
divd_min = stock_history_file['Dividend Change'].min()
divd_25_qrt = stock_history_file['Dividend Change'].describe()['25%']
divd_50_qrt = stock_history_file['Dividend Change'].describe()['50%']
divd_75_qrt = stock_history_file['Dividend Change'].describe()['75%']
divd_max = stock_history_file['Dividend Change'].max()

# Use parameters to assign volitility scores

# Empty list to hold scores
divd_volitility_list = []

# Assign Volitility score based on .describe values
for i in range(len(stock_history_file)) :
    if stock_history_file['Dividend Change'][i] <= divd_25_qrt:
        divd_volitility_list.append(1)
    elif stock_history_file['Dividend Change'][i] <= divd_50_qrt:
        divd_volitility_list.append(2)
    elif stock_history_file['Dividend Change'][i] <= divd_75_qrt:
        divd_volitility_list.append(3)
    else:
        divd_volitility_list.append(4)
        
# Add list to DF
stock_history_file['Dividend Volitility'] = divd_volitility_list

######### ADD OVERALL VOLITILITY COLUMN #########

# Create the overall stock score
stock_history_file['Stock Volitility Score'] = stock_history_file['Price Volitility'] + stock_history_file['Dividend Volitility'] + stock_history_file['Volume Volitility']

######### REARRANGE COLUMN ORDER #########

#Display list of colheaders to rearrange
#col_name_lst = list(stock_history_file)

#specify desired column order
clean_data_df = stock_history_file[['Symbol', 'Name', 'Sector', 'Date', 'Open', 'Close', 'High', 'Low', 'Price Change',
                                         'Price Volitility', 'Volume', 'Volume Change','Volume Volitility', 'Dividends', 'Dividend Change', 'Dividend Volitility', 'Stock Volitility Score']]

In [7]:
# Diplay results
clean_data_df

Unnamed: 0,Symbol,Name,Sector,Date,Open,Close,High,Low,Price Change,Price Volitility,Volume,Volume Change,Volume Volitility,Dividends,Dividend Change,Dividend Volitility,Stock Volitility Score
0,JEF,Jefferies Financial Group Inc. Common Stock,Financial Services,2016-06-02,15.009533,15.042908,15.201429,14.967818,-0.033374,2,1997053.0,-3404100.0,1,0.0,0.0,1,4
1,JEF,Jefferies Financial Group Inc. Common Stock,Financial Services,2016-06-03,14.967818,14.650775,14.967818,14.508940,0.317043,4,2280607.0,-3404100.0,1,0.0,0.0,1,6
2,JEF,Jefferies Financial Group Inc. Common Stock,Financial Services,2016-06-06,14.650772,14.842667,14.884384,14.567339,-0.191894,2,1643118.0,-3404100.0,1,0.0,0.0,1,4
3,JEF,Jefferies Financial Group Inc. Common Stock,Financial Services,2016-06-07,14.825982,14.967817,15.001190,14.750894,-0.141835,2,1866330.0,-3404100.0,1,0.0,0.0,1,4
4,JEF,Jefferies Financial Group Inc. Common Stock,Financial Services,2016-06-08,14.976159,15.017877,15.143026,14.867696,-0.041717,2,2120514.0,-3404100.0,1,0.0,0.0,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
511831,WTI,W&T Offshore Inc. Common Stock,Energy,2021-05-25,3.690000,3.600000,3.700000,3.590000,0.090000,3,1599000.0,-3404100.0,1,0.0,0.0,1,5
511832,WTI,W&T Offshore Inc. Common Stock,Energy,2021-05-26,3.580000,3.650000,3.670000,3.540000,-0.070000,2,1361700.0,-3404100.0,1,0.0,0.0,1,4
511833,WTI,W&T Offshore Inc. Common Stock,Energy,2021-05-27,3.630000,3.680000,3.750000,3.630000,-0.050000,2,1524300.0,-3404100.0,1,0.0,0.0,1,4
511834,WTI,W&T Offshore Inc. Common Stock,Energy,2021-05-28,3.690000,3.740000,3.770000,3.640000,-0.050000,2,1451000.0,-3404100.0,1,0.0,0.0,1,4


In [9]:
######### Save clean data as csv for model use & user table #########
clean_data_df.to_csv('../resources/BERTS_clean_data.csv',index=False)