In [1]:
# Bring in all dependencies
import pandas as pd
import requests
import numpy as np
import scipy as sp
from statistics import mode

In [2]:
########## Read in data file ##########

stock_history_file = pd.read_csv('model_data.csv',index_col = False)
stock_history_file

# Drop unnecessary columns 
stock_history_file = stock_history_file.drop("Unnamed: 0",axis=1)
stock_history_file = stock_history_file.drop(['Stock Splits'], axis=1)

In [3]:
########## Clean Data ##########

# creating bool series True for NaN values 
is_null = stock_history_file.isnull().values

# replacing nan values in pricing columns with zero
stock_history_file["Open"].fillna(0.00, inplace = True)
stock_history_file["Close"].fillna(0.00, inplace = True)
stock_history_file["High"].fillna(0.00, inplace = True)
stock_history_file["Low"].fillna(0.00, inplace = True)

# replacing nan values in volume with zero
stock_history_file["Volume"].fillna(0, inplace = True)

# replacing nan values in volume with zero
stock_history_file["Sector"].fillna("Undefined", inplace = True)
    
# filtering data to display any remaining NaN values
stock_history_file[is_null]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Symbol,Name,Sector
15864,2018-12-05,0.000000,0.000000,0.000000,0.000000,0.0,0.24,FNV,Franco-Nevada Corporation,Basic Materials
15864,2018-12-05,0.000000,0.000000,0.000000,0.000000,0.0,0.24,FNV,Franco-Nevada Corporation,Basic Materials
15864,2018-12-05,0.000000,0.000000,0.000000,0.000000,0.0,0.24,FNV,Franco-Nevada Corporation,Basic Materials
15864,2018-12-05,0.000000,0.000000,0.000000,0.000000,0.0,0.24,FNV,Franco-Nevada Corporation,Basic Materials
15864,2018-12-05,0.000000,0.000000,0.000000,0.000000,0.0,0.24,FNV,Franco-Nevada Corporation,Basic Materials
...,...,...,...,...,...,...,...,...,...,...
89516,2021-05-24,18.240000,18.299999,18.139999,18.139999,65800.0,0.00,FINS,Angel Oak Financial Strategies Income Term Tru...,Undefined
89517,2021-05-25,18.208000,18.299999,18.208000,18.260000,28900.0,0.00,FINS,Angel Oak Financial Strategies Income Term Tru...,Undefined
89518,2021-05-26,18.290001,18.299999,18.250000,18.250000,36500.0,0.00,FINS,Angel Oak Financial Strategies Income Term Tru...,Undefined
89519,2021-05-27,18.299999,18.350000,18.250000,18.309999,47000.0,0.00,FINS,Angel Oak Financial Strategies Income Term Tru...,Undefined


In [4]:
######### ADD CACLUATED COLUMNS #############

# Calculate daily price change
stock_history_file['Price Change'] = stock_history_file['Open'] - stock_history_file['Close']
# loop over df columns for change column
for i in range(1,len(stock_history_file)) :
    # Calculate volume changes from one day to next
    stock_history_file['Volume Change'] = stock_history_file.loc[(i-1), "Volume"] - stock_history_file.loc[i, "Volume"]
    # Calculate dividend changes from one day to next
    stock_history_file['Dividend Change'] = stock_history_file.loc[(i-1), "Dividends"] - stock_history_file.loc[i, "Dividends"]

######### ADD PRICE VOLITILITY COLUMN #########

# Use describe to get the Price Volititliy Score parameters
price_min = stock_history_file['Price Change'].min()
price_25_qrt = stock_history_file['Price Change'].describe()['25%']
price_50_qrt = stock_history_file['Price Change'].describe()['50%']
price_75_qrt = stock_history_file['Price Change'].describe()['75%']
price_max = stock_history_file['Price Change'].max()

# Use parameters to assign volitility scores

# Empty list to hold scores
price_volitility_list = []

# Assign Volitility score based on .describe values
for i in range(len(stock_history_file)) :
    if stock_history_file['Price Change'][i] <= price_25_qrt:
        price_volitility_list.append(1)
    elif stock_history_file['Price Change'][i] <= price_50_qrt:
        price_volitility_list.append(2)
    elif stock_history_file['Price Change'][i] <= price_75_qrt:
        price_volitility_list.append(3)
    else:
        price_volitility_list.append(4)
        
# Add list to DF
stock_history_file['Price Volitility'] = price_volitility_list

######### ADD VOLUME VOLITILITY COLUMN #########

# Use describe to get the Volume Volititliy Score parameters
volume_min = stock_history_file['Volume Change'].min()
volume_25_qrt = stock_history_file['Volume Change'].describe()['25%']
volume_50_qrt = stock_history_file['Volume Change'].describe()['50%']
volume_75_qrt = stock_history_file['Volume Change'].describe()['75%']
volume_max = stock_history_file['Volume Change'].max()

# Use parameters to assign volitility scores

# Empty list to hold scores
volume_volitility_list = []

# Assign Volitility score based on .describe values
for i in range(len(stock_history_file)) :
    if stock_history_file['Volume Change'][i] <= volume_25_qrt:
        volume_volitility_list.append(1)
    elif stock_history_file['Volume Change'][i] <= volume_50_qrt:
        volume_volitility_list.append(2)
    elif stock_history_file['Volume Change'][i] <= volume_75_qrt:
        volume_volitility_list.append(3)
    else:
        volume_volitility_list.append(4)
        
# Add list to DF
stock_history_file['Volume Volitility'] = volume_volitility_list

######### ADD DIVIDEND VOLITILITY COLUMN #########

# Use describe to get the Dividend Volititliy Score parameters
divd_min = stock_history_file['Dividend Change'].min()
divd_25_qrt = stock_history_file['Dividend Change'].describe()['25%']
divd_50_qrt = stock_history_file['Dividend Change'].describe()['50%']
divd_75_qrt = stock_history_file['Dividend Change'].describe()['75%']
divd_max = stock_history_file['Dividend Change'].max()

# Use parameters to assign volitility scores

# Empty list to hold scores
divd_volitility_list = []

# Assign Volitility score based on .describe values
for i in range(len(stock_history_file)) :
    if stock_history_file['Dividend Change'][i] <= divd_25_qrt:
        divd_volitility_list.append(1)
    elif stock_history_file['Dividend Change'][i] <= divd_50_qrt:
        divd_volitility_list.append(2)
    elif stock_history_file['Dividend Change'][i] <= divd_75_qrt:
        divd_volitility_list.append(3)
    else:
        divd_volitility_list.append(4)
        
# Add list to DF
stock_history_file['Dividend Volitility'] = divd_volitility_list

######### ADD OVERALL VOLITILITY COLUMN #########

# Create the overall stock score
stock_history_file['Stock Volitility Score'] = stock_history_file['Price Volitility'] + stock_history_file['Dividend Volitility'] + stock_history_file['Volume Volitility']

######### REARRANGE COLUMN ORDER #########

#Display list of colheaders to rearrange
#col_name_lst = list(stock_history_file)

#specify desired column order
clean_data_df = stock_history_file[['Symbol', 'Name', 'Sector', 'Date', 'Open', 'Close', 'High', 'Low', 'Price Change',
                                         'Price Volitility', 'Volume', 'Volume Change','Volume Volitility', 'Dividends', 'Dividend Change', 'Dividend Volitility', 'Stock Volitility Score']]
# Diplay results
clean_data_df

Unnamed: 0,Symbol,Name,Sector,Date,Open,Close,High,Low,Price Change,Price Volitility,Volume,Volume Change,Volume Volitility,Dividends,Dividend Change,Dividend Volitility,Stock Volitility Score
0,AUS,Austerlitz Acquisition Corporation I Class A O...,Financial Services,2021-04-19,9.900000,9.900000,9.900000,9.900000,0.000000,2,100.0,-20000.0,1,0.0,0.0,1,4
1,AUS,Austerlitz Acquisition Corporation I Class A O...,Financial Services,2021-04-20,9.945000,10.040000,10.047000,9.945000,-0.095000,2,1800.0,-20000.0,1,0.0,0.0,1,4
2,AUS,Austerlitz Acquisition Corporation I Class A O...,Financial Services,2021-04-21,9.910000,9.932000,10.100000,9.900000,-0.022000,2,13200.0,-20000.0,1,0.0,0.0,1,4
3,AUS,Austerlitz Acquisition Corporation I Class A O...,Financial Services,2021-04-22,9.930000,9.930000,9.930000,9.930000,0.000000,2,2100.0,-20000.0,1,0.0,0.0,1,4
4,AUS,Austerlitz Acquisition Corporation I Class A O...,Financial Services,2021-04-23,9.930000,9.930000,9.980000,9.900000,0.000000,2,43700.0,-20000.0,1,0.0,0.0,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90856,PCPC,Periphas Capital Partnering Corporation Class ...,Financial Services,2021-05-21,24.350000,24.350000,24.350000,24.350000,0.000000,2,0.0,-20000.0,1,0.0,0.0,1,4
90857,PCPC,Periphas Capital Partnering Corporation Class ...,Financial Services,2021-05-24,24.350000,24.350000,24.350000,24.350000,0.000000,2,0.0,-20000.0,1,0.0,0.0,1,4
90858,PCPC,Periphas Capital Partnering Corporation Class ...,Financial Services,2021-05-25,24.400000,24.389999,24.400000,24.389999,0.010000,3,1600.0,-20000.0,1,0.0,0.0,1,5
90859,PCPC,Periphas Capital Partnering Corporation Class ...,Financial Services,2021-05-26,24.750000,24.389999,24.750000,24.389999,0.360001,4,10100.0,-20000.0,1,0.0,0.0,1,6


In [5]:
######### Save clean data as csv for model use & user table #########
clean_data_df.to_csv('static/resources/clean_data.csv',index=False)