In [None]:
#
#   Overreaction Data Code
#   Beta Zero Capital
#   Strategy Team Overreaction
#    
#   C. Delahanty, H. Fu, J. Kurlander
# 
#
#   April, 2020
#

In [None]:
#
#   As a preliminary to NLP strategy development,
#   we've created this script to generate an excel / csv
#   file relating news (a stimulus) and equity
#   pricing information pre- and post-stimulus
#
#   The goal is to support both our team and group-wide NLP strategies
#
#   Output will currently take the format of:
#   Ticker, DateOfDrop, PercentSizeOfDropFromRecentHigh, NewsPriorToDrop(1), ...
#   NewsPriorToDrop(2), NewsPriorToDrop(3), Price5minFromLow, Price30minFromLow, ...
#   Price1hFromLow, Price4hFromLow, Price1DFromLow, Price3DFromLow
#
#   Time increments that are during close are treated as the most recent price
#   since market close. 

In [None]:
#   ONGOING COMMENTS HERE
#   
#   Source price data from yfinance at minutely scale for drops / highs
#   Source news data from https://stocknewsapi.com/ (?) - free trial for 1 month
#   
#   A recent high is the most recent max within 24 hours (notice, if a price is monotonically decreasing
#   this could be extremely far back - thus 24hour arbitrary cutoff)
#
#   To avoid output clutter, we limit search for prices with at least a 5% drop.
#   
#   Should we record both > 5% price drops AND increases? Might be useful to verify model
#
#   Finally, should have market-normalized performance. We should also record general market data for the 
#   fields mentioned above, and potentially duplicate fields after having subtracted out the markets movement
#   in that time period (so we don't consider a stock simply when general market favorability drops)
#

In [None]:
import pandas as pd
import yfinance as yf # reading minutely data
import requests # reading news data from ContextualWeb.io
import datetime as dt

In [None]:
#   PARAMETERS FOR THE MODEL
#
#   These values are pre-defined to make the following code as general
#   as possible. 
#
#

# Date window for the price history: an ISO date string up to the day the notebook runs.
start_date = "2016-01-01"
end_date = dt.date.today()

# Benchmark symbol -- presumably for market-normalised returns; confirm against later cells.
market = "SPY"

# Minimum fractional price drop worth recording (0.05 == 5%).
drop = 0.05

# Headline count -- presumably the number of news items kept per drop; confirm downstream.
n_headlines = 3

In [None]:
#   IMPORTANT USAGE
#
#   API is available online, but some important usage:
#
#   yf.download(tickers = [array or string separated by " " of tickers], start = <date to start>, end = <date to end>, 
#       interval = <how often>)

In [None]:
# Load the NASDAQ and NYSE listing files and stack them into one ticker table.
nasdaq = pd.read_csv("nasdaq.csv")
nyse = pd.read_csv("nyse.csv")
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it the
# row labels of the two files repeat and a label lookup can return two rows.
tickers = pd.concat([nasdaq, nyse], ignore_index=True)

# Load the news-API credentials from a local file so they stay out of the notebook.
# The first entry of the "auth" column is used as the API host and the second as
# the API key; .iloc makes the positional intent explicit.
auth = pd.read_csv("auth.csv")
host = auth["auth"].iloc[0]
apikey = auth["auth"].iloc[1]

In [None]:
# given a ticker, download its price history (drop detection is still TODO)
def get_drops_from_ticker(ticker):
    """Download the price history for ``ticker`` through ``yf.download``.

    Parameters
    ----------
    ticker : str or list of str
        Symbol(s) to request; forwarded as the ``tickers`` argument of
        ``yf.download``.

    Returns
    -------
    object
        Whatever ``yf.download`` returns for the request (a price DataFrame
        according to the yfinance documentation -- TODO confirm).

    Notes
    -----
    TODO: detecting drops and their dates is not implemented yet; for now the
    raw download is handed back so callers can build on it.
    """
    # Use the function argument here, not the notebook-level ``tickers`` table.
    data = yf.download(tickers=ticker)
    return data

In [None]:
# Daily opening prices for every listed ticker over the configured date window.
data = yf.download(
    tickers=list(tickers["Ticker"]),
    start=start_date,
    end=end_date,
    interval="1d",
)["Open"]

In [None]:
# Preview the first five rows of the downloaded prices.
data.head(n=5)

In [None]:
# Show every symbol in the combined listing.
# NOTE(review): this prints the full list; a short preview may be enough here.
[symbol for symbol in tickers["Ticker"]]

In [None]:
# Create the yfinance handle in this cell so it runs on a fresh kernel;
# ``msft`` is not defined by any earlier cell.
msft = yf.Ticker("MSFT")
# Option expiration dates, per the yfinance documentation.
msft.options

In [None]:
# reading minutely data
import yfinance as yf

# sample call from yf - minutely MSFT bars for the most recent five days.
# No ``start`` is passed: a positional argument after keyword arguments is a
# SyntaxError, and ``period`` already fixes the time window.
data = yf.download(tickers="MSFT", period="5d", interval="1m")

print(data.head())

In [None]:
# Query the ContextualWeb news-search endpoint (served through RapidAPI) for
# AAPL articles published between 2019-04-20 and 2019-04-23.
url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/NewsSearchAPI"

querystring = {
    "fromPublishedDate": "2019-04-20",
    "toPublishedDate": "2019-04-23",
    "autoCorrect": "false",
    "pageNumber": "1",
    "pageSize": "10",
    "q": "AAPL",
    "safeSearch": "false",
}

# host / apikey come from auth.csv, loaded earlier in the notebook.
headers = {
    "x-rapidapi-host": host,
    "x-rapidapi-key": apikey,
}

# requests waits forever by default; bound the wait so a stalled connection
# cannot hang the kernel.
response = requests.get(url, headers=headers, params=querystring, timeout=30)

print(response)
print(response.text)