In [1]:
#
#   Overreaction Data Code
#   Beta Zero Capital
#   Strategy Team Overreaction
#    
#   C. Delahanty, H. Fu, J. Kurlander
# 
#
#   April, 2020
#

In [2]:
#
#   As a preliminary to NLP strategy development,
#   we've created this script to generate an excel / csv
#   file relating news (a stimulus) and equity
#   pricing information pre- and post-stimulus
#
#   The goal is to support both our team and group-wide NLP strategies
#
#   Output will currently take the format of:
#   Ticker, DateOfDrop, PercentSizeOfDropFromRecentHigh, NewsPriorToDrop(1), ...
#   NewsPriorToDrop(2), NewsPriorToDrop(3), Price5minFromLow, Price30minFromLow, ...
#   Price1hFromLow, Price4hFromLow, Price1DFromLow, Price3DFromLow
#
#   Time increments that are during close are treated as the most recent price
#   since market close. 

In [3]:
#   ONGOING COMMENTS HERE
#   
#   Source price data from yfinance at minutely scale for drops / highs
#   Source news data from https://stocknewsapi.com/ (?) - free trial for 1 month
#   
#   A recent high is the most recent max within 24 hours (notice, if a price is monotonically decreasing
#   this could be extremely far back - thus 24hour arbitrary cutoff)
#
#   To avoid output clutter, we limit search for prices with at least a 5% drop.
#   
#   Should we record both > 5% price drops AND increases? Might be useful to verify model
#
#   Finally, should have market-normalized performance. We should also record general market data for the 
#   fields mentioned above, and potentially duplicate fields after having subtracted out the markets movement
#   in that time period (so we don't consider a stock simply when general market favorability drops)
#

In [33]:
import pandas as pd
import yfinance as yf # reading minutely data
import requests # reading news data from ContextualWeb.io
import datetime as dt

In [49]:
#   PARAMETERS FOR THE MODEL
#
#   These values are pre-defined to make the following code as general
#   as possible. 
#
#

# Date window for the historical price download.
start_date = "2016-01-01"
end_date = dt.date.today()

# Ticker used as the general-market benchmark.
market = "SPY"

# Minimum fractional drop from the recent high worth recording (0.05 == 5%).
drop = 0.05

# Number of news headlines to attach to each recorded drop.
n_headlines = 3

In [None]:
#   IMPORTANT USAGE
#
#   API is available online, but some important usage:
#
#   yf.download(tickers = [array or string separated by " " of tickers], start = <date to start>, end = <date to end>, 
#       interval = <how often>)

In [69]:
# Read the ticker lists for NASDAQ- and NYSE-listed stocks and stack them
# into one frame.  ignore_index=True gives the combined frame a fresh
# 0..n-1 index; without it each half keeps its own 0-based labels, so a
# label lookup such as tickers.loc[0] would return one row per exchange.
nasdaq = pd.read_csv("nasdaq.csv")
nyse = pd.read_csv("nyse.csv")
tickers = pd.concat([nasdaq, nyse], ignore_index=True)

# getting authentication data
# The "auth" column of auth.csv is read by position: first entry is the
# API host, second entry is the API key.  .iloc makes the positional
# access explicit instead of relying on the index labels being 0 and 1.
auth = pd.read_csv("auth.csv")
host = auth["auth"].iloc[0]
apikey = auth["auth"].iloc[1]

In [None]:
# given a ticker, get all drops and the date of the drops
def get_drops_from_ticker(ticker):
    """Download the price history that drop detection will run on.

    Parameters
    ----------
    ticker : str
        Symbol to download; forwarded to ``yf.download`` as ``tickers``.

    Returns
    -------
    Whatever ``yf.download`` returns for ``ticker``.

    Notes
    -----
    TODO: the drop detection itself is not implemented yet; for now the
    function only fetches and returns the raw price data.
    """
    # Pass the function's own argument.  The previous code passed the
    # module-level `tickers` DataFrame, ignoring `ticker` entirely, and
    # discarded the result.
    data = yf.download(tickers=ticker)
    return data

In [78]:
# Daily price download for every symbol in the combined ticker frame,
# limited to the configured start_date..end_date window.  Only the "Open"
# prices are kept.
data = yf.download(
    tickers=tickers["Ticker"].tolist(),
    interval="1d",
    start=start_date,
    end=end_date,
)["Open"]

[*********************100%***********************]  6653 of 6653 completed

1051 Failed downloads:
- NRF.PRB: No data found, symbol may be delisted
- WGP: No data found, symbol may be delisted
- DV: No data found for this date range, symbol may be delisted
- PSA.PRP: No data found, symbol may be delisted
- GMT: No data found for this date range, symbol may be delisted
- KMP: No data found for this date range, symbol may be delisted
- ABR.PRA: No data found, symbol may be delisted
- JBJ: No data found for this date range, symbol may be delisted
- BALT: No data found for this date range, symbol may be delisted
- GRT.PRH: No data found, symbol may be delisted
- GGE: No data found for this date range, symbol may be delisted
- CCG: No data found for this date range, symbol may be delisted
- MTB.WS: No data found, symbol may be delisted
- SGY: No data found for this date range, symbol may be delisted
- MSK: No data found for this date range, symbol may be delisted
- RHT: No data found, symbo

In [79]:
# Preview the first rows of the daily open-price frame (one column per ticker).
data.head()

Unnamed: 0_level_0,A,AA,AAC,AACG,AAL,AAME,AAN,AAOI,AAON,AAP,...,ZUMZ,ZVO,ZVZZC,ZVZZT,ZWZZT,ZX,ZXYZ.A,ZXZZT,ZYNE,ZYXI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-04,41.060001,22.87656,,,41.290001,4.99,22.110001,16.959999,22.82,149.25,...,14.8,,,,,,,,9.81,0.45
2016-01-05,40.73,23.357161,,,41.18,4.8,22.68,16.299999,22.299999,152.350006,...,14.98,,,,,,,,9.6,0.37
2016-01-06,40.240002,21.627001,,,40.049999,4.7,22.700001,15.7,21.860001,149.679993,...,15.05,,,,,,,,9.01,0.41
2016-01-07,40.139999,20.161169,,,40.970001,4.66,20.99,15.48,21.26,144.800003,...,16.790001,,,,,,,,8.99,0.37
2016-01-08,39.220001,20.47356,,,40.939999,4.62,20.73,15.42,21.25,149.029999,...,16.66,,,,,,,,8.78,0.4


In [77]:
# All ticker symbols from the combined NASDAQ/NYSE frame, as a plain list.
tickers["Ticker"].tolist()

['AACG',
 'AAL',
 'AAME',
 'AAOI',
 'AAON',
 'AAPL',
 'AAWW',
 'AAXJ',
 'AAXN',
 'ABCB',
 'ABDC',
 'ABEO',
 'ABEOW',
 'ABIL',
 'ABIO',
 'ABMD',
 'ABTX',
 'ABUS',
 'ACAD',
 'ACAM',
 'ACAMU',
 'ACAMW',
 'ACBI',
 'ACER',
 'ACGL',
 'ACGLO',
 'ACGLP',
 'ACHC',
 'ACHN',
 'ACHV',
 'ACIA',
 'ACIU',
 'ACIW',
 'ACLS',
 'ACMR',
 'ACNB',
 'ACOR',
 'ACRS',
 'ACRX',
 'ACST',
 'ACT',
 'ACTG',
 'ACTT',
 'ACTTU',
 'ACTTW',
 'ACWI',
 'ACWX',
 'ADAP',
 'ADBE',
 'ADES',
 'ADI',
 'ADIL',
 'ADILW',
 'ADMA',
 'ADMP',
 'ADMS',
 'ADP',
 'ADPT',
 'ADRA',
 'ADRD',
 'ADRE',
 'ADRO',
 'ADRU',
 'ADSK',
 'ADTN',
 'ADUS',
 'ADVM',
 'ADXS',
 'AEGN',
 'AEHR',
 'AEIS',
 'AEMD',
 'AERI',
 'AESE',
 'AEY',
 'AEYE',
 'AEZS',
 'AFH',
 'AFIN',
 'AFINP',
 'AFMD',
 'AFYA',
 'AGBA',
 'AGBAR',
 'AGBAU',
 'AGBAW',
 'AGEN',
 'AGFS',
 'AGFSW',
 'AGIO',
 'AGLE',
 'AGMH',
 'AGNC',
 'AGNCM',
 'AGNCN',
 'AGNCO',
 'AGND',
 'AGRX',
 'AGTC',
 'AGYS',
 'AGZD',
 'AHCO',
 'AHPI',
 'AIA',
 'AIH',
 'AIHS',
 'AIMC',
 'AIMT',
 'AINV',
 'AIQ',
 'A

In [32]:
# `msft` is not created in any cell above, so build the Ticker handle here;
# otherwise this cell raises NameError on a fresh kernel.
msft = yf.Ticker("MSFT")

# Option expiration dates currently listed for MSFT.
msft.options

('2020-04-30',
 '2020-05-07',
 '2020-05-14',
 '2020-05-21',
 '2020-05-28',
 '2020-06-18',
 '2020-07-16',
 '2020-09-17',
 '2020-10-15',
 '2020-12-17',
 '2021-01-14',
 '2021-03-18',
 '2021-06-17',
 '2021-09-16',
 '2022-01-20',
 '2022-03-17',
 '2022-06-16',
 '2022-09-15')

In [4]:
# reading minutely data
import yfinance as yf

# sample call from yf - getting ticker data with interval
# The last five days of MSFT at one-minute resolution.  The stray positional
# `start` that used to follow the keyword arguments was a SyntaxError and has
# been removed; `period` already defines the time window.
data = yf.download(tickers="MSFT", period="5d", interval="1m")

print(data.head())

[*********************100%***********************]  1 of 1 completed
                                 Open        High         Low       Close  \
Datetime                                                                    
2020-04-15 09:30:00-04:00  171.162003  171.162003  171.162003  171.162003   
2020-04-15 09:31:00-04:00  171.210007  171.289993  170.639999  170.770004   
2020-04-15 09:32:00-04:00  170.779999  171.130005  170.600006  170.945007   
2020-04-15 09:33:00-04:00  171.300003  171.300003  171.300003  171.300003   
2020-04-15 09:34:00-04:00  171.350006  171.350006  170.975800  170.975800   

                            Adj Close   Volume  
Datetime                                        
2020-04-15 09:30:00-04:00  171.162003  1891117  
2020-04-15 09:31:00-04:00  170.770004   283402  
2020-04-15 09:32:00-04:00  170.945007   194289  
2020-04-15 09:33:00-04:00  171.300003   193398  
2020-04-15 09:34:00-04:00  170.975800   231707  


In [1]:
import requests

# Smoke test of the ContextualWeb autocomplete endpoint (served via RapidAPI).
url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/spelling/AutoComplete"

querystring = {"text":"do"}

# The API key is a secret, so it is taken from `apikey` (loaded from auth.csv
# in the credentials cell) instead of being written into the notebook.  The
# key that used to be embedded here should be treated as leaked and rotated.
# NOTE(review): assumes auth.csv holds the RapidAPI key for this host - confirm.
headers = {
    'x-rapidapi-host': "contextualwebsearch-websearch-v1.p.rapidapi.com",
    'x-rapidapi-key': apikey
    }

# A timeout keeps an unresponsive server from hanging the notebook.
response = requests.get(url, headers=headers, params=querystring, timeout=30)

print(response.text)

["donald","dogs","dont","downloads","doors","documentation","doctors","double","domain","donald trump"]


In [17]:
# News search for a ticker symbol within a published-date window.
url = "https://contextualwebsearch-websearch-v1.p.rapidapi.com/api/Search/NewsSearchAPI"

querystring = {"fromPublishedDate":"2019-04-20","toPublishedDate":"2019-04-23", "autoCorrect":"false","pageNumber":"1","pageSize":"10","q":"AAPL","safeSearch":"false"}

# The API key is a secret, so it is taken from `apikey` (loaded from auth.csv
# in the credentials cell) instead of being written into the notebook.  The
# key that used to be embedded here should be treated as leaked and rotated.
# NOTE(review): assumes auth.csv holds the RapidAPI key for this host - confirm.
headers = {
    'x-rapidapi-host': "contextualwebsearch-websearch-v1.p.rapidapi.com",
    'x-rapidapi-key': apikey
    }

# A timeout keeps an unresponsive server from hanging the notebook.
response = requests.get(url, headers=headers, params=querystring, timeout=30)

# Printing the Response object shows the HTTP status; .text is the raw JSON body.
print(response)
print(response.text)

<Response [200]>
{"_type":"news","didUMean":"","totalCount":4,"relatedSearch":["aapl","<b>aapl</b> apple inc","<b>aapl</b> stock","<b>aapl</b> earnings","<b>aapl</b> msft","aaple","aaplog","<b>aapl</b> investor"],"value":[{"title":"<b>AAPL</b> – Talk Traders","url":"http://www.talktraders.com/tag/aapl/","description":"Technology\n Talktraders provides of independent, focused, in-depth research and financial news. We offer comprehensive coverage of the most important latest news including Business & Finance, earnings reports, technology, healthcare and biotech and global news.\ncategories","body":"Technology\n Talktraders provides of independent, focused, in-depth research and financial news. We offer comprehensive coverage of the most important latest news including Business & Finance, earnings reports, technology, healthcare and biotech and global news.\ncategories","keywords":"talktraders,technology,business,aapl","language":"en","isSafe":true,"datePublished":"2019-04-22T11:25:00","p

In [6]:
# Show the Python type of the object returned by the news search request.
print(type(response))

<class 'requests.models.Response'>


In [18]:
# Parse the JSON body; the 'value' key holds the list of article records
# (title, url, description, body, datePublished, ...).
response.json()['value']

[{'title': '<b>AAPL</b> – Talk Traders',
  'url': 'http://www.talktraders.com/tag/aapl/',
  'description': 'Technology\n Talktraders provides of independent, focused, in-depth research and financial news. We offer comprehensive coverage of the most important latest news including Business & Finance, earnings reports, technology, healthcare and biotech and global news.\ncategories',
  'body': 'Technology\n Talktraders provides of independent, focused, in-depth research and financial news. We offer comprehensive coverage of the most important latest news including Business & Finance, earnings reports, technology, healthcare and biotech and global news.\ncategories',
  'keywords': 'talktraders,technology,business,aapl',
  'language': 'en',
  'isSafe': True,
  'datePublished': '2019-04-22T11:25:00',
  'provider': {'name': 'talktraders'},
  'image': {'url': '',
   'height': 0,
   'width': 0,
   'thumbnail': '',
   'thumbnailHeight': 0,
   'thumbnailWidth': 0,
   'base64Encoding': '',
   'na