In [11]:
import numpy as np
import pandas as pd
import quandl as quandl
import yahoo_fin.stock_info as si
import requests
import snscrape.modules.twitter as sntwitter

In [6]:
# stock trading, balance sheet, fundermental(P/E), economic data like gdp, inflation rate, unemployment rate
# yahoo_fin: historical stock prices (daily / weekly / monthly), realtime-prices, fundamentals data, income statements, cash flows, analyst info, current cryptocurrency prices, option chains, earnings history
# https://algotrading101.com/learn/yahoo-finance-api-guide/
# https://algotrading101.com/learn/quandl-guide/
# For quandl and yahoo finance, it's not recommended to build mission critical systems where you have a lot of money on the line!
# Use an official alternative that is connected directly to exchange data instead, like IEX or Polygon.

In [7]:
quandl.ApiConfig.api_key = "SKq39CBqXUj8chCvXWci"

In [8]:
# data is no longer actively maintained after 2017
mydata = quandl.get("WIKI/FB", collapse="annual", start_date="2017-01-31", end_date="2017-10-31")
mydata.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Ex-Dividend,Split Ratio,Adj. Open,Adj. High,Adj. Low,Adj. Close,Adj. Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-12-31,180.57,180.8,178.94,180.06,19766043.0,0.0,1.0,180.57,180.8,178.94,180.06,19766043.0


In [9]:
gdp = quandl.get("FRED/GDP", start_date="2017-01-31", end_date="2017-10-31")
gdp.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
2017-04-01,19322.92
2017-07-01,19558.693
2017-10-01,19882.965


In [12]:
ticker_list = ["voo", "ba"]
historical_data = {}
for ticker in ticker_list:
    historical_data[ticker] = si.get_data(ticker, start_date="2022-01-31", end_date="2022-10-31")

historical_data["voo"].head()

Unnamed: 0,open,high,low,close,adjclose,volume,ticker
2022-01-31,405.670013,413.970001,404.350006,413.690002,408.820312,9200400,VOO
2022-02-01,414.339996,417.040009,410.899994,416.390015,411.488495,8154900,VOO
2022-02-02,418.980011,421.179993,416.540009,420.359985,415.411743,6726500,VOO
2022-02-03,414.589996,416.429993,409.790009,410.589996,405.756775,7658300,VOO
2022-02-04,410.339996,416.23999,408.059998,412.519989,407.664062,8210000,VOO


In [2]:
sp500_list = si.tickers_sp500()
sp500_list = sp500_list[2:6]
sp500_historical = {}
for ticker in sp500_list:
    sp500_historical[ticker] = si.get_data(ticker, start_date="01/01/2020", end_date="01/05/2020", interval="1d")

sp500_historical

{'AAP':                   open        high         low       close    adjclose  \
 2020-01-02  160.470001  160.949997  157.529999  159.410004  151.931366   
 2020-01-03  158.029999  159.630005  157.250000  159.419998  151.940933   
 
             volume ticker  
 2020-01-02  944900    AAP  
 2020-01-03  567000    AAP  ,
 'AAPL':                  open       high        low      close   adjclose     volume  \
 2020-01-02  74.059998  75.150002  73.797501  75.087502  73.561539  135480400   
 2020-01-03  74.287498  75.144997  74.125000  74.357498  72.846375  146322800   
 
            ticker  
 2020-01-02   AAPL  
 2020-01-03   AAPL  ,
 'ABBV':                  open       high        low      close   adjclose   volume  \
 2020-01-02  89.080002  89.570000  88.510002  89.550003  77.932617  5639200   
 2020-01-03  88.169998  89.389999  87.900002  88.699997  77.192871  5988500   
 
            ticker  
 2020-01-02   ABBV  
 2020-01-03   ABBV  ,
 'ABC':                  open       high        lo

In [5]:
eod_api_key = '637eb26fb1dc30.49499477'

In [6]:
def get_ticker_news(ticker, start_date, end_date, n_news, eod_api_key, offset = 0):
    url = f'https://eodhistoricaldata.com/api/news?api_token={eod_api_key}&s={ticker}&limit={n_news}&offset={offset}&from={start_date}&to={end_date}'
    news_json = requests.get(url).json()
    news = []
    for i in range(len(news_json)):
        date = news_json[-i]['date']
        title = news_json[-i]['title']
        news.append([ticker, date, title])
        
    return pd.DataFrame(news, columns=['ticker', 'date', 'title'])

In [8]:
aapl_news = get_ticker_news('AAPL', '2021-11-01', '2022-10-31', 1000, eod_api_key, offset = 0)

In [10]:
aapl_news.to_csv('/Users/chiuchristine/Documents/dev_projects/trading/data/aapl_news.csv')

In [12]:
# from advanced search, copy the query in the search bar
query = "(amazon OR google OR meta) min_replies:50 min_faves:100 min_retweets:200 lang:en until:2022-11-20 since:2020-01-01"
tweets = []
limit = 300

for tweet in sntwitter.TwitterSearchScraper(query).get_items():   
    if len(tweets) == limit:
      break
    else:
      tweets.append([tweet.date, tweet.username, tweet.content])
df = pd.DataFrame(tweets, columns=['Date', 'User', 'Tweet'])

  tweets.append([tweet.date, tweet.username, tweet.content])


In [13]:
print(df.head())

                       Date            User  \
0 2022-11-19 23:34:29+00:00         PGDynes   
1 2022-11-19 23:14:56+00:00      Andie00471   
2 2022-11-19 22:31:16+00:00    OnlyBTStream   
3 2022-11-19 20:47:09+00:00   DavidHundeyin   
4 2022-11-19 20:46:14+00:00  AstroBabiesNFT   

                                               Tweet  
0  “According to the WWF report, 35% of the Amazo...  
1  Twitter tip: Don't ever post a pic on Twitter ...  
2  ⚽️BTS WORLD CUP THE FINAL⚽️\n\n🔥YOUTUBE CHALLE...  
3  If you enjoyed the @WestAfricaWeek documentary...  
4  Traditional onlinegambling 🎰combined with meta...  
