In [2]:
#conda activate pyvizenv

In [3]:
#pip install yfinance

In [18]:
# Import Libraries
import os
import requests
import pandas as pd
from dotenv import load_dotenv
import json
import panel as pn
import hvplot.pandas
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
pn.extension('plotly')
import plotly.express as px
import seaborn as sns
from datetime import datetime, timedelta
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

from nltk.sentiment.vader import SentimentIntensityAnalyzer
from newsapi import NewsApiClient
# Module-level VADER analyzer; the headline sentiment scoring function further down reads it as a global
sid = SentimentIntensityAnalyzer()




In [2]:
# Load the .env environment so the API keys read below are available as environment variables
load_dotenv()


True

In [3]:
# Pull in the Glassnode API key from the environment
api_key = os.getenv("glassnode_api")
# Show only the type so the secret itself never lands in the notebook output;
# os.getenv returns None when the variable is unset, which would show NoneType here.
type(api_key)


str

In [4]:
# Define crypto currencies to pull
# NOTE(review): none of the cells visible here read this list; the price request hard-codes 'BTC'.
crypto_list = ["BTC"]


In [5]:
# Define the price URL (Glassnode market price-in-USD metric endpoint)
price_url = 'https://api.glassnode.com/v1/metrics/market/price_usd'



## BTC API Data Pull

In [8]:
# Price API request: BTC price at a 24h interval
btc_price_res = requests.get(
    price_url,
    params={'a': 'BTC', 'i': '24h', 'api_key': api_key},
    timeout=30,  # never let a stalled connection hang the notebook
)
# Fail right here with a clear HTTP error (bad key, rate limit, ...) instead
# of handing an error payload to the JSON parser below.
btc_price_res.raise_for_status()


# Convert the response to a pandas DataFrame (parsing the 't' column as dates),
# give the columns readable names and index the frame by date.
btc_price_df = pd.read_json(btc_price_res.text, convert_dates=['t'])
btc_price_df.columns = ['Date', 'BTC Price']
btc_price_df = btc_price_df.set_index('Date')




## BTC Data Table Debugging Cells

In [9]:
# Debug view of the BTC price table built in the previous cell
btc_price_df

Unnamed: 0_level_0,BTC Price
Date,Unnamed: 1_level_1
2010-07-17,0.049510
2010-07-18,0.085840
2010-07-19,0.080800
2010-07-20,0.074733
2010-07-21,0.079210
2010-07-22,0.055945
2010-07-23,0.062283
2010-07-24,0.054540
2010-07-25,0.050500
2010-07-26,0.056000


## BTC Daily Changes

In [10]:
# Day-over-day percentage change of the price column, relabelled; rows with
# no computable change (such as the very first date) are dropped.
btc_daily_price_pct_change = (
    btc_price_df
    .pct_change(1)
    .set_axis(['BTC Daily Price Change'], axis=1)
    .dropna()
)
btc_daily_price_pct_change.tail()

Unnamed: 0_level_0,BTC Daily Price Change
Date,Unnamed: 1_level_1
2021-04-28,-0.003421
2021-04-29,-0.022534
2021-04-30,0.078729
2021-05-01,0.000627
2021-05-02,-0.005213


## BTC Data Aggregating & Cleaning

In [11]:
# Collect the price frame and the daily-change frame into a list
btc_frames = [btc_price_df, btc_daily_price_pct_change]

# Concatenate the frames side by side (axis=1), aligned on their Date index.
# The outer join keeps dates present in either frame, so a date missing from
# the daily-change frame (rows removed by dropna) shows NaN in that column.
btc_data = pd.concat(btc_frames, axis=1, join="outer", ignore_index=False)
btc_data.tail()


Unnamed: 0_level_0,BTC Price,BTC Daily Price Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-04-28,54793.39453,-0.003421
2021-04-29,53558.662069,-0.022534
2021-04-30,57775.278152,0.078729
2021-05-01,57811.517528,0.000627
2021-05-02,57510.151675,-0.005213


# News API Keys 

In [12]:
# Read your api key environment variable
# (load_dotenv() already ran in an earlier cell; it is repeated here, presumably so this section can be run on its own)
load_dotenv()

# Set News API Key; os.environ[...] raises KeyError if NEWS_API_KEY is not defined
newsapi = NewsApiClient(api_key=os.environ["NEWS_API_KEY"])

In [13]:
 # Fetch English-language news articles matching "bitcoin", sorted by publication time
bitcoin_headlines = newsapi.get_everything(
    q="bitcoin", language="en", sort_by="publishedAt")
print(f"Total articles about Bitcoin: {bitcoin_headlines['totalResults']}")


Total articles about Bitcoin: 9269


In [14]:
 # Transform the response dictionary to a DataFrame
# The "articles" entry is a list of per-article records, so the plain
# constructor turns it into one row per article.
bitcoin_df = pd.DataFrame(bitcoin_headlines["articles"])

bitcoin_df.head()

Unnamed: 0,author,content,description,publishedAt,source,title,url,urlToImage
0,Doug Noland,Photo by 200mm/iStock via Getty Images\r\n\nI'...,"Despite Chair Powell's repeated explanations, ...",2021-05-02T02:53:09Z,"{'id': None, 'name': 'Seeking Alpha'}",Weekly Commentary: Fed Guessing,https://seekingalpha.com/article/4423432-weekl...,https://static.seekingalpha.com/cdn/s3/uploads...
1,Joshua Nelson,As you may have read from sources earlier this...,As you may have read from sources earlier this...,2021-05-02T02:12:17Z,"{'id': None, 'name': 'Bleeding Cool News'}","OPINION: Magic: The Gathering NFTs Are Bad, Bu...",https://bleedingcool.com/games/opinion-magic-t...,https://bleedingcool.com/wp-content/uploads/20...
2,,A cryptocurrency exchange with about 2 million...,A cryptocurrency exchange with about 2 million...,2021-05-02T01:06:00Z,"{'id': None, 'name': 'Bitcoin.com'}",Cryptocurrency Exchange Hotbit Hacked: Systems...,https://news.bitcoin.com/cryptocurrency-exchan...,https://news.bitcoin.com/wp-content/uploads/20...
3,San Lee,"Since the start of 2021, the crypto markets ha...","Since the start of 2021, the crypto markets ha...",2021-05-01T23:50:42Z,"{'id': None, 'name': 'Bitcoinist'}",Federal Reserve’s Stance on Monetary Policy Re...,https://bitcoinist.com/federal-reserves-stance...,https://bitcoinist.com/wp-content/uploads/2021...
4,"Billy Bambrough, Contributor, \n Billy Bambrou...","Ethereum, the second-largest cryptocurrency af...",Even as ethereum investors and developers cele...,2021-05-01T23:10:00Z,"{'id': None, 'name': 'Forbes'}",Ethereum Is Suddenly Rocketing But Dogecoin Is...,https://www.forbes.com/sites/billybambrough/20...,https://thumbor.forbes.com/thumbor/fit-in/1200...


In [21]:
# Set current date and the date from one month (30 days) ago using the ISO format.
# "Now" is taken once, directly in New York time: wrapping a naive
# datetime.now() would stamp the machine's local clock with the New York
# offset, and two separate now() calls could straddle midnight.
now_ny = pd.Timestamp.now(tz="America/New_York")
current_date = now_ny.isoformat()
# DateOffset moves by calendar days, keeping the wall-clock time across DST changes
past_date = (now_ny - pd.DateOffset(days=30)).isoformat()

In [22]:
# Use newsapi client to get most relevant 20 headlines per day in the past month
def get_headlines(keyword):
    """Collect one page of article titles per day for ``keyword``.

    Walks backwards one day at a time, starting at the calendar date held in
    the module-level ``current_date`` and stopping before the date held in
    ``past_date``. For every day the module-level ``newsapi`` client is asked
    for English articles published on that single day, sorted by relevancy.

    Parameters
    ----------
    keyword : str
        Search term passed to the news client as ``q``.

    Returns
    -------
    tuple of (list, list)
        ``(all_headlines, all_dates)``: per day, the list of titles from the
        first result page, and the matching midnight ``datetime``. Both lists
        run from the newest day to the oldest.
    """
    day_format = "%Y-%m-%d"
    cursor = datetime.strptime(current_date[:10], day_format)
    stop = datetime.strptime(past_date[:10], day_format)
    one_day = timedelta(days=1)

    titles_by_day = []
    days = []

    print(f"Fetching news about '{keyword}'")
    print("*" * 30)
    while cursor > stop:
        print(f"retrieving news from: {cursor}")
        # Same YYYY-MM-DD text is used for both ends of the one-day window
        day_text = cursor.date().isoformat()
        response = newsapi.get_everything(
            q=keyword,
            from_param=day_text,
            to=day_text,
            language="en",
            sort_by="relevancy",
            page=1,
        )
        titles_by_day.append([item["title"] for item in response["articles"]])
        days.append(cursor)
        cursor -= one_day
    return titles_by_day, days

In [23]:
# For headlines:
btc_headlines, dates = get_headlines("bitcoin")

Fetching news about 'bitcoin'
******************************
retrieving news from: 2021-05-01 00:00:00
retrieving news from: 2021-04-30 00:00:00
retrieving news from: 2021-04-29 00:00:00
retrieving news from: 2021-04-28 00:00:00
retrieving news from: 2021-04-27 00:00:00
retrieving news from: 2021-04-26 00:00:00
retrieving news from: 2021-04-25 00:00:00
retrieving news from: 2021-04-24 00:00:00
retrieving news from: 2021-04-23 00:00:00
retrieving news from: 2021-04-22 00:00:00
retrieving news from: 2021-04-21 00:00:00
retrieving news from: 2021-04-20 00:00:00
retrieving news from: 2021-04-19 00:00:00
retrieving news from: 2021-04-18 00:00:00
retrieving news from: 2021-04-17 00:00:00
retrieving news from: 2021-04-16 00:00:00
retrieving news from: 2021-04-15 00:00:00
retrieving news from: 2021-04-14 00:00:00
retrieving news from: 2021-04-13 00:00:00
retrieving news from: 2021-04-12 00:00:00
retrieving news from: 2021-04-11 00:00:00
retrieving news from: 2021-04-10 00:00:00
retrieving news

In [24]:
# Consider using previously defined function(s):
def create_df(news, language):
    """Build a one-column DataFrame holding the ``content`` of each article.

    Parameters
    ----------
    news : iterable
        Article records; each usable record is indexed with ``"content"``
        (presumably the dicts from a news response's ``"articles"`` list).
    language : str
        Not used by this function; kept so existing callers do not break.

    Returns
    -------
    pandas.DataFrame
        One row per usable article with a single ``"text"`` column. The
        column is present even when no article could be used.
    """
    articles = []
    for article in news:
        try:
            articles.append({"text": article["content"]})
        except (KeyError, TypeError, AttributeError):
            # A record without "content" raises KeyError and a
            # non-subscriptable record (e.g. None) raises TypeError; such
            # records are skipped rather than aborting the whole batch.
            continue
    return pd.DataFrame(articles, columns=["text"])



def headline_sentiment_summarizer_avg(headlines):
    """Average the VADER compound score of each day's headlines.

    Parameters
    ----------
    headlines : iterable of iterables
        One inner collection of headline strings per day; ``None`` entries
        are ignored.

    Returns
    -------
    list of float
        One mean compound score per day, in input order. A day with no
        scoreable headline yields ``nan`` instead of raising
        ``ZeroDivisionError``, so the result stays aligned with the input.
    """
    sentiment = []
    for day in headlines:
        day_score = [
            sid.polarity_scores(h)["compound"] for h in day if h is not None
        ]
        if day_score:
            sentiment.append(sum(day_score) / len(day_score))
        else:
            # Keep a placeholder so positions still match the list of days
            sentiment.append(float("nan"))
    return sentiment
# The function above returns one average per day, in the same order as its input, so the result lines up positionally with the list of dates.

In [27]:
# Single-column frame of per-day average sentiment; rows follow the order of btc_headlines
btc_avg_sent = pd.DataFrame({"btc_avg": headline_sentiment_summarizer_avg(btc_headlines)})

In [29]:
# Set the index value of the sentiment averages DataFrame to be the series of dates.
# `dates` was returned by get_headlines alongside btc_headlines, so it is ordered like the rows.
btc_avg_sent.index = pd.to_datetime(dates)

In [31]:
# Merge with BTC returns: join the sentiment column onto the daily price changes by date index,
# then keep only the dates that have both a return and a sentiment score.
btc_sentiments = btc_daily_price_pct_change.join(btc_avg_sent).dropna(how="any")

# Display data
display(btc_sentiments)

Unnamed: 0_level_0,BTC Daily Price Change,btc_avg
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-04-02,0.004621,0.181685
2021-04-03,-0.029451,-0.05346
2021-04-04,0.019254,0.15762
2021-04-05,0.002484,0.08336
2021-04-06,-0.011126,0.01585
2021-04-07,-0.034752,-0.04395
2021-04-08,0.037384,0.034785
2021-04-09,-0.001305,0.161575
2021-04-10,0.026581,0.101945
2021-04-11,0.007245,0.194425


In [33]:
# Correlate the headlines' sentiment to returns
# Only dates that survived the join/dropna above are included (at most the 30-day news window), so this is a small-sample estimate.
btc_sentiments.corr()

Unnamed: 0,BTC Daily Price Change,btc_avg
BTC Daily Price Change,1.0,0.158645
btc_avg,0.158645,1.0
