In [13]:
import os 
import json 
import time 
import datetime
import requests
import pandas as pd
from dotenv import load_dotenv

In [8]:
load_dotenv()

True

In [9]:
start_date = datetime.date(2020, 6, 16)
end_date = datetime.date(2020, 9, 16)
delta = datetime.timedelta(days=1)
articles=[]
while start_date <= end_date:
    # print(start_date)
    gnews_url = f"https://gnews.io/api/v4/search?q=bitcoin&in=cryptocurrency$from={start_date}T00:01:36Z&to={start_date}T23:59:36Z&lang=en&token=109989618e479cd03b57c69c7cf5aac3"
    response = requests.get(gnews_url)
    data = response.json()
    articles.append(data)
    start_date += delta
    time.sleep(3)


                
                
                
                
                

In [10]:
articles

[{'totalArticles': 4,
  'articles': [{'title': 'Bitcoin Dips After Halving, but the Publicity Is Helping',
    'description': 'A technical change in Bitcoin has brought new attention to the cryptocurrency, but the pre-event price bump doesn’t seem to have held.',
    'content': 'Text size\nA technical change in Bitcoin has brought new attention to the cryptocurrency, but the price bump that came ahead of the adjustment doesn’t seem to have held.\nThe so-called halving, which took place on Monday, reduced the number of Bitcoins... [2039 chars]',
    'url': 'https://www.barrons.com/articles/bitcoin-dips-after-halving-but-the-publicity-is-helping-51589293172',
    'image': 'https://images.barrons.com/im-186035/social',
    'publishedAt': '2020-05-12T14:19:00Z',
    'source': {'name': "Barron's", 'url': 'https://www.barrons.com'}},
   {'title': 'Bitcoin Rallies Above $10,000. Don’t Think of It as a Hedge.',
    'description': 'The cryptocurrency has more than doubled in price since March, 

In [15]:
articles_df = pd.json_normalize(articles, record_path=["articles"], meta="totalArticles")
articles_df['title&description']= articles_df['title']+ " " +articles_df['description']
articles_df = articles_df[['publishedAt', 'title&description', 'totalArticles']]
articles_df.rename(columns={'publishedAt':'date'}, inplace=True)
articles_df.head()

AttributeError: module 'pandas' has no attribute 'json_normalize'

In [None]:
# clean dataframe and add a column that is composed of "title" and "description". Also group dataframe by "publish date"
articles_df['date']=pd.to_datetime(articles_df['date'], infer_datetime_format=True).dt.date
bitcoin_articles= articles_df.groupby(by=["date",'totalArticles']).sum()
bitcoin_articles.head()

In [None]:
# Import the libraries for sentiment scoring using Vader
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Initialize the VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer()
# Download/Update the VADER Lexicon
nltk.download('vader_lexicon')

In [None]:
# Define two lists to store vader sentiment scoring
y_vader_pred = []
y_vader_prob = []

In [None]:
# Score sentiment of test set using Vader
for text in bitcoin_articles["title&description"]:
    y_vader_prob.append(analyzer.polarity_scores(text)["pos"])
    sentiment_score = analyzer.polarity_scores(text)["compound"]
    if sentiment_score >= 0.1:
        y_vader_pred.append(1)
    else:
        y_vader_pred.append(0)

In [None]:
# add sentiment score to bitcoin_articles dataframe
bitcoin_articles["Sentiment Score"]=y_vader_pred
bitcoin_articles.head()

In [None]:
# import ccxt SDK and get bitcoin historical prices from kraken
import ccxt
kraken_public_key = os.getenv("KRAKEN_PUBLIC_KEY")
kraken_secret_key = os.getenv("KRAKEN_SECRET_KEY")
kraken = ccxt.kraken({"apiKey": kraken_public_key, "secret": kraken_secret_key})

historical_prices = kraken.fetch_ohlcv("BTC/USD","1d")

In [None]:
historical_prices_df = pd.DataFrame(historical_prices, columns=["date", "open", "high", "low", "close", "volume"])
historical_prices_df["date"] = pd.to_datetime(
    historical_prices_df["date"], unit="ms"
)
historical_prices_df.set_index("date",inplace=True)
historical_prices_df.head()

In [None]:
# Drop NAs and calculate daily percent return
historical_prices_df['daily_return'] = historical_prices_df['close'].dropna().pct_change()
historical_prices_df['volume change'] = historical_prices_df['volume'].pct_change().shift(-1)
historical_prices_df['spread'] = (historical_prices_df['high'] - historical_prices_df['low'])/historical_prices_df['open']
historical_prices_df.head
()

In [None]:
combined_df = bitcoin_articles.join(historical_prices_df)
combined_df['articles'] = combined_df['articles'].shift(-1)
combined_df.head()

In [None]:
# analyzing tone of bitcoin articles with ibm_watson tone analyzer SDK
from ibm_watson import ToneAnalyzerV3
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

ibm_key = os.getenv("ibm_key")
authenticator = IAMAuthenticator(ibm_key)
tone_analyzer = ToneAnalyzerV3(
    version='2017-09-21',
    authenticator=authenticator
)

tone_analyzer.set_service_url('https://api.us-east.tone-analyzer.watson.cloud.ibm.com')
tone_analyzer.set_disable_ssl_verification(True)

In [None]:
tone_analyzed = []
for text in combined_df["title&description"]:
    tone_analysis = tone_analyzer.tone(
    {'text': text},
    content_type='application/json').get_result()
    tone_analyzed.append(tone_analysis)

In [None]:
# working on figuring out how to convert tone_analyzed into sparse matrix that will be merged with combined_df so we evaluate extra features.
tone=[]
for text in tone_analyzed:
    dic={}
    tone.append(dic)
    for emotions in text["document_tone"]["tones"]:
        dic.update({emotions['tone_id']:emotions['score']})
data = pd.DataFrame(tone)
data.fillna(0,inplace=True)
data.head()

In [None]:
combined_df.reset_index(inplace=True)
all_df = combined_df.join(data)
all_df.set_index("date",inplace=True)


In [None]:
all_df.head()

In [None]:
#all_df.to_csv('ben_sentiment.csv')