In [20]:
import os
import json
import requests
import moment
from bs4 import BeautifulSoup
from textblob import TextBlob

# Local
from engine import dataset, transformer

In [2]:
# Fetch the current ticker snapshot from Bitstamp's public HTTP API.
# The timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of JSON-decoding an error page.
ticker_reply = requests.get("https://www.bitstamp.net/api/ticker/", timeout=10)
ticker_reply.raise_for_status()
response = ticker_reply.json()

In [3]:
# Inspect the decoded JSON payload returned by the ticker request.
response

{'volume': '2479.83782994',
 'last': '59353.21',
 'timestamp': '1617412714',
 'bid': '59347.94',
 'vwap': '59411.44',
 'high': '60103.04',
 'low': '58451.65',
 'ask': '59368.04',
 'open': 58967.61}

In [4]:
# Serialise a trimmed ticker summary: each selected field is parsed as a float,
# rounded to two decimals, and wrapped in an {"error", "data"} envelope.
ticker_fields = ("last", "high", "low", "open", "volume")

ticker_summary = {}
for field in ticker_fields:
    ticker_summary[field] = round(float(response[field]), 2)

corjson = json.dumps({"error": False, "data": ticker_summary}, indent=2)

In [5]:
# Show the serialised JSON string (pretty-printed with a two-space indent).
corjson

'{\n  "error": false,\n  "data": {\n    "last": 59353.21,\n    "high": 60103.04,\n    "low": 58451.65,\n    "open": 58967.61,\n    "volume": 2479.84\n  }\n}'

In [47]:
# Download the CoinDesk front page and parse it for headline cards.
# The timeout prevents an indefinite hang; raise_for_status() stops the
# notebook from silently scraping an HTTP error page.
html = requests.get("https://www.coindesk.com/", timeout=10)
html.raise_for_status()
soup = BeautifulSoup(html.text, "html.parser")

# Two groups of <div> cards, selected by their CSS class attribute.
top_articles = soup.find_all('div', class_="card-text-block")
below_list = soup.find_all('div', class_="list-item-card post")

# Accumulator for (headline, date_published, href) tuples appended by a later cell.
headlines = []

In [42]:
# Sanity check: number of cards each selector matched (below_list first, then top_articles).
len(below_list), len(top_articles)

(13, 20)

In [49]:
# Extract a (headline, date_published, href) tuple from every scraped card.
# The list is reset here so that re-running this cell does not append duplicates.
headlines = []
skipped_cards = 0

for card in top_articles + below_list:
    # Cards without a <span class="card-date"> (i.e. below_list) carry the date in <time>.
    date_container = card.find("span", class_="card-date")
    if date_container is None:
        date_container = card.find("time")

    # Headline text lives in an <h4> when present, otherwise in an <h2>.
    headline_container = card.find("h4")
    if headline_container is None:
        headline_container = card.find("h2")

    # Look the anchor up once; the original searched for it twice per card.
    link = card.find("a", class_="")
    href = link.get("href") if link is not None else None

    # A card missing any of the three pieces cannot produce a usable record;
    # skip it instead of aborting the whole loop with an AttributeError/TypeError.
    if date_container is None or headline_container is None or href is None:
        skipped_cards += 1
        continue

    date_published = moment.date(date_container.get_text()).format("M-D")
    headline = headline_container.get_text().strip()

    print(date_published)
    print(href)
    print(headline)
    headlines.append((headline, date_published, href))

# Report skipped cards explicitly so markup changes on the site do not go unnoticed.
if skipped_cards:
    print(f"Skipped {skipped_cards} card(s) with a missing date, headline or link")

04-01
/dogecoin-takes-off-after-musk-moonshot
DOGE Jumps After Tesla’s Musk Promises ‘Literal’ Moonshot
04-01
/filecoin-surges-42-replaces-litecoin-as-the-9th-largest-cryptocurrency
Filecoin Surges 42%, Replaces Litecoin as 9th Largest Digital Asset
03-31
/no-joke-chipotle-to-give-away-200k-in-free-burritos-and-bitcoin-on-april-1
No Joke: Chipotle to Give Away $200K in Free Burritos and Bitcoin on April 1
03-31
/bull-flag-70k-bitcoin-skepticism
‘Bull Flag’ Call for $70K Bitcoin Draws Skepticism From Rival Analysts
03-12
/how-to-create-buy-sell-nfts
How to Create, Buy and Sell NFTs
04-02
/fincen-names-former-chainalysis-executive-acting-director-as-blanco-resigns
FinCEN Names Former Chainalysis Executive Acting Director as Blanco Resigns
04-02
/former-sec-chairman-jay-clayton-new-bitcoin-regulations
Former SEC Chairman Jay Clayton Warns of New Bitcoin Regulations
04-02
/ether-price-rises-above-2k-for-first-in-six-weeks
Ether Price Jumps to All-Time High Near $2,100
04-01
/coindesk-q1-qu

In [52]:
# Label every scraped headline with a coarse sentiment tag, latest date string first.
# NOTE(review): the sort key is the formatted date string, so the ordering is
# lexical — it is only chronological while the strings are zero-padded and
# fall within a single year; confirm before relying on it across a year change.
ordered_headlines = sorted(headlines, key=lambda h: h[1], reverse=True)
processed_headlines = []
for headline_text, date_published, href in ordered_headlines:
    # Keep only the first line of the scraped text.
    headline_str = headline_text.split('\n')[0]
    polarity = TextBlob(headline_str).sentiment.polarity

    # Compare the float directly. The previous `int(polarity) == 0` test
    # truncated toward zero, so every value in (-1, 0) was labelled "NEUT"
    # and only a polarity of exactly -1.0 could ever become "NEG".
    if polarity > 0:
        sentiment = "POS"
    elif polarity < 0:
        sentiment = "NEG"
    else:
        sentiment = "NEUT"

    # Row layout: [date_published, headline, sentiment label, href].
    processed_headlines.append([date_published, headline_str, sentiment, href])

In [54]:
# Show the labelled rows: [date_published, headline, sentiment label, href].
processed_headlines

[['04-03',
  'Thai Central Bank to Pilot Its Retail Central Bank Digital Currency in 2022: Report',
  'NEUT',
  '/thai-central-bank-to-pilot-its-retail-central-bank-digital-currency-in-2022-report'],
 ['04-03',
  'US Added More Than 900K Jobs in March, Blowing Past Estimates',
  'POS',
  '/u-s-added-more-than-900k-jobs-in-march-blowing-past-estimates'],
 ['04-03',
  'MicroStrategy Rated ‘Buy’ at BTIG Partly on View Bitcoin Will Hit $95K by End of 2022',
  'NEUT',
  '/microstrategy-rated-buy-at-btig-partly-on-view-bitcoin-will-hit-95k-by-end-of-2022'],
 ['04-03',
  'Bitcoin Mining Difficulty Hits All-Time High as Delayed ASIC Shipments Come Online',
  'POS',
  '/bitcoin-mining-difficulty'],
 ['04-02',
  'FinCEN Names Former Chainalysis Executive Acting Director as Blanco Resigns',
  'NEUT',
  '/fincen-names-former-chainalysis-executive-acting-director-as-blanco-resigns'],
 ['04-02',
  'Former SEC Chairman Jay Clayton Warns of New Bitcoin Regulations',
  'POS',
  '/former-sec-chairman-ja