In [None]:
# Initial imports

from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import hvplot.pandas
import tensorflow as tf
%matplotlib inline

import os
from dotenv import load_dotenv
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from newsapi import NewsApiClient
sid = SentimentIntensityAnalyzer()

import panel as pn
from panel.interact import interact

In [None]:
# Read your API key environment variable

# Load .env environment variables (python-dotenv) so the key below can be read
load_dotenv()

# Set News API Key
# os.environ[...] raises KeyError when NEWS_API_KEY is not set, so a missing
# key fails here rather than on the first request.
newsapi = NewsApiClient(api_key=os.environ["NEWS_API_KEY"])

# Set nomics/other crypto data API key 
# Not necessary if just downloading dataframes

---
## Crypto returns

In [None]:
# Use selected API to obtain historical prices for selected Crypto
# More than likely going to obtain latest data for selected cryptos but historical data will work well

In [None]:
# Create dataframes to house crypto data
# May be helpful to just run it through postman to have a dataframe already made and just clean up
# Drop unnecessary columns, keep only crypto name, date and price
# Create new column for daily returns and fill using pct_change 

---
## Sentiment analysis (primitive, limited machine learning)

In [None]:
# Fetch Bitcoin news articles
# Fetch Dogecoin news articles
# Fetch additional crypto news articles

In [None]:
# Consider using previously defined function(s):
def create_df(news, language):
    """Build a one-column DataFrame holding the body text of each article.

    Parameters
    ----------
    news : iterable
        Article records. Each record is expected to be a mapping with a
        "content" entry; records without one are skipped.
    language : str
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per usable article with a single "text" column. The column is
        present even when no article could be used, so downstream
        ``df["text"]`` lookups do not fail on an empty result.
    """
    articles = []
    for article in news:
        try:
            text = article["content"]
        except (KeyError, TypeError, AttributeError):
            # KeyError: record has no "content" entry.
            # TypeError: record is not subscriptable (e.g. None).
            # AttributeError: kept because the original handler caught it.
            continue
        articles.append({"text": text})
    return pd.DataFrame(articles, columns=["text"])

# For headlines:
# Example:
# NOTE(review): get_headlines is not defined in the visible part of this
# notebook; it must be defined or imported before this line runs on a fresh
# kernel. The unpacking below requires it to return exactly two values —
# presumably (headlines grouped per day, matching dates); confirm.
btc_headlines, dates = get_headlines("bitcoin")

def headline_sentiment_summarizer_avg(headlines, analyzer=None):
    """Average the VADER compound score of each day's headlines.

    Parameters
    ----------
    headlines : iterable of iterables
        One inner collection of headline strings per day. ``None`` entries
        are ignored.
    analyzer : object, optional
        Any object exposing ``polarity_scores(text)`` that returns a mapping
        with a ``"compound"`` entry. Defaults to the notebook-level ``sid``.

    Returns
    -------
    list of float
        One value per day, in input order. A day with no scorable headline
        yields NaN instead of raising ZeroDivisionError, so the result keeps
        the same length as ``headlines``.
    """
    # Resolve the scorer lazily so an explicit analyzer never touches `sid`.
    scorer = sid if analyzer is None else analyzer
    sentiment = []
    for day in headlines:
        day_scores = [
            scorer.polarity_scores(headline)["compound"]
            for headline in day
            if headline is not None
        ]
        if day_scores:
            sentiment.append(sum(day_scores) / len(day_scores))
        else:
            # No data for this day: mark it as missing rather than neutral (0.0).
            sentiment.append(float("nan"))
    return sentiment
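# The function above returns one score per day in the same order as its input;
# it does not return the dates themselves, so keep `dates` alongside the result.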

In [None]:
# Create necessary sentiment scores DataFrame(s)

# Sentiment scores dictionaries
# Example:
btc_text = {
    "Compound": [],
    "Negative": [],
    "Neutral": [],
    "Positive": []
}
# Index labels of the btc_df rows that were actually scored. Skipped rows would
# otherwise shift every later score onto the wrong article when joining.
btc_scored_index = []

# Get sentiment for the text
# Example:
for index, row in btc_df.iterrows():
    text = row["text"]
    if not isinstance(text, str):
        # Article has no usable body (e.g. None/NaN); leave it out of the scores.
        continue
    # Sentiment scoring with VADER (`sid` is the analyzer created in the imports cell)
    text_sentiment = sid.polarity_scores(text)
    btc_text["Compound"].append(text_sentiment["compound"])
    btc_text["Negative"].append(text_sentiment["neg"])
    btc_text["Neutral"].append(text_sentiment["neu"])
    btc_text["Positive"].append(text_sentiment["pos"])
    btc_scored_index.append(index)

# Join on the original row labels so each score sits next to its own article.
btc_sentiment_df = pd.DataFrame(btc_text, index=btc_scored_index).join(btc_df)
btc_sentiment_df.head()

# The cell above can be reused for additional cryptos by swapping in their DataFrame.
# TODO: retain each article's publication date so scores can be matched to return dates.

In [None]:
# Describe the Bitcoin Sentiment

In [None]:
# Import tokenizing libraries
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from string import punctuation
import re

In [None]:
# Expand the default stopwords list if necessary

In [None]:
# Consider using previously defined tokenizer function
def tokenizer(text):
    """Turn raw text into a list of lowercase, lemmatized, non-stopword tokens.

    Parameters
    ----------
    text : str
        Text to tokenize. Every character other than ASCII letters and spaces
        is removed before splitting into words.

    Returns
    -------
    list of str
        Lemmatized tokens in their original order, with English stopwords and
        any notebook-level ``sw_addons`` entries removed.
    """
    # `sw_addons` is an optional notebook-level collection of extra stopwords;
    # when no cell has defined it, fall back to the default list only.
    try:
        extra_stopwords = set(sw_addons)
    except NameError:
        extra_stopwords = set()
    # Build the combined stopword set once instead of once per word.
    stop_words = set(stopwords.words('english')) | extra_stopwords
    lemmatizer = WordNetLemmatizer()
    # Strip everything that is not a letter or a space
    re_clean = re.sub("[^a-zA-Z ]", "", text)
    # Split into words and lowercase them
    words = (word.lower() for word in word_tokenize(re_clean))
    # Drop stopwords, then lemmatize what remains into root words
    return [lemmatizer.lemmatize(word) for word in words if word not in stop_words]

In [None]:
# Tokenize every article body and store the result in a new "tokens" column
# Example:
btc_tokens = list(map(tokenizer, btc_sentiment_df["text"]))
btc_sentiment_df["tokens"] = btc_tokens

In [None]:
# Import NER libraries 
import spacy
from spacy import displacy

In [None]:
# Load the spaCy model
# Requires the en_core_web_sm model package to be installed
# (e.g. `python -m spacy download en_core_web_sm`).
nlp = spacy.load('en_core_web_sm')

In [None]:
# Concatenate all of the crypto text together
# Example:
# Articles without a body (None/NaN) are dropped first: str.join raises
# TypeError on any non-string item.
all_btc_text = ' '.join(btc_sentiment_df["text"].dropna())
all_btc_text

In [None]:
# Run the NER processor on all of the text
# Example:
# Passes the concatenated article text through the loaded `nlp` pipeline;
# the resulting `btc_doc` is what the cells below read entities from.
btc_doc = nlp(all_btc_text)

In [None]:
# Render the visualization
# style='ent' selects displaCy's named-entity view of the document.
displacy.render(btc_doc, style='ent')

In [None]:
# Not required, use for debugging and adding ent to stopwords list
# List all Entities
# Prints one line per entity in btc_doc: its text followed by its label.
for ent in btc_doc.ents:
    print(ent.text, ent.label_)

In [None]:
# The above code can easily be replicated and edited for additional crypto. Code should be run separately

---
## Dashboard