In [30]:
# Initial imports
import os
import pandas as pd
from dotenv import load_dotenv
import nltk as nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Chris\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [31]:
# Read your api key environment variable
load_dotenv()
api_key = "f7ff35bbe68947ee98817b2a61fb20c7"


In [32]:
# Create a newsapi client
from newsapi import NewsApiClient

In [33]:
newsapi = NewsApiClient(api_key=api_key)

In [34]:
# Fetch the Bitcoin news articles
btc_headlines =newsapi.get_everything(
    q="bitcoin",
    language="en",
    sort_by="relevancy"
)

In [35]:
# Fetch the Ethereum news articles
eth_headlines = newsapi.get_everything(
    q="ethereum",
    language="en",
    sort_by="relevancy"
)

In [36]:
# Create the Bitcoin sentiment scores DataFrame
sentiments =[]

for articles in btc_headlines["articles"]:
    try:
        text = articles["content"]
        results = analyzer.polarity_scores(text)
        compound = results["compound"]
        pos = results["pos"]
        neu = results["neu"]
        neg = results["neg"]
        
        sentiments.append({
            "text": text,
            "Compound": compound,
            "Positive": pos,
            "Negative": neg,
            "Neutral": neu,
        })
    except AttributeError:
        pass

btc = pd.DataFrame(sentiments)
btc.head()

Unnamed: 0,text,Compound,Positive,Negative,Neutral
0,Even if you arent paying attention to Bitcoin ...,0.5859,0.124,0.0,0.876
1,Israeli national Tal Prihar pled guilty to rou...,-0.3182,0.045,0.084,0.871
2,Netflix\r\n is making a docuseries about one o...,-0.7096,0.0,0.169,0.831
3,British entrepreneur and financier Richard ODe...,0.6808,0.185,0.0,0.815
4,"Over the last five years, about 25,000 of thos...",-0.4939,0.0,0.091,0.909


In [37]:
# Create the Ethereum sentiment scores DataFrame
sentiments =[]

for articles in eth_headlines["articles"]:
    try:
        text = articles["content"]
        results = analyzer.polarity_scores(text)
        compound = results["compound"]
        pos = results["pos"]
        neu = results["neu"]
        neg = results["neg"]
        
        sentiments.append({
            "text": text,
            "Compound": compound,
            "Positive": pos,
            "Negative": neg,
            "Neutral": neu,
        })
    except AttributeError:
        pass

eth = pd.DataFrame(sentiments)
eth.head()

Unnamed: 0,text,Compound,Positive,Negative,Neutral
0,If people who buy cryptocurrencies intended on...,-0.2023,0.039,0.062,0.899
1,"Technical analysis isnt a perfect tool, but it...",-0.2498,0.0,0.059,0.941
2,This enables an L1 platform to bootstrap its n...,0.0,0.0,0.0,1.0
3,"The means-and-ends moralists, or non-doers, al...",0.0,0.0,0.0,1.0
4,Coinbase reported that the share of trading vo...,0.6705,0.188,0.0,0.812


In [38]:
# Describe the Bitcoin Sentiment
btc.describe()

Unnamed: 0,Compound,Positive,Negative,Neutral
count,20.0,20.0,20.0,20.0
mean,0.151985,0.08885,0.0476,0.8636
std,0.46868,0.059173,0.059001,0.062236
min,-0.7096,0.0,0.0,0.729
25%,-0.272,0.04575,0.0,0.8295
50%,0.2616,0.073,0.0,0.8625
75%,0.5859,0.14525,0.09025,0.9015
max,0.7783,0.185,0.169,1.0


In [39]:
# Describe the Ethereum Sentiment
eth.describe()

Unnamed: 0,Compound,Positive,Negative,Neutral
count,20.0,20.0,20.0,20.0
mean,0.054765,0.05305,0.03055,0.91645
std,0.361855,0.073697,0.049529,0.087686
min,-0.6808,0.0,0.0,0.766
25%,-0.1027,0.0,0.0,0.8225
50%,0.0,0.0195,0.0,0.9425
75%,0.152575,0.06325,0.05975,1.0
max,0.8341,0.234,0.174,1.0


In [40]:
##Questions:
#Q: Which coin had the highest mean positive score?

#A: Bitcoin had the higher mean positive score with .08885

#Q: Which coin had the highest compound score?

#A:According to mean, ETH took BTC with a mean score of 0.054765

#Q. Which coin had the highest positive score?

#A: According to mean, Bitcoin scored highest. However, utilitizing max, Ethereum has the edge.

In [41]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from string import punctuation
import re

In [52]:
# Instantiate the lemmatizer
wnl = WordNetLemmatizer()

# Create a list of stopwords
stop = stopwords.words('english')

# Expand the default stopwords list if necessary
stop.append("u")
stop.append("it")
stop.append("s")
stop.append("n't")
stop.append("_")
stop.append("\`")
stop.append("``")
stop.append("char")
stop.append("chars")
stop.append("''")
stop = set(stop)

In [68]:
# Complete the tokenizer function
def tokenizer(text):
    """Tokenizes text."""
       # Create a tokenized list of the words
    words = word_tokenize(text)
    
    # Remove the punctuation from text
    words = list(filter(lambda t: t not in punctuation, words))
    
    # Lemmatize words into root words
    tokens = [wnl.lemmatize(word) for word in words]
   
    # Convert the words to lowercase
    words =  list(filter(lambda w: w.lower(), words))
    
    # Remove the stop words
    words = list(filter(lambda t: t.lower() not in stop, words))
    
    # Lemmatize into root
    tokens = [wnl.lemmatize(word) for word in words]
    
    return tokens

In [69]:
# Create a new tokens column for Bitcoin
btc["tokens"] = btc.text.apply(tokenizer)
btc.head()

Unnamed: 0,text,Compound,Positive,Negative,Neutral,tokens
0,Even if you arent paying attention to Bitcoin ...,0.5859,0.124,0.0,0.876,"[Even, arent, paying, attention, Bitcoin, cryp..."
1,Israeli national Tal Prihar pled guilty to rou...,-0.3182,0.045,0.084,0.871,"[Israeli, national, Tal, Prihar, pled, guilty,..."
2,Netflix\r\n is making a docuseries about one o...,-0.7096,0.0,0.169,0.831,"[Netflix, making, docuseries, one, worst, rapp..."
3,British entrepreneur and financier Richard ODe...,0.6808,0.185,0.0,0.815,"[British, entrepreneur, financier, Richard, OD..."
4,"Over the last five years, about 25,000 of thos...",-0.4939,0.0,0.091,0.909,"[last, five, year, 25,000, Bitcoin, transferre..."


In [70]:
# Create a new tokens column for Ethereum
eth["tokens"] = btc.text.apply(tokenizer)
eth.head()

Unnamed: 0,text,Compound,Positive,Negative,Neutral,tokens
0,If people who buy cryptocurrencies intended on...,-0.2023,0.039,0.062,0.899,"[Even, arent, paying, attention, Bitcoin, cryp..."
1,"Technical analysis isnt a perfect tool, but it...",-0.2498,0.0,0.059,0.941,"[Israeli, national, Tal, Prihar, pled, guilty,..."
2,This enables an L1 platform to bootstrap its n...,0.0,0.0,0.0,1.0,"[Netflix, making, docuseries, one, worst, rapp..."
3,"The means-and-ends moralists, or non-doers, al...",0.0,0.0,0.0,1.0,"[British, entrepreneur, financier, Richard, OD..."
4,Coinbase reported that the share of trading vo...,0.6705,0.188,0.0,0.812,"[last, five, year, 25,000, Bitcoin, transferre..."


In [73]:
## N GRAMS

In [74]:
from collections import Counter
from nltk import ngrams

In [75]:
# Generate the Bitcoin N-grams where N=2
N = 2
grams = ngrams(tokenizer(btc.text.str.cat()), N)
Counter(grams).most_common(20)

[(('Reuters', 'Bitcoin'), 3),
 (('Illustration', 'Alex'), 2),
 (('Alex', 'Castro'), 2),
 (('Castro', 'Verge'), 2),
 (('Mr.', 'Lichtensteins'), 2),
 (('Lichtensteins', 'wallet'), 2),
 (('El', 'Salvador'), 2),
 (('Even', 'arent'), 1),
 (('arent', 'paying'), 1),
 (('paying', 'attention'), 1),
 (('attention', 'Bitcoin'), 1),
 (('Bitcoin', 'cryptocurrencies'), 1),
 (('cryptocurrencies', 'might'), 1),
 (('might', 'noticed'), 1),
 (('noticed', 'value'), 1),
 (('value', 'plummeted'), 1),
 (('plummeted', 'last'), 1),
 (('last', 'week'), 1),
 (('week', 'total'), 1),
 (('total', 'value'), 1)]

In [76]:
# Generate the Ethereum N-grams where N=2
N = 2
grams = ngrams(tokenizer(eth.text.str.cat()), N)
Counter(grams).most_common(20)

[(('324', 'million'), 4),
 (('million', 'cryptocurrency'), 3),
 (('version', 'article'), 3),
 (('article', 'published'), 3),
 (('published', 'TIME'), 3),
 (('TIME', '’'), 3),
 (('’', 'newsletter'), 3),
 (('newsletter', 'Metaverse'), 3),
 (('Metaverse', 'Subscribe'), 3),
 (('Subscribe', 'weekly'), 3),
 (('weekly', 'guide'), 3),
 (('guide', 'future'), 3),
 (('find', 'past'), 3),
 (('past', 'issue'), 3),
 (('issue', 'newsletter'), 3),
 (('trading', 'volume'), 2),
 (('15', 'million'), 2),
 (('profile', 'picture'), 2),
 (('Online', 'thief'), 2),
 (('thief', 'made'), 2)]

In [80]:
# Function token_count generates the top 10 words for a given coin
def token_count(tokens, N=3):
    """Returns the top N tokens from the frequency count"""
    return Counter(tokens).most_common(N)

In [82]:
# Use token_count to get the top 10 words for Bitcoin
all_tokens = tokenizer(btc.text.str.cat())
token_count(all_tokens, 10)

[('Bitcoin', 13),
 ('Reuters', 6),
 ('…', 4),
 ('Feb', 4),
 ('cryptocurrency', 4),
 ('money', 3),
 ('one', 3),
 ('wallet', 3),
 ('bitcoin', 3),
 ('crypto', 3)]

In [85]:
# Use token_count to get the top 10 words for Ethereum
all_tokens = tokenizer(eth.text.str.cat())
token_count(all_tokens, 10)

[('million', 7),
 ('cryptocurrency', 6),
 ('newsletter', 6),
 ('Ethereum', 4),
 ('ethereum', 4),
 ('Feb', 4),
 ('Reuters', 4),
 ('324', 4),
 ('find', 4),
 ('investment', 3)]

In [86]:
import spacy
from spacy import displacy

In [87]:
# Download the language model for SpaCy
# !python -m spacy download en_core_web_sm

In [88]:
# Load the spaCy model
nlp = spacy.load('en_core_web_sm')

In [89]:
# Concatenate all of the Bitcoin text together
all_btc_text = btc.text.str.cat()
# Run the NER processor on all of the text
doc = nlp(all_btc_text)

# Add a title to the document
doc.user_data["title"] = "Bitcoin NER"
# Render the visualization
displacy.render(doc, style='ent', jupyter=True)


In [90]:
# List all Entities
for ent in doc.ents:
    print(ent.text, ent.label_)

last week DATE
3 MONEY
Tal Prihar PERSON
Illustration ORG
Alex Castro PERSON
Verge ORG
two-year DATE
about one CARDINAL
Ilya Lichtenstein PERSON
this week DATE
Richard ODell PERSON
house ORG
October DATE
Poulde GPE
the last five years DATE
about 25,000 CARDINAL
Lichtensteins PERSON
the Department of Justice ORG
Monaco PERSON
Al Capone LOC
Margaret Lynaugh PERSON
Bitcoin PERSON
El Salvador GPE
US GPE
The International Monetary Fund ORG
El Salvador GPE
Reuters ORG
Jan 26 DATE
Reuters ORG
The U.S. Securities and Exchange Commission ORG
Thursday DATE
ETF ORG
Fidelitys Wise Origin Bitcoin ORG
+202 GPE
Super Bowl EVENT
Larry David PERSON
LeBron James PERSON
+3454 ORG
one CARDINAL
Tuesday DATE
$4.5 billion MONEY
Lichtensteins PERSON
Feb. 1 DATE
roughly $3.6 billion MONEY
one 2020 DATE
Reuters ORG
thousands CARDINAL
202 CARDINAL
4 CARDINAL
Reuters ORG
8.82% PERCENT
40,611.4 MONEY
2202 DATE
Friday DATE
3,291.29 MONEY
Bitcoin PERSON
23.2% PERCENT
22 CARDINAL
Reuters ORG
2021 DATE
thousands CARDI

In [91]:
# Concatenate all of the Bitcoin text together
all_eth_text = eth.text.str.cat()
# Run the NER processor on all of the text
doc = nlp(all_eth_text)

# Add a title to the document
doc.user_data["title"] = "Ethereum NER"
# Render the visualization
displacy.render(doc, style='ent', jupyter=True)


In [92]:
# List all Entities
for ent in doc.ents:
    print(ent.text, ent.label_)

Ethereum ORG
ETH-USD ORG
Crypto PERSON
-- Saul AlinskyCoinbase PERSON
last year DATE
Between 2020 and 2021 DATE
Colorado GPE
the middle of this year DATE
US GPE
Jared Polis PERSON
James Bareham PERSON
More than $15 million MONEY
More than $15 million MONEY
Ukrainian GPE
Russia GPE
night TIME
Twitter PRODUCT
Ethereum ORG
second ORDINAL
two months DATE
NFT ORG
7 CARDINAL
Reuters ORG
Monday DATE
$450 million MONEY
Sequoia Capital India ORG
nearly $324 million MONEY
Getty Images PERSON
more than $324 million MONEY
Wormhole PERSON
120,000 CARDINAL
wEth ORG
Ethereum ORG
nearly $324 million MONEY
Getty Images PERSON
more than $324 million MONEY
Wormhole PERSON
two hours TIME
YouTube ORG
Waka Flacka Fla ORG
Richard ODell PERSON
house ORG
October DATE
Poulde GPE
4 CARDINAL
Reuters ORG
8.82% PERCENT
40,611.4 MONEY
2202 DATE
Friday DATE
3,291.29 MONEY
Bitcoin PERSON
23.2% PERCENT
TIME ORG
weekly DATE
Vita ORG
TIME ORG
weekly DATE
TIME ORG
weekly DATE
3 CARDINAL
Reuters ORG
NFT ORG
later this year

In [93]:
from wordcloud import WordCloud
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = [20.0, 10.0]

AttributeError: 'DataFrame' object has no attribute 'WordCloud'