# Twitter NLP

In [13]:
import requests 
import json
from config import consumer_key, consumer_secret, access_key, access_secret, bearer_token

In [14]:
from textblob import TextBlob
import pandas as pd
import sys
import tweepy
import matplotlib.pyplot as plt
import numpy as np
import os
import seaborn as sns
import time
import re
import string

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer 
from nltk.corpus import wordnet
nltk.download('averaged_perceptron_tagger')
nltk.download('vader_lexicon')
nltk.download('stopwords')
nltk.download('wordnet')

!pip install gensim
import gensim
from gensim.parsing.preprocessing import remove_stopwords 
import torch
import flair
from flair.models import TextClassifier
from flair.data import Sentence
from segtok.segmenter import split_single

import plotly.express as px
from wordcloud import WordCloud, STOPWORDS
from PIL import Image
from langdetect import detect
from sklearn.feature_extraction.text import CountVectorizer

# Display max column width 
pd.set_option('display.max_colwidth', None)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/memme11/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/memme11/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/memme11/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/memme11/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!




## Twitter API (Tweepy)

In [15]:
# Initialize and gain access to Twitter API
def initialize():
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    return api

api = initialize()

In [16]:
# Call on tweepy API and create dataframe
search_words = ("bitcoin", "etherium", "cardano")
crypto_data = pd.DataFrame()

def get_data(data):
    data = {
        'text': data.full_text,
        'date': data.created_at,
        'followers': data.user.followers_count,
        'favourites': data.user.favourites_count,
        'retweets': data.retweet_count
    }
    return data

for tweets in search_words:
    comp_tweets = api.search(q=tweets, lang = 'en', result_type = 'recent', count=250, tweet_mode='extended')
    
    for tweet in comp_tweets:
        row = get_data(tweet)
        crypto_data = crypto_data.append(row, ignore_index=True)
        
crypto_data

Unnamed: 0,date,favourites,followers,retweets,text
0,2021-07-30 03:24:48,342.0,5.0,3396.0,RT @gethungrycrypto: 💲Giveaway 1 BNB / 24 hours 💲\n\n1. Follow @gethungrycrypto \n2. Like this tweet\n3. Retweet\n4. Tag a friend\n\n🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛…
1,2021-07-30 03:24:47,1956.0,481.0,0.0,"Word on the street is, Michael Burry got liquidated during the liquidation cascade on binance leading the price to 48k. He is usually early, but a bit too early. #michealjburry #burry $btc #btc #Bitcoin"
2,2021-07-30 03:24:47,8954.0,3382.0,0.0,Amazon will accept #dogecoin #Bitcoin
3,2021-07-30 03:24:45,8377.0,281.0,172.0,RT @DocumentingBTC: Greenide Generation has annouced it will close the forty-year-old Lockwood Hills ash landfill in New York to build a ne…
4,2021-07-30 03:24:45,330.0,21.0,53.0,RT @intocryptoverse: 9 daily green candles in a row for #Bitcoin feels pretty good...\n\nbut it has me wondering...\n\nWhat would 10 daily gree…
...,...,...,...,...,...
295,2021-07-30 03:13:38,30199.0,19728.0,0.0,@Tristan_TWNC @cardano_whale 'bout tree fiddy https://t.co/a79kgTYOsi
296,2021-07-30 03:13:35,15583.0,126.0,186.0,RT @Cryptologist626: 💰$100 to 1 Follower In 24 hrs!\n\n☑️ RT and Follow @cardwallet_fi + RT 📌…
297,2021-07-30 03:13:35,3068.0,41.0,105.0,RT @YoroiWallet: A @YoroiWallet dApp connector will allow interactions between users and blockchain-based dApps on the Cardano blockchain.…
298,2021-07-30 03:13:33,67741.0,936.0,10.0,RT @AdAltruistic: When antagonists say “Cardano isn’t a fully functional product”. Are we talking about investing here?? Don’t the most shr…


## Data Preprocessing

In [17]:
# Formatting
# Keep only tweets with over 1000 favourites
crypto_data = crypto_data.loc[crypto_data['favourites']>1000]

# Clean text column using Regex
crypto_data['cleaned_text'] = crypto_data['text']
clean_text = '(RT) @[\w]*:|(@[A-Za-z0-9]+)|([^\,\!\.\'\%0-9A-Za-z \t])|(\w+:\/\/\S+)'
crypto_data['cleaned_text'] = crypto_data['cleaned_text'].str.replace(clean_text, " ", regex=True)
crypto_data['cleaned_text'] = crypto_data['cleaned_text'].str.lower()

# Convert date dtype to datetime, set index, sort index and drop duplicates
crypto_data['date'] = pd.to_datetime(crypto_data['date'])
crypto_data = crypto_data.set_index('date').sort_index(ascending=False)
crypto_data.drop_duplicates(inplace=True)

crypto_data.head(30)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

Unnamed: 0_level_0,favourites,followers,retweets,text,cleaned_text
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-07-30 03:24:47,1956.0,481.0,0.0,"Word on the street is, Michael Burry got liquidated during the liquidation cascade on binance leading the price to 48k. He is usually early, but a bit too early. #michealjburry #burry $btc #btc #Bitcoin","word on the street is, michael burry got liquidated during the liquidation cascade on binance leading the price to 48k. he is usually early, but a bit too early. michealjburry burry btc btc bitcoin"
2021-07-30 03:24:47,8954.0,3382.0,0.0,Amazon will accept #dogecoin #Bitcoin,amazon will accept dogecoin bitcoin
2021-07-30 03:24:45,8377.0,281.0,172.0,RT @DocumentingBTC: Greenide Generation has annouced it will close the forty-year-old Lockwood Hills ash landfill in New York to build a ne…,greenide generation has annouced it will close the forty year old lockwood hills ash landfill in new york to build a ne
2021-07-30 03:24:45,42989.0,662.0,5544.0,RT @flurbnb: $200 to one person in 72 hours\n\nRetweet &amp; follow me &amp; post proof of my notifs 🔔\n\n$FLUR #FLURARMY #FLURMOON #bitcoin #ethereu…,200 to one person in 72 hours retweet amp follow me amp post proof of my notifs flur flurarmy flurmoon bitcoin ethereu
2021-07-30 03:24:44,2785.0,551.0,3396.0,RT @gethungrycrypto: 💲Giveaway 1 BNB / 24 hours 💲\n\n1. Follow @gethungrycrypto \n2. Like this tweet\n3. Retweet\n4. Tag a friend\n\n🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛…,giveaway 1 bnb 24 hours 1. follow 2. like this tweet 3. retweet 4. tag a friend
2021-07-30 03:24:44,3457.0,215.0,5544.0,RT @flurbnb: $200 to one person in 72 hours\n\nRetweet &amp; follow me &amp; post proof of my notifs 🔔\n\n$FLUR #FLURARMY #FLURMOON #bitcoin #ethereu…,200 to one person in 72 hours retweet amp follow me amp post proof of my notifs flur flurarmy flurmoon bitcoin ethereu
2021-07-30 03:24:43,54562.0,505.0,3396.0,RT @gethungrycrypto: 💲Giveaway 1 BNB / 24 hours 💲\n\n1. Follow @gethungrycrypto \n2. Like this tweet\n3. Retweet\n4. Tag a friend\n\n🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛…,giveaway 1 bnb 24 hours 1. follow 2. like this tweet 3. retweet 4. tag a friend
2021-07-30 03:24:42,3982.0,145.0,3396.0,RT @gethungrycrypto: 💲Giveaway 1 BNB / 24 hours 💲\n\n1. Follow @gethungrycrypto \n2. Like this tweet\n3. Retweet\n4. Tag a friend\n\n🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛…,giveaway 1 bnb 24 hours 1. follow 2. like this tweet 3. retweet 4. tag a friend
2021-07-30 03:24:40,1129.0,178.0,2098.0,"RT @michael_saylor: #Bitcoin is now held by more than 114 million people, making it the fastest growing, most widely held financial asset i…","bitcoin is now held by more than 114 million people, making it the fastest growing, most widely held financial asset i"
2021-07-30 03:24:40,3331.0,158.0,0.0,@CoinMarketCap Very strong and best project\n\n#CoinMarketCap #Crypto #Cryptocurrency #Bitcoin #Ethereum #Dogecoin #Altcoin #DeFi #SpaceY #SPAY #Airdrop,very strong and best project coinmarketcap crypto cryptocurrency bitcoin ethereum dogecoin altcoin defi spacey spay airdrop


## Tokenization

In [20]:
# Tokenizing Functions

def get_wordnet_pos(word):
# Map POS tag to the first character lemmatize() accepts
    tag = nltk.pos_tag([word])[0][1][0].upper()
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}

    return tag_dict.get(tag, wordnet.NOUN)

# Function for tokenizing tweets (already cleaned using regex)
def second_clean(tweet):
    tweet = remove_stopwords(tweet) # remove stopwords with Gensim

    lemmatizer = WordNetLemmatizer()
    tokenized = [lemmatizer.lemmatize(w, get_wordnet_pos(w)) for w in nltk.word_tokenize(tweet)]
    
    # remove left over stop words with nltk
    tokenized = [token for token in tokenized if token not in stopwords.words("english")] 

    # remove non-alpha characters and keep the words of length >2 only
    tokenized = [token for token in tokenized if token.isalpha() and len(token)>2]

    return tokenized

# Function for joining tokenized list into string
def combine_tokens(tokenized): 
    non_tokenized = ' '.join([w for w in tokenized])
    return non_tokenized

# Execute function 
crypto_data['tokens'] = crypto_data['cleaned_text'].apply(lambda x: second_clean(x))
crypto_data['final_clean'] = crypto_data['tokens'].apply(lambda x: combine_tokens(x))

crypto_data

Unnamed: 0_level_0,favourites,followers,retweets,text,cleaned_text,tokens,final_clean
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-07-30 03:24:47,1956.0,481.0,0.0,"Word on the street is, Michael Burry got liquidated during the liquidation cascade on binance leading the price to 48k. He is usually early, but a bit too early. #michealjburry #burry $btc #btc #Bitcoin","word on the street is, michael burry got liquidated during the liquidation cascade on binance leading the price to 48k. he is usually early, but a bit too early. michealjburry burry btc btc bitcoin","[word, street, michael, burry, get, liquidate, liquidation, cascade, binance, lead, price, usually, early, bit, early, michealjburry, burry, btc, btc, bitcoin]",word street michael burry get liquidate liquidation cascade binance lead price usually early bit early michealjburry burry btc btc bitcoin
2021-07-30 03:24:47,8954.0,3382.0,0.0,Amazon will accept #dogecoin #Bitcoin,amazon will accept dogecoin bitcoin,"[amazon, accept, dogecoin, bitcoin]",amazon accept dogecoin bitcoin
2021-07-30 03:24:45,8377.0,281.0,172.0,RT @DocumentingBTC: Greenide Generation has annouced it will close the forty-year-old Lockwood Hills ash landfill in New York to build a ne…,greenide generation has annouced it will close the forty year old lockwood hills ash landfill in new york to build a ne,"[greenide, generation, annouced, close, year, old, lockwood, hill, ash, landfill, new, york, build]",greenide generation annouced close year old lockwood hill ash landfill new york build
2021-07-30 03:24:45,42989.0,662.0,5544.0,RT @flurbnb: $200 to one person in 72 hours\n\nRetweet &amp; follow me &amp; post proof of my notifs 🔔\n\n$FLUR #FLURARMY #FLURMOON #bitcoin #ethereu…,200 to one person in 72 hours retweet amp follow me amp post proof of my notifs flur flurarmy flurmoon bitcoin ethereu,"[person, hour, retweet, amp, follow, amp, post, proof, notifs, flur, flurarmy, flurmoon, bitcoin, ethereu]",person hour retweet amp follow amp post proof notifs flur flurarmy flurmoon bitcoin ethereu
2021-07-30 03:24:44,2785.0,551.0,3396.0,RT @gethungrycrypto: 💲Giveaway 1 BNB / 24 hours 💲\n\n1. Follow @gethungrycrypto \n2. Like this tweet\n3. Retweet\n4. Tag a friend\n\n🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛…,giveaway 1 bnb 24 hours 1. follow 2. like this tweet 3. retweet 4. tag a friend,"[giveaway, bnb, hour, follow, like, tweet, retweet, tag, friend]",giveaway bnb hour follow like tweet retweet tag friend
...,...,...,...,...,...,...,...
2021-07-30 01:59:44,6009.0,98.0,0.0,#Bitcoin and #etherium rallying. If this continues into Friday cryptos should run. $BTBT $SPRT $NCTY,bitcoin and etherium rallying. if this continues into friday cryptos should run. btbt sprt ncty,"[bitcoin, etherium, rally, continue, friday, cryptos, run, btbt, sprt, ncty]",bitcoin etherium rally continue friday cryptos run btbt sprt ncty
2021-07-30 01:52:56,7082.0,58.0,5.0,"RT @NickelToken: We are 🔥BURNING🔥 5,000,000 $Nickel over the next 2 weeks! The first 750k has already been incinerated!\n\nhttps://t.co/wfl3W…","we are burning 5,000,000 nickel over the next 2 weeks! the first 750k has already been incinerated!","[burning, nickel, week, incinerate]",burning nickel week incinerate
2021-07-30 01:52:46,90271.0,516.0,81.0,"RT @Henrikaau: “Kindle” \nMy first drop on @SuperRare, this Friday at 16.00 BST. \n\nhttps://t.co/E8pesR2NvY\n#superrare #etherium #nft https:/…","kindle my first drop on , this friday at 16.00 bst. superrare etherium nft https","[kindle, drop, friday, bst, superrare, etherium, nft, http]",kindle drop friday bst superrare etherium nft http
2021-07-30 01:51:23,1169.0,49.0,0.0,@AltcoinDailyio Etherium is the real mover right now,etherium is the real mover right now,"[etherium, real, mover, right]",etherium real mover right


## NLP - Vader Sentiment Model

In [21]:
# Initialize analyzer
sia = SentimentIntensityAnalyzer()

# Sentiment labels function 
def sentiment_labels(df, feature, value): 
    df.loc[df[value] > 0,feature] = 'positive'
    df.loc[df[value] == 0,feature] = 'neutral'
    df.loc[df[value] < 0,feature] = 'negative'
    
# Vader sentiment analysis

def vader_sentiment(df):
    
    target_col='cleaned_text'
    prefix = 'vader_clean_'
        
    scores_col=prefix+'scores'
    compound_col = prefix+'polarity'
    sentiment = prefix+'sentiment'
    
    df[scores_col] = df[target_col].apply(lambda x:sia.polarity_scores(x))
    df[compound_col] = df[scores_col].apply(lambda d: d['compound'])
    sentiment_labels(df, sentiment, compound_col)
    
#Execute vader function
start = time.time()
vader_sentiment(crypto_data)
stop = time.time()

print(f'Vader analysis took: {round((stop-start)/60, 3)}minutes')

crypto_data.head(30)

Vader analysis took: 0.001minutes


Unnamed: 0_level_0,favourites,followers,retweets,text,cleaned_text,tokens,final_clean,vader_clean_scores,vader_clean_polarity,vader_clean_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-07-30 03:24:47,1956.0,481.0,0.0,"Word on the street is, Michael Burry got liquidated during the liquidation cascade on binance leading the price to 48k. He is usually early, but a bit too early. #michealjburry #burry $btc #btc #Bitcoin","word on the street is, michael burry got liquidated during the liquidation cascade on binance leading the price to 48k. he is usually early, but a bit too early. michealjburry burry btc btc bitcoin","[word, street, michael, burry, get, liquidate, liquidation, cascade, binance, lead, price, usually, early, bit, early, michealjburry, burry, btc, btc, bitcoin]",word street michael burry get liquidate liquidation cascade binance lead price usually early bit early michealjburry burry btc btc bitcoin,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0,neutral
2021-07-30 03:24:47,8954.0,3382.0,0.0,Amazon will accept #dogecoin #Bitcoin,amazon will accept dogecoin bitcoin,"[amazon, accept, dogecoin, bitcoin]",amazon accept dogecoin bitcoin,"{'neg': 0.0, 'neu': 0.411, 'pos': 0.589, 'compound': 0.5106}",0.5106,positive
2021-07-30 03:24:45,8377.0,281.0,172.0,RT @DocumentingBTC: Greenide Generation has annouced it will close the forty-year-old Lockwood Hills ash landfill in New York to build a ne…,greenide generation has annouced it will close the forty year old lockwood hills ash landfill in new york to build a ne,"[greenide, generation, annouced, close, year, old, lockwood, hill, ash, landfill, new, york, build]",greenide generation annouced close year old lockwood hill ash landfill new york build,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0,neutral
2021-07-30 03:24:45,42989.0,662.0,5544.0,RT @flurbnb: $200 to one person in 72 hours\n\nRetweet &amp; follow me &amp; post proof of my notifs 🔔\n\n$FLUR #FLURARMY #FLURMOON #bitcoin #ethereu…,200 to one person in 72 hours retweet amp follow me amp post proof of my notifs flur flurarmy flurmoon bitcoin ethereu,"[person, hour, retweet, amp, follow, amp, post, proof, notifs, flur, flurarmy, flurmoon, bitcoin, ethereu]",person hour retweet amp follow amp post proof notifs flur flurarmy flurmoon bitcoin ethereu,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0,neutral
2021-07-30 03:24:44,2785.0,551.0,3396.0,RT @gethungrycrypto: 💲Giveaway 1 BNB / 24 hours 💲\n\n1. Follow @gethungrycrypto \n2. Like this tweet\n3. Retweet\n4. Tag a friend\n\n🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛…,giveaway 1 bnb 24 hours 1. follow 2. like this tweet 3. retweet 4. tag a friend,"[giveaway, bnb, hour, follow, like, tweet, retweet, tag, friend]",giveaway bnb hour follow like tweet retweet tag friend,"{'neg': 0.0, 'neu': 0.725, 'pos': 0.275, 'compound': 0.6908}",0.6908,positive
2021-07-30 03:24:44,3457.0,215.0,5544.0,RT @flurbnb: $200 to one person in 72 hours\n\nRetweet &amp; follow me &amp; post proof of my notifs 🔔\n\n$FLUR #FLURARMY #FLURMOON #bitcoin #ethereu…,200 to one person in 72 hours retweet amp follow me amp post proof of my notifs flur flurarmy flurmoon bitcoin ethereu,"[person, hour, retweet, amp, follow, amp, post, proof, notifs, flur, flurarmy, flurmoon, bitcoin, ethereu]",person hour retweet amp follow amp post proof notifs flur flurarmy flurmoon bitcoin ethereu,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0,neutral
2021-07-30 03:24:43,54562.0,505.0,3396.0,RT @gethungrycrypto: 💲Giveaway 1 BNB / 24 hours 💲\n\n1. Follow @gethungrycrypto \n2. Like this tweet\n3. Retweet\n4. Tag a friend\n\n🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛…,giveaway 1 bnb 24 hours 1. follow 2. like this tweet 3. retweet 4. tag a friend,"[giveaway, bnb, hour, follow, like, tweet, retweet, tag, friend]",giveaway bnb hour follow like tweet retweet tag friend,"{'neg': 0.0, 'neu': 0.725, 'pos': 0.275, 'compound': 0.6908}",0.6908,positive
2021-07-30 03:24:42,3982.0,145.0,3396.0,RT @gethungrycrypto: 💲Giveaway 1 BNB / 24 hours 💲\n\n1. Follow @gethungrycrypto \n2. Like this tweet\n3. Retweet\n4. Tag a friend\n\n🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛🦛…,giveaway 1 bnb 24 hours 1. follow 2. like this tweet 3. retweet 4. tag a friend,"[giveaway, bnb, hour, follow, like, tweet, retweet, tag, friend]",giveaway bnb hour follow like tweet retweet tag friend,"{'neg': 0.0, 'neu': 0.725, 'pos': 0.275, 'compound': 0.6908}",0.6908,positive
2021-07-30 03:24:40,1129.0,178.0,2098.0,"RT @michael_saylor: #Bitcoin is now held by more than 114 million people, making it the fastest growing, most widely held financial asset i…","bitcoin is now held by more than 114 million people, making it the fastest growing, most widely held financial asset i","[bitcoin, held, million, people, make, fast, grow, widely, held, financial, asset]",bitcoin held million people make fast grow widely held financial asset,"{'neg': 0.0, 'neu': 0.819, 'pos': 0.181, 'compound': 0.4939}",0.4939,positive
2021-07-30 03:24:40,3331.0,158.0,0.0,@CoinMarketCap Very strong and best project\n\n#CoinMarketCap #Crypto #Cryptocurrency #Bitcoin #Ethereum #Dogecoin #Altcoin #DeFi #SpaceY #SPAY #Airdrop,very strong and best project coinmarketcap crypto cryptocurrency bitcoin ethereum dogecoin altcoin defi spacey spay airdrop,"[strong, best, project, coinmarketcap, crypto, cryptocurrency, bitcoin, ethereum, dogecoin, altcoin, defi, spacey, spay, airdrop]",strong best project coinmarketcap crypto cryptocurrency bitcoin ethereum dogecoin altcoin defi spacey spay airdrop,"{'neg': 0.0, 'neu': 0.635, 'pos': 0.365, 'compound': 0.8425}",0.8425,positive


## Analysis

In [22]:
# View sentiment totals
crypto_data['vader_clean_sentiment'].value_counts()

positive    105
neutral      72
negative     15
Name: vader_clean_sentiment, dtype: int64

In [23]:
# Function for determining avg sentiment for each score in the model and overall average sentiment
def pos_neg_neutral_avg(df):
    
    positive = []
    neutral = []
    negative = []
    
    for values in df:
        if values > 0:
            positive.append(values)
        
        elif values < 0:
            negative.append(values)
        
        else:
            neutral.append(values)
    
    print(f'Positive score average for {df.name} = {round(np.mean(positive), 2)}')
    print(f'Neutral score average for {df.name} = {round(np.mean(neutral), 2)}')
    print(f'Negative score average for {df.name} = {round(np.mean(negative), 2)}')
    
    print(f'Overall crypto sentiment score is = {round(np.mean(df), 4)}')
    
# Average scores for each sentiment category, and overall sentiment score
vader_values = crypto_data.loc[:, 'vader_clean_polarity']
pos_neg_neutral_avg(vader_values)

Positive score average for vader_clean_polarity = 0.54
Neutral score average for vader_clean_polarity = 0.0
Negative score average for vader_clean_polarity = -0.37
Overall crypto sentiment score is = 0.2667


In [24]:
# Function for creating a df with positive/negative/neutral counts
def pos_neg_neutral(df):
    
    positive = []
    neutral = []
    negative = []
    
    for values in df:
        if values > 0:
            positive.append(values)
        
        elif values < 0:
            negative.append(values)
        
        else:
            neutral.append(values)
            
    positive_score = round(np.mean(positive), 2)
    neutral_score = round(np.mean(neutral), 2)
    negative_score = round(np.mean(negative), 2)
    
    scores = positive_score, neutral_score, negative_score
    scores_df = pd.DataFrame(scores)
    return scores_df

#Execute Function
scores_df = pos_neg_neutral(vader_values)

#Reformat df to fit requirements for merging/implementing into a graph
scores_df = scores_df.rename(index={0: 'positive', 1: 'neutral', 2: 'negative'}).reset_index()
scores_df = scores_df.rename(columns={'index': 'Sentiment', 0: 'Average Polarity'})
scores_df

Unnamed: 0,Sentiment,Average Polarity
0,positive,0.54
1,neutral,0.0
2,negative,-0.37


In [25]:
# Create # of Tweets Df and reformat to prepare for merge
vader_values_plot = pd.DataFrame(crypto_data['vader_clean_sentiment'].value_counts()).reset_index()
vader_values_plot = vader_values_plot.rename(columns={'index': 'Sentiment', 'vader_clean_sentiment': 'Number of Tweets'})

# Merge Dataframes
sentiment_df = pd.merge(vader_values_plot, scores_df, on=['Sentiment', 'Sentiment'], how='left')
sentiment_df

Unnamed: 0,Sentiment,Number of Tweets,Average Polarity
0,positive,105,0.54
1,neutral,72,0.0
2,negative,15,-0.37


In [26]:
# Create dynamic visualization 
fig = px.bar(sentiment_df, x='Sentiment', y='Number of Tweets', 
             title='Twitter Cryptocurrency Sentiment (BTC, ADA, ETH)', hover_data=['Sentiment', 'Number of Tweets', 'Average Polarity'], color='Average Polarity')

fig.show()