# Twitter Scraping & NLP

In [37]:
import requests 
import json
from config import consumer_key, consumer_secret, access_key, access_secret, bearer_token

In [38]:
from textblob import TextBlob
import pandas as pd
import sys
import tweepy
import matplotlib.pyplot as plt
import numpy as np
import os
import seaborn as sns
import time
import re
import string

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.stem import WordNetLemmatizer 
from nltk.corpus import wordnet
nltk.download('averaged_perceptron_tagger')
nltk.download('vader_lexicon')
nltk.download('stopwords')
nltk.download('wordnet')

!pip install gensim
import gensim
from gensim.parsing.preprocessing import remove_stopwords 
import torch
import flair
from flair.models import TextClassifier
from flair.data import Sentence
from segtok.segmenter import split_single

from wordcloud import WordCloud, STOPWORDS
from PIL import Image
from langdetect import detect
from sklearn.feature_extraction.text import CountVectorizer

# Display max column width 
pd.set_option('display.max_colwidth', None)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/memme11/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/memme11/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/memme11/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/memme11/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!




## Twitter API (Tweepy)

In [39]:
# Initialize and gain access to Twitter API
def initialize():
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    return api

api = initialize()

In [40]:
# Use get statement to see json format
url = 'https://api.twitter.com/1.1/search/tweets.json'
params = {'q': 'bitcoin',
          'tweet_mode': 'extended', 
         'lang': 'en', 
         'count': 2}
headers = {'authorization': 'Bearer '+bearer_token}

sample_data = requests.get(url, params=params, headers=headers).json()

sample_data

{'statuses': [{'created_at': 'Tue Jul 27 23:31:15 +0000 2021',
   'id': 1420164914136682498,
   'id_str': '1420164914136682498',
   'full_text': 'RT @ledgerstatus: BITCOIN BEING ARTIFICIALLY ENGINEERED TO TOUCH $44,444 WITHIN 48 HOURS.\n- SOURCES',
   'truncated': False,
   'display_text_range': [0, 99],
   'entities': {'hashtags': [],
    'symbols': [],
    'user_mentions': [{'screen_name': 'ledgerstatus',
      'name': 'Ledger Status 🇪🇹 Prometheus of the Plebs',
      'id': 897142437344419842,
      'id_str': '897142437344419842',
      'indices': [3, 16]}],
    'urls': []},
   'metadata': {'iso_language_code': 'en', 'result_type': 'recent'},
   'source': '<a href="http://twitter.com/download/android" rel="nofollow">Twitter for Android</a>',
   'in_reply_to_status_id': None,
   'in_reply_to_status_id_str': None,
   'in_reply_to_user_id': None,
   'in_reply_to_user_id_str': None,
   'in_reply_to_screen_name': None,
   'user': {'id': 1318563132801953792,
    'id_str': '1318563132801953

In [41]:
# Call on tweepy API and create dataframe
search_words = ("bitcoin", "etherium", "cardano")
crypto_data = pd.DataFrame()

def get_data(data):
    data = {
        'text': data.full_text,
        'date': data.created_at,
        'followers': data.user.followers_count,
        'favourites': data.user.favourites_count,
        'retweets': data.retweet_count
    }
    return data

for tweets in search_words:
    comp_tweets = api.search(q=tweets, lang = 'en', result_type = 'recent', count=250, tweet_mode='extended')
    
    for tweet in comp_tweets:
        row = get_data(tweet)
        crypto_data = crypto_data.append(row, ignore_index=True)
        
crypto_data

Unnamed: 0,date,favourites,followers,retweets,text
0,2021-07-27 23:31:18,112.0,11.0,96.0,RT @nf4mation: Goldman Sachs Applies for a DeFi and Blockchain ETF to Optimize Investment Results\n#bitcoin #ethereum #cryptocurrency #block…
1,2021-07-27 23:31:18,2403.0,239.0,531.0,RT @SerkanArikan06: $800 in $BTC #Bitcoin to a random person that Follows and Retweets
2,2021-07-27 23:31:17,3196.0,4932.0,0.0,"Exactly what I think, however, I am more leaning towards the latter because new rules by @binance would start a trend in the crypto industry and volume would flow out of big exchanges, more regulated this industry becomes, the more USP and value #Bitcoin losses. https://t.co/IIVSziUkdJ"
3,2021-07-27 23:31:17,22.0,3258.0,0.0,Bill ackman bitcoin https://t.co/zXfZ2outvt
4,2021-07-27 23:31:16,228.0,11.0,99.0,RT @BTC_Archive: #Bitcoin $39k! \nWe up baby! 🚀
...,...,...,...,...,...
287,2021-07-27 23:18:10,21074.0,7491.0,0.0,"#Crypto Pricing (Top5 by MktCap)\n\n27Jul21 01:00 CET\n\n1) #Bitcoin $39.064 +4.25%\n\n2) #Ether $2,287 +1.85%\n\n3) #Tether $1 -0.01%\n\n4) #BinanceCoin $312 +2.3%\n\n5) #Cardano $1.27 +0.6%\n\n%age increase last 24 hours\n\n@CoinMarketCap #cryptocurrency #cryptotrading \n@WiserIn10 https://t.co/Ku2IyE42vK"
288,2021-07-27 23:18:03,16.0,46.0,0.0,"#Altcoin Cardano Could Become ‘Big Three’ Mainstream Cryptocurrency, According to Morningstar Portfolio Strategist https://t.co/1fpAVUqSpk"
289,2021-07-27 23:18:00,17661.0,1647.0,0.0,HOPE2 has achieved 100% performance.\nThank you for delegation and support to our pools.\n\n#Cardano #Ada #カルダノ #エイダ #카르다노 #에이다 https://t.co/GWvxAb4K7m
290,2021-07-27 23:17:48,136.0,7.0,83.0,"RT @nf4mation: Cardano Could Become 'Big Three' Mainstream Cryptocurrency, According to Morningstar Portfolio Strategist\n#bitcoin #ethereum…"


## Data Preprocessing

In [43]:
# Formatting
# Keep only tweets with over 1000 favourites
crypto_data = crypto_data.loc[crypto_data['favourites']>1000]

# Clean text column using Regex
crypto_data['cleaned_text'] = crypto_data['text']
clean_text = '(RT) @[\w]*:|(@[A-Za-z0-9]+)|([^\,\!\.\'\%0-9A-Za-z \t])|(\w+:\/\/\S+)'
crypto_data['cleaned_text'] = crypto_data['cleaned_text'].str.replace(clean_text, " ", regex=True)
crypto_data['cleaned_text'] = crypto_data['cleaned_text'].str.lower()

# Convert date dtype to datetime, set index, sort index and drop duplicates
crypto_data['date'] = pd.to_datetime(crypto_data['date'])
crypto_data = crypto_data.set_index('date').sort_index(ascending=False)
crypto_data.drop_duplicates(inplace=True)

crypto_data.head(30)

Unnamed: 0_level_0,favourites,followers,retweets,text,cleaned_text
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-07-27 23:31:18,2403.0,239.0,531.0,RT @SerkanArikan06: $800 in $BTC #Bitcoin to a random person that Follows and Retweets,800 in btc bitcoin to a random person that follows and retweets
2021-07-27 23:31:17,3196.0,4932.0,0.0,"Exactly what I think, however, I am more leaning towards the latter because new rules by @binance would start a trend in the crypto industry and volume would flow out of big exchanges, more regulated this industry becomes, the more USP and value #Bitcoin losses. https://t.co/IIVSziUkdJ","exactly what i think, however, i am more leaning towards the latter because new rules by would start a trend in the crypto industry and volume would flow out of big exchanges, more regulated this industry becomes, the more usp and value bitcoin losses."
2021-07-27 23:31:16,3712.0,59.0,122.0,"RT @RyanBerckmans: 1/ Bitcoin's cost problem\n\nYesterday, it cost BTC holders an annualized run rate of $17B to run the bitcoin blockchain.…","1 bitcoin's cost problem yesterday, it cost btc holders an annualized run rate of 17b to run the bitcoin blockchain."
2021-07-27 23:31:16,2752.0,279.0,6.0,RT @upunksunion: #UPU is an #NFT with 13.666 pieces! we have gender types! female #zombies &amp; #apes more in UPU punkverse 🤟 659 of 13.666 SO…,upu is an nft with 13.666 pieces! we have gender types! female zombies amp apes more in upu punkverse 659 of 13.666 so
2021-07-27 23:31:15,1155.0,52.0,0.0,"@jlcx5_2 @jtimberlake We don't buy bitcoin\nWe earn bitcoin From Mining \nI'm ready to show 10 lucky people on how to earn 1BTC and more daily!\nNo referral \nNo withdrawal fees\nIf interested, kindly send a DM","2 we don't buy bitcoin we earn bitcoin from mining i'm ready to show 10 lucky people on how to earn 1btc and more daily! no referral no withdrawal fees if interested, kindly send a dm"
2021-07-27 23:31:15,14384.0,385.0,37.0,"RT @ledgerstatus: BITCOIN BEING ARTIFICIALLY ENGINEERED TO TOUCH $44,444 WITHIN 48 HOURS.\n- SOURCES","bitcoin being artificially engineered to touch 44,444 within 48 hours. sources"
2021-07-27 23:31:14,28661.0,479.0,3.0,RT @francispouliot_: Bitcoin Remnant Energy https://t.co/Hwn2DG5BDO,bitcoin remnant energy
2021-07-27 23:31:14,1751.0,6611.0,158.0,RT @nf4mation: Mastercard Launches Global Program to Help Cryptocurrency Startups Scale Their Innovations – Featured Bitcoin News\n#bitcoin…,mastercard launches global program to help cryptocurrency startups scale their innovations featured bitcoin news bitcoin
2021-07-27 23:31:13,4864.0,579.0,0.0,"Bye-bye, bitcoin: It's time to ban cryptocurrencies https://t.co/5Al947nNup","bye bye, bitcoin it's time to ban cryptocurrencies"
2021-07-27 23:31:12,5141.0,546.0,1.0,RT @gabeh730: This guy @BitcoinIsThePin is a legit pleb. He convinced me that the 3rd largest #Bitcoin address is Robinhood (not your keys…,this guy is a legit pleb. he convinced me that the 3rd largest bitcoin address is robinhood not your keys


## Tokenization

In [44]:
# Tokenizing Functions

def get_wordnet_pos(word):
# Map POS tag to the first character lemmatize() accepts
    tag = nltk.pos_tag([word])[0][1][0].upper()
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}

    return tag_dict.get(tag, wordnet.NOUN)

# Function for tokenizing tweets (already cleaned using regex)
def second_clean(tweet):
    tweet = remove_stopwords(tweet) # remove stopwords with Gensim

    lemmatizer = WordNetLemmatizer()
    tokenized = [lemmatizer.lemmatize(w, get_wordnet_pos(w)) for w in nltk.word_tokenize(tweet)]
    
    # remove left over stop words with nltk
    tokenized = [token for token in tokenized if token not in stopwords.words("english")] 

    # remove non-alpha characters and keep the words of length >2 only
    tokenized = [token for token in tokenized if token.isalpha() and len(token)>2]

    return tokenized

# Function for joining tokenized list into string
def combine_tokens(tokenized): 
    non_tokenized = ' '.join([w for w in tokenized])
    return non_tokenized

In [45]:
# Execute function 
crypto_data['tokens'] = crypto_data['cleaned_text'].apply(lambda x: second_clean(x))
crypto_data['final_clean'] = crypto_data['tokens'].apply(lambda x: combine_tokens(x))

crypto_data

Unnamed: 0_level_0,favourites,followers,retweets,text,cleaned_text,tokens,final_clean
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-07-27 23:31:18,2403.0,239.0,531.0,RT @SerkanArikan06: $800 in $BTC #Bitcoin to a random person that Follows and Retweets,800 in btc bitcoin to a random person that follows and retweets,"[btc, bitcoin, random, person, follow, retweets]",btc bitcoin random person follow retweets
2021-07-27 23:31:17,3196.0,4932.0,0.0,"Exactly what I think, however, I am more leaning towards the latter because new rules by @binance would start a trend in the crypto industry and volume would flow out of big exchanges, more regulated this industry becomes, the more USP and value #Bitcoin losses. https://t.co/IIVSziUkdJ","exactly what i think, however, i am more leaning towards the latter because new rules by would start a trend in the crypto industry and volume would flow out of big exchanges, more regulated this industry becomes, the more usp and value bitcoin losses.","[exactly, think, however, lean, new, rule, start, trend, crypto, industry, volume, flow, big, exchange, regulate, industry, becomes, usp, value, bitcoin, loss]",exactly think however lean new rule start trend crypto industry volume flow big exchange regulate industry becomes usp value bitcoin loss
2021-07-27 23:31:16,3712.0,59.0,122.0,"RT @RyanBerckmans: 1/ Bitcoin's cost problem\n\nYesterday, it cost BTC holders an annualized run rate of $17B to run the bitcoin blockchain.…","1 bitcoin's cost problem yesterday, it cost btc holders an annualized run rate of 17b to run the bitcoin blockchain.","[bitcoin, cost, problem, yesterday, cost, btc, holder, annualized, run, rate, run, bitcoin, blockchain]",bitcoin cost problem yesterday cost btc holder annualized run rate run bitcoin blockchain
2021-07-27 23:31:16,2752.0,279.0,6.0,RT @upunksunion: #UPU is an #NFT with 13.666 pieces! we have gender types! female #zombies &amp; #apes more in UPU punkverse 🤟 659 of 13.666 SO…,upu is an nft with 13.666 pieces! we have gender types! female zombies amp apes more in upu punkverse 659 of 13.666 so,"[upu, nft, piece, gender, type, female, zombie, amp, ape, upu, punkverse]",upu nft piece gender type female zombie amp ape upu punkverse
2021-07-27 23:31:15,1155.0,52.0,0.0,"@jlcx5_2 @jtimberlake We don't buy bitcoin\nWe earn bitcoin From Mining \nI'm ready to show 10 lucky people on how to earn 1BTC and more daily!\nNo referral \nNo withdrawal fees\nIf interested, kindly send a DM","2 we don't buy bitcoin we earn bitcoin from mining i'm ready to show 10 lucky people on how to earn 1btc and more daily! no referral no withdrawal fees if interested, kindly send a dm","[buy, bitcoin, earn, bitcoin, mining, ready, lucky, people, earn, daily, referral, withdrawal, fee, interested, kindly, send]",buy bitcoin earn bitcoin mining ready lucky people earn daily referral withdrawal fee interested kindly send
...,...,...,...,...,...,...,...
2021-07-27 22:18:02,4209.0,100.0,1.0,"@EvanDye42799552 Check out what's new in the wolfpack. https://t.co/kqJYaorJAq meet the team behind the scenes, updated roadmap and whitepaper! Come join the wolf pack today! #BinanceSmartChain #etherium #Hiddengems #altcoins #altgems #Bitcoin #crypto #shill @babywolf_token @BabyMoonWolfBsc","check out what's new in the wolfpack. meet the team behind the scenes, updated roadmap and whitepaper! come join the wolf pack today! binancesmartchain etherium hiddengems altcoins altgems bitcoin crypto shill token","[check, new, wolfpack, meet, team, scene, update, roadmap, whitepaper, come, join, wolf, pack, today, binancesmartchain, etherium, hiddengems, altcoins, altgems, bitcoin, crypto, shill, token]",check new wolfpack meet team scene update roadmap whitepaper come join wolf pack today binancesmartchain etherium hiddengems altcoins altgems bitcoin crypto shill token
2021-07-27 22:17:41,4209.0,100.0,1.0,"@ANCrypto57 @ThatCryptoBunny Check out what's new in the wolfpack. https://t.co/kqJYaorJAq meet the team behind the scenes, updated roadmap and whitepaper! Come join the wolf pack today! #BinanceSmartChain #etherium #Hiddengems #altcoins #altgems #Bitcoin #crypto #shill @babywolf_token @BabyMoonWolfBsc","check out what's new in the wolfpack. meet the team behind the scenes, updated roadmap and whitepaper! come join the wolf pack today! binancesmartchain etherium hiddengems altcoins altgems bitcoin crypto shill token","[check, new, wolfpack, meet, team, scene, update, roadmap, whitepaper, come, join, wolf, pack, today, binancesmartchain, etherium, hiddengems, altcoins, altgems, bitcoin, crypto, shill, token]",check new wolfpack meet team scene update roadmap whitepaper come join wolf pack today binancesmartchain etherium hiddengems altcoins altgems bitcoin crypto shill token
2021-07-27 22:17:04,38535.0,5936.0,40.0,"RT @Kreng51: Tomorrow i will send 10 #NFT ""Whale pattern"" to random followers who will retweet and comment WAX wallet here. 🧙\n\n14 June, 202…","tomorrow i will send 10 nft whale pattern to random followers who will retweet and comment wax wallet here. 14 june, 202","[tomorrow, send, nft, whale, pattern, random, follower, retweet, comment, wax, wallet, june]",tomorrow send nft whale pattern random follower retweet comment wax wallet june
2021-07-27 22:16:30,1531.0,141.0,2.0,@JanVanma lost in the starstruck on \n@withFND\n \nhttps://t.co/hSQdaRb5ao\ncheck out\n\n#NFT #NFTs #NFTCommunity #nftcollector #nftart #NFTGiveaway #NFTdrop #NFTgame #Space #art #Cinema4d #cryptoart #cryptocurrencies #Cryptocurency #cryptoartist #ETH #etherium #doge #SpaceX,lost in the starstruck on check out nft nfts nftcommunity nftcollector nftart nftgiveaway nftdrop nftgame space art cinema4d cryptoart cryptocurrencies cryptocurency cryptoartist eth etherium doge spacex,"[lose, starstruck, check, nft, nfts, nftcommunity, nftcollector, nftart, nftgiveaway, nftdrop, nftgame, space, art, cryptoart, cryptocurrencies, cryptocurency, cryptoartist, eth, etherium, doge, spacex]",lose starstruck check nft nfts nftcommunity nftcollector nftart nftgiveaway nftdrop nftgame space art cryptoart cryptocurrencies cryptocurency cryptoartist eth etherium doge spacex


## NLP - Vader Sentiment Model

In [46]:
# Sentiment labels function 
def sentiment_labels(df, feature, value): 
    df.loc[df[value] > 0,feature] = 'positive'
    df.loc[df[value] == 0,feature] = 'neutral'
    df.loc[df[value] < 0,feature] = 'negative'

In [47]:
# Vader sentiment analysis
# define function and variable for SentimentIntensityAnalyzer()

sia = SentimentIntensityAnalyzer()


def vader_sentiment(df):
    
    target_col='cleaned_text'
    prefix = 'vader_clean_'
        
    scores_col=prefix+'scores'
    
    compound_col = prefix+'polarity'
    
    sentiment = prefix+'sentiment'
    
    df[scores_col] = df[target_col].apply(lambda x:sia.polarity_scores(x))
    
    df[compound_col] = df[scores_col].apply(lambda d: d['compound'])
    
    sentiment_labels(df, sentiment, compound_col)

In [53]:
#Execute vader function
start = time.time()

vader_sentiment(crypto_data)
stop = time.time()

print(f'Vader analysis took: {round((stop-start)/60, 3)}minutes')

crypto_data

Vader analysis took: 0.001minutes


Unnamed: 0_level_0,favourites,followers,retweets,text,cleaned_text,tokens,final_clean,vader_clean_scores,vader_clean_polarity,vader_clean_sentiment
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-07-27 23:31:18,2403.0,239.0,531.0,RT @SerkanArikan06: $800 in $BTC #Bitcoin to a random person that Follows and Retweets,800 in btc bitcoin to a random person that follows and retweets,"[btc, bitcoin, random, person, follow, retweets]",btc bitcoin random person follow retweets,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0000,neutral
2021-07-27 23:31:17,3196.0,4932.0,0.0,"Exactly what I think, however, I am more leaning towards the latter because new rules by @binance would start a trend in the crypto industry and volume would flow out of big exchanges, more regulated this industry becomes, the more USP and value #Bitcoin losses. https://t.co/IIVSziUkdJ","exactly what i think, however, i am more leaning towards the latter because new rules by would start a trend in the crypto industry and volume would flow out of big exchanges, more regulated this industry becomes, the more usp and value bitcoin losses.","[exactly, think, however, lean, new, rule, start, trend, crypto, industry, volume, flow, big, exchange, regulate, industry, becomes, usp, value, bitcoin, loss]",exactly think however lean new rule start trend crypto industry volume flow big exchange regulate industry becomes usp value bitcoin loss,"{'neg': 0.057, 'neu': 0.887, 'pos': 0.056, 'compound': -0.0094}",-0.0094,negative
2021-07-27 23:31:16,3712.0,59.0,122.0,"RT @RyanBerckmans: 1/ Bitcoin's cost problem\n\nYesterday, it cost BTC holders an annualized run rate of $17B to run the bitcoin blockchain.…","1 bitcoin's cost problem yesterday, it cost btc holders an annualized run rate of 17b to run the bitcoin blockchain.","[bitcoin, cost, problem, yesterday, cost, btc, holder, annualized, run, rate, run, bitcoin, blockchain]",bitcoin cost problem yesterday cost btc holder annualized run rate run bitcoin blockchain,"{'neg': 0.124, 'neu': 0.876, 'pos': 0.0, 'compound': -0.4019}",-0.4019,negative
2021-07-27 23:31:16,2752.0,279.0,6.0,RT @upunksunion: #UPU is an #NFT with 13.666 pieces! we have gender types! female #zombies &amp; #apes more in UPU punkverse 🤟 659 of 13.666 SO…,upu is an nft with 13.666 pieces! we have gender types! female zombies amp apes more in upu punkverse 659 of 13.666 so,"[upu, nft, piece, gender, type, female, zombie, amp, ape, upu, punkverse]",upu nft piece gender type female zombie amp ape upu punkverse,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0000,neutral
2021-07-27 23:31:15,1155.0,52.0,0.0,"@jlcx5_2 @jtimberlake We don't buy bitcoin\nWe earn bitcoin From Mining \nI'm ready to show 10 lucky people on how to earn 1BTC and more daily!\nNo referral \nNo withdrawal fees\nIf interested, kindly send a DM","2 we don't buy bitcoin we earn bitcoin from mining i'm ready to show 10 lucky people on how to earn 1btc and more daily! no referral no withdrawal fees if interested, kindly send a dm","[buy, bitcoin, earn, bitcoin, mining, ready, lucky, people, earn, daily, referral, withdrawal, fee, interested, kindly, send]",buy bitcoin earn bitcoin mining ready lucky people earn daily referral withdrawal fee interested kindly send,"{'neg': 0.077, 'neu': 0.631, 'pos': 0.291, 'compound': 0.8698}",0.8698,positive
...,...,...,...,...,...,...,...,...,...,...
2021-07-27 22:18:02,4209.0,100.0,1.0,"@EvanDye42799552 Check out what's new in the wolfpack. https://t.co/kqJYaorJAq meet the team behind the scenes, updated roadmap and whitepaper! Come join the wolf pack today! #BinanceSmartChain #etherium #Hiddengems #altcoins #altgems #Bitcoin #crypto #shill @babywolf_token @BabyMoonWolfBsc","check out what's new in the wolfpack. meet the team behind the scenes, updated roadmap and whitepaper! come join the wolf pack today! binancesmartchain etherium hiddengems altcoins altgems bitcoin crypto shill token","[check, new, wolfpack, meet, team, scene, update, roadmap, whitepaper, come, join, wolf, pack, today, binancesmartchain, etherium, hiddengems, altcoins, altgems, bitcoin, crypto, shill, token]",check new wolfpack meet team scene update roadmap whitepaper come join wolf pack today binancesmartchain etherium hiddengems altcoins altgems bitcoin crypto shill token,"{'neg': 0.0, 'neu': 0.918, 'pos': 0.082, 'compound': 0.4184}",0.4184,positive
2021-07-27 22:17:41,4209.0,100.0,1.0,"@ANCrypto57 @ThatCryptoBunny Check out what's new in the wolfpack. https://t.co/kqJYaorJAq meet the team behind the scenes, updated roadmap and whitepaper! Come join the wolf pack today! #BinanceSmartChain #etherium #Hiddengems #altcoins #altgems #Bitcoin #crypto #shill @babywolf_token @BabyMoonWolfBsc","check out what's new in the wolfpack. meet the team behind the scenes, updated roadmap and whitepaper! come join the wolf pack today! binancesmartchain etherium hiddengems altcoins altgems bitcoin crypto shill token","[check, new, wolfpack, meet, team, scene, update, roadmap, whitepaper, come, join, wolf, pack, today, binancesmartchain, etherium, hiddengems, altcoins, altgems, bitcoin, crypto, shill, token]",check new wolfpack meet team scene update roadmap whitepaper come join wolf pack today binancesmartchain etherium hiddengems altcoins altgems bitcoin crypto shill token,"{'neg': 0.0, 'neu': 0.918, 'pos': 0.082, 'compound': 0.4184}",0.4184,positive
2021-07-27 22:17:04,38535.0,5936.0,40.0,"RT @Kreng51: Tomorrow i will send 10 #NFT ""Whale pattern"" to random followers who will retweet and comment WAX wallet here. 🧙\n\n14 June, 202…","tomorrow i will send 10 nft whale pattern to random followers who will retweet and comment wax wallet here. 14 june, 202","[tomorrow, send, nft, whale, pattern, random, follower, retweet, comment, wax, wallet, june]",tomorrow send nft whale pattern random follower retweet comment wax wallet june,"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}",0.0000,neutral
2021-07-27 22:16:30,1531.0,141.0,2.0,@JanVanma lost in the starstruck on \n@withFND\n \nhttps://t.co/hSQdaRb5ao\ncheck out\n\n#NFT #NFTs #NFTCommunity #nftcollector #nftart #NFTGiveaway #NFTdrop #NFTgame #Space #art #Cinema4d #cryptoart #cryptocurrencies #Cryptocurency #cryptoartist #ETH #etherium #doge #SpaceX,lost in the starstruck on check out nft nfts nftcommunity nftcollector nftart nftgiveaway nftdrop nftgame space art cinema4d cryptoart cryptocurrencies cryptocurency cryptoartist eth etherium doge spacex,"[lose, starstruck, check, nft, nfts, nftcommunity, nftcollector, nftart, nftgiveaway, nftdrop, nftgame, space, art, cryptoart, cryptocurrencies, cryptocurency, cryptoartist, eth, etherium, doge, spacex]",lose starstruck check nft nfts nftcommunity nftcollector nftart nftgiveaway nftdrop nftgame space art cryptoart cryptocurrencies cryptocurency cryptoartist eth etherium doge spacex,"{'neg': 0.084, 'neu': 0.916, 'pos': 0.0, 'compound': -0.3182}",-0.3182,negative


## Analysis

In [49]:
# View sentiment totals
crypto_data['vader_clean_sentiment'].value_counts()

positive    88
neutral     43
negative    26
Name: vader_clean_sentiment, dtype: int64

In [50]:
# Function for determining avg sentiment for each score in the model and overall average sentiment
def pos_neg_neutral_avg(df):
    
    positive = []
    neutral = []
    negative = []
    
    for values in df:
        if values > 0:
            positive.append(values)
        
        elif values < 0:
            negative.append(values)
        
        else:
            neutral.append(values)
    
    print(f'Positive score average for {df.name} = {round(np.mean(positive), 2)}')
    print(f'Neutral score average for {df.name} = {round(np.mean(neutral), 2)}')
    print(f'Negative score average for {df.name} = {round(np.mean(negative), 2)}')
    
    print(f'Overall crypto sentiment score is = {round(np.mean(df), 4)}')

In [52]:
# Average scores for each sentiment category, and overall sentiment score
vader_values = crypto_data.loc[:, 'vader_clean_polarity']
pos_neg_neutral_avg(vader_values)

Positive score average for vader_clean_polarity = 0.55
Neutral score average for vader_clean_polarity = 0.0
Negative score average for vader_clean_polarity = -0.42
Overall crypto sentiment score is = 0.2381
