In [1]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import re
from textblob import TextBlob
from wordcloud import WordCloud
plt.style.use('fivethirtyeight')


%matplotlib inline



In [2]:
# Download/Update the VADER Lexicon
nltk.download('vader_lexicon')

# Initialize the VADER sentiment analyzer
analyzer = SentimentIntensityAnalyzer

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
# Load the data
from google.colab import files
uploaded = files.upload()


Saving texts.csv to texts (3).csv


In [4]:
# Get the data
tweets = pd.read_csv('texts.csv', infer_datetime_format=True, parse_dates=True)


# Convert timestamp to datetime
tweets['timestamp'] = pd.to_datetime(tweets['timestamp'], unit='ms')
tweets = tweets.rename(columns={'text':'Tweets'})
tweets

Unnamed: 0,id,timestamp,Tweets
0,1431014070182940700,2021-08-26 22:01:55,From blockchain to AI\t Web 3.0 has the potent...
1,1430988007335682000,2021-08-26 20:18:22,Dual Investment (55th Phase) - Subscribe now t...
2,1430967277898322000,2021-08-26 18:55:59,#Binance NFT is launching new features! \t\tDi...
3,1430940034723684400,2021-08-26 17:07:44,If your pups are also trading on #Internationa...
4,1430923091367059500,2021-08-26 16:00:24,#Binance Responsible Trading Summer Camp is no...
...,...,...,...
564,1427517967634509800,2021-08-17 06:29:40,MaskDoge (MASKDOGE): https://t.co/FPVZHKMM3w
565,1427517966724345900,2021-08-17 06:29:39,Blockasset (Personalised NFT Trading Card): ht...
566,1427516706520543200,2021-08-17 06:24:39,Fucu Finance (FUCU): https://t.co/kUvlcCEHfQ
567,1427516705677488000,2021-08-17 06:24:39,BernardSwap (BONES): https://t.co/Dt9NSWPl6b


In [5]:
# Create function to clean tweets (remove @mentions, #, RT, and Hyperlinks)
def cleanText(tweet):
  tweet = re.sub(r'@[A-Za-z0-9]+', '', tweet) 
  tweet = re.sub(r'#', '', tweet)
  tweet = re.sub(r'RT[\s]+', '', tweet)
  tweet = re.sub(r'https?:\/\/\S+', '', tweet)

  return tweet

# Clean tweets
tweets["Tweets"] = tweets['Tweets'].apply(cleanText)
tweets


Unnamed: 0,id,timestamp,Tweets
0,1431014070182940700,2021-08-26 22:01:55,From blockchain to AI\t Web 3.0 has the potent...
1,1430988007335682000,2021-08-26 20:18:22,Dual Investment (55th Phase) - Subscribe now t...
2,1430967277898322000,2021-08-26 18:55:59,Binance NFT is launching new features! \t\tDis...
3,1430940034723684400,2021-08-26 17:07:44,If your pups are also trading on International...
4,1430923091367059500,2021-08-26 16:00:24,Binance Responsible Trading Summer Camp is now...
...,...,...,...
564,1427517967634509800,2021-08-17 06:29:40,MaskDoge (MASKDOGE):
565,1427517966724345900,2021-08-17 06:29:39,Blockasset (Personalised NFT Trading Card):
566,1427516706520543200,2021-08-17 06:24:39,Fucu Finance (FUCU):
567,1427516705677488000,2021-08-17 06:24:39,BernardSwap (BONES):


In [11]:
# Filter and grab tweets related to Bitcoin, Ethereum
btc_tweets = tweets[tweets['Tweets'].str.contains("BTC", "Bitcoin")]
eth_tweets = tweets[tweets['Tweets'].str.contains('ETH', 'Ethereum')]

In [13]:
# Create a Bitcoin sentiment scores Dataframe
bitcoin_sentiments = []
analyzer = SentimentIntensityAnalyzer()

for tweets in btc_tweets["Tweets"]:
    text = tweets
    sentiment = analyzer.polarity_scores(text)
    compound = sentiment["compound"]
    pos = sentiment["pos"]
    neu = sentiment["neu"]
    neg = sentiment["neg"]
        
    bitcoin_sentiments.append({
        "Tweet": text,
        "Compound": compound,
        "Positive": pos,
        "Negative": neg,
        "Neutral": neu,
    })
        
btc_sentiment_df = pd.DataFrame(bitcoin_sentiments)
cols = ["Compound","Negative","Neutral","Positive","Tweet"]
btc_sentiment_df = btc_sentiment_df[cols]
btc_sentiment_df.describe()

Unnamed: 0,Compound,Negative,Neutral,Positive
count,37.0,37.0,37.0,37.0
mean,0.057914,0.024351,0.917595,0.058081
std,0.260102,0.059499,0.105944,0.08822
min,-0.6597,0.0,0.581,0.0
25%,0.0,0.0,0.854,0.0
50%,0.0,0.0,1.0,0.0
75%,0.2023,0.0,1.0,0.093
max,0.6369,0.227,1.0,0.342


In [16]:
# Create ETHEREUM sentiment scores Dataframe
eth_sentiments = []
analyzer = SentimentIntensityAnalyzer()

for tweets in eth_tweets["Tweets"]:
    text = tweets
    sentiment = analyzer.polarity_scores(text)
    compound = sentiment["compound"]
    pos = sentiment["pos"]
    neu = sentiment["neu"]
    neg = sentiment["neg"]
        
    eth_sentiments.append({
        "Tweet": text,
        "Compound": compound,
        "Positive": pos,
        "Negative": neg,
        "Neutral": neu,
    })
        
eth_sentiment_df = pd.DataFrame(eth_sentiments)
cols = ["Compound","Negative","Neutral","Positive","Tweet"]
eth_sentiment_df = eth_sentiment_df[cols]
eth_sentiment_df.describe()

Unnamed: 0,Compound,Negative,Neutral,Positive
count,18.0,18.0,18.0,18.0
mean,0.179683,0.012556,0.897167,0.090278
std,0.293047,0.037783,0.104507,0.109062
min,-0.4215,0.0,0.632,0.0
25%,0.0,0.0,0.85525,0.0
50%,0.08895,0.0,0.8895,0.0455
75%,0.3559,0.0,1.0,0.1435
max,0.7901,0.141,1.0,0.368
