In [None]:
import glob
import pandas as pd

# Columns kept from every raw tweet export.
TWEET_COLUMNS = ['tweets', 'likes', 'time']


def load_tweets(folder):
    """Read every CSV export in `folder` into one tweet DataFrame.

    Parameters
    ----------
    folder : str
        Directory holding the ``*.csv`` files for a single coin.

    Returns
    -------
    pandas.DataFrame
        All files stacked with a fresh 0..n-1 index, restricted to
        ``TWEET_COLUMNS``.

    Raises
    ------
    FileNotFoundError
        If the folder contains no CSV files (pd.concat would otherwise fail
        with the much less helpful "No objects to concatenate").
    """
    # glob returns files in arbitrary order; sorting keeps the row order
    # reproducible between runs and machines.
    csv_files = sorted(glob.glob(folder + "/*.csv"))
    if not csv_files:
        raise FileNotFoundError("No CSV files found in " + folder)
    frames = [pd.read_csv(csv_file) for csv_file in csv_files]
    return pd.concat(frames, ignore_index=True)[TWEET_COLUMNS]


# get data file names
path = r'D:/VisualStudioCode/twitter data/bitcoin'
path1 = r'D:/VisualStudioCode/twitter data/ethereum'
path2 = r'D:/VisualStudioCode/twitter data/litecoin'

# Concatenate all data into one DataFrame per coin
bitcoin_df = load_tweets(path)
ethereum_df = load_tweets(path1)
litecoin_df = load_tweets(path2)

bitcoin_df.head()


In [None]:
# Preview the first five rows of the loaded Ethereum tweets.
ethereum_df.head(n=5)

In [None]:
# Preview the first five rows of the loaded Litecoin tweets.
litecoin_df.head(n=5)

In [None]:
def drop_retweets(df):
    """Return `df` without retweets, re-indexed from 0.

    A row counts as a retweet when its 'tweets' text *starts* with the
    stand-alone token ``RT`` (optionally after whitespace). Testing for 'RT'
    anywhere in the text would also discard ordinary tweets that merely
    contain those two capital letters (e.g. "ALERT").

    Rows whose 'tweets' value is missing are dropped as well: they carry no
    text to analyse, and a NaN inside the boolean mask cannot be negated.
    """
    tweets = df['tweets']
    # str.match anchors at the start of the string; \b keeps "RTX ..." etc.
    is_retweet = tweets.str.match(r'\s*RT\b', na=False)
    return df[tweets.notna() & ~is_retweet].reset_index(drop=True)


#dropping retweets
bitcoin_df = drop_retweets(bitcoin_df)

ethereum_df = drop_retweets(ethereum_df)

litecoin_df = drop_retweets(litecoin_df)

In [None]:
#initial cleaning of the tweets 
import re

def cleanTweet(Tweet):
    """Strip Twitter-specific noise from one tweet.

    Each URL, #hashtag, @mention, newline and digit is replaced by a single
    space, so neighbouring words never get glued together.

    Parameters
    ----------
    Tweet : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned text. Runs of spaces are left in place.
    """
    # URLs go first: a link such as https://x.io/a#frag would otherwise lose
    # its "#frag" to the hashtag rule and leave debris behind.
    Tweet = re.sub(r'https?://\S+', ' ', Tweet)
    # \w includes "_" so tags like #to_the_moon are removed completely.
    Tweet = re.sub(r'#\w+', ' ', Tweet)
    # Twitter handles may contain underscores.
    Tweet = re.sub(r'@[A-Za-z0-9_]+', ' ', Tweet)
    # Replace (not delete) line breaks so "a\nb" becomes "a b", not "ab".
    Tweet = re.sub(r'\n', ' ', Tweet)
    Tweet = re.sub(r'[0-9]', ' ', Tweet)
    return Tweet

# Clean the raw tweet text of every coin's frame with cleanTweet.
for coin_df in (bitcoin_df, ethereum_df, litecoin_df):
    coin_df['tweets'] = coin_df['tweets'].apply(cleanTweet)


In [None]:
# Keep only the tweet text and timestamp, and add a 'text' column that starts
# as a copy of 'tweets'; later cells normalise 'text' and leave 'tweets' alone.
bitcoin_df, ethereum_df, litecoin_df = (
    coin_df[['tweets', 'time']].assign(text=coin_df['tweets'])
    for coin_df in (bitcoin_df, ethereum_df, litecoin_df)
)


In [None]:
#changing datetype
# Parse 'time', then round-trip it through a month-day-year string so only
# the calendar date remains.
for coin_df in (bitcoin_df, ethereum_df, litecoin_df):
    parsed_time = pd.to_datetime(coin_df['time'])
    coin_df['time'] = pd.to_datetime(parsed_time.dt.strftime('%m-%d-%Y'))


In [None]:
#lowercasing
import string

# Lower-case the working 'text' column of every coin's frame.
for coin_df in (bitcoin_df, ethereum_df, litecoin_df):
    coin_df['text'] = coin_df['text'].str.lower()

In [None]:
#removing punctuation

def remove_punctuation(text):
    """Return `text` with every character of `string.punctuation` deleted."""
    # A translation table that maps a code point to None deletes it.
    deletion_table = dict.fromkeys(map(ord, string.punctuation))
    return text.translate(deletion_table)

# Strip punctuation from the 'text' column of every coin's frame.
for coin_df in (bitcoin_df, ethereum_df, litecoin_df):
    coin_df["text"] = coin_df["text"].apply(remove_punctuation)

In [None]:
#removing stopwords

from nltk.corpus import stopwords

# English stop-word list from NLTK, held as a set for fast membership tests.
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text):
    """Return `text` without the whitespace-separated tokens found in STOPWORDS.

    The input is coerced with str(), split on whitespace, filtered, and
    re-joined with single spaces.

    NOTE(review): this notebook strips punctuation before calling this
    function, so stop-word entries that contain an apostrophe (presumably
    e.g. "don't") can no longer match their stripped form -- confirm whether
    that matters for the analysis.
    """
    kept_tokens = filter(lambda token: token not in STOPWORDS, str(text).split())
    return " ".join(kept_tokens)

# Drop stop words from the 'text' column of every coin's frame.
for coin_df in (bitcoin_df, ethereum_df, litecoin_df):
    coin_df["text"] = coin_df["text"].apply(remove_stopwords)


In [None]:
#removing emojis
import re
# Compiled once at definition time instead of on every call.
# The earlier catch-all range U+24C2..U+1F251 is replaced by the individual
# symbol blocks it was meant to cover: as one range it also spanned the CJK,
# Hangul and other letter blocks, so ordinary non-Latin text was deleted too.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (regional indicators)
    "\U0001F900-\U0001F9FF"  # supplemental symbols & pictographs
    "\U0001FA70-\U0001FAFF"  # symbols & pictographs extended-A
    "\U0001F000-\U0001F251"  # mahjong, cards, enclosed alphanumerics/ideographs
    "\U000025A0-\U000025FF"  # geometric shapes
    "\U00002600-\U000027BF"  # miscellaneous symbols and dingbats
    "\U00002B00-\U00002BFF"  # miscellaneous symbols and arrows
    "\U000024C2"             # circled M
    "\U00003030\U0000303D\U00003297\U00003299"  # CJK-block emoji symbols
    "\U0000FE00-\U0000FE0F"  # variation selectors
    "\U0000200D"             # zero-width joiner used in emoji sequences
    "]+",
    flags=re.UNICODE,
)


def remove_emoji(string):
    """Return `string` with emoji and pictographic symbols removed.

    Parameters
    ----------
    string : str
        Text to clean. (The parameter name shadows the stdlib `string`
        module inside this function only; it is kept for compatibility.)

    Returns
    -------
    str
        The text with every run of characters from the emoji blocks listed
        in `_EMOJI_PATTERN` deleted. Letters of any script are preserved.
    """
    return _EMOJI_PATTERN.sub(r'', string)

# Remove emoji from the 'text' column of every coin's frame.
for coin_df in (bitcoin_df, ethereum_df, litecoin_df):
    coin_df["text"] = coin_df["text"].apply(remove_emoji)

In [None]:
#Lemmatization with PART OF SPEECH TAGGING
import nltk

from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
# First letter of the tag returned by nltk.pos_tag -> WordNet part of speech.
wordnet_map = dict(N=wordnet.NOUN, V=wordnet.VERB, J=wordnet.ADJ, R=wordnet.ADV)
def lemmatize_words(text):
    """Lemmatize every whitespace-separated token of `text`.

    Tokens are POS-tagged with nltk.pos_tag; the first letter of each tag is
    looked up in `wordnet_map`, and any tag without an entry is lemmatized
    as a noun. The lemmas are re-joined with single spaces.
    """
    lemmas = []
    for token, tag in nltk.pos_tag(text.split()):
        wordnet_pos = wordnet_map.get(tag[0], wordnet.NOUN)
        lemmas.append(lemmatizer.lemmatize(token, wordnet_pos))
    return " ".join(lemmas)

# Lemmatize the 'text' column of every coin's frame.
for coin_df in (bitcoin_df, ethereum_df, litecoin_df):
    coin_df["text"] = coin_df["text"].apply(lemmatize_words)

In [None]:
from textblob import TextBlob

def getPolarity(Tweet):
    """Return the polarity score TextBlob assigns to `Tweet`.

    NOTE(review): the next cell defines a function with this same name
    again; one of the two definitions is redundant.
    """
    sentiment = TextBlob(Tweet).sentiment
    return sentiment.polarity

In [None]:
from textblob import TextBlob

def getPolarity(Tweet):
    """Score one piece of text with TextBlob and return its `sentiment.polarity`."""
    blob = TextBlob(Tweet)
    return blob.sentiment.polarity

# Score the normalised text of every tweet; the result goes in 'Polarity'.
for coin_df in (bitcoin_df, ethereum_df, litecoin_df):
    coin_df['Polarity'] = coin_df['text'].apply(getPolarity)

In [None]:
def getSentiment(score):
    """Map a polarity score to a label.

    Returns 'Negative' for scores below zero, 'Neutral' for exactly zero,
    and 'Positive' for everything else.
    """
    if score < 0:
        return 'Negative'
    return 'Neutral' if score == 0 else 'Positive'

# Turn each polarity score into a Negative / Neutral / Positive label.
for coin_df in (bitcoin_df, ethereum_df, litecoin_df):
    coin_df['Sentiment'] = coin_df['Polarity'].apply(getSentiment)

In [None]:
# Preview the first five Bitcoin rows, now with 'Polarity' and 'Sentiment'.
bitcoin_df.head(n=5)

In [None]:
# Preview the first five Ethereum rows, now with 'Polarity' and 'Sentiment'.
ethereum_df.head(n=5)

In [None]:
# Preview the first five Litecoin rows, now with 'Polarity' and 'Sentiment'.
litecoin_df.head(n=5)

In [None]:
import matplotlib.pyplot as plt
#pie chart counting Positive, Negative and Neutral tweets
fig1, (ax1, ax2, ax3) = plt.subplots(1, 3)
labels = ['Neutral','Positive','Negative']
colors = ['lightskyblue','limegreen','red']
fig1.suptitle('Cryptocurrency Sentiment distribution (Bitcoin, Ethereum, Litecoin)')

# value_counts() returns its rows ordered by frequency, and that order can
# differ from coin to coin. Re-index every count series to the fixed `labels`
# order so each wedge is paired with its own label and colour; a class that
# never occurs is plotted as a zero-sized wedge.
for pie_ax, coin_df in zip((ax1, ax2, ax3), (bitcoin_df, ethereum_df, litecoin_df)):
    sentiment_counts = coin_df['Sentiment'].value_counts().reindex(labels, fill_value=0)
    pie_ax.pie(sentiment_counts, autopct='%1.00f%%',
               shadow=True, startangle=90, labels=labels, colors=colors)
    pie_ax.axis('equal')

In [None]:
def _read_daily_prices(csv_path):
    """Load date/close/tradecount from a price CSV, renamed to time/price/tradecount."""
    raw_prices = pd.read_csv(csv_path, usecols=['date','close','tradecount'])
    return raw_prices.rename(columns={'date':'time','close': 'price'})


bitcoin_prices = _read_daily_prices('D:/VisualStudioCode/Diploma/Binance_BTCUSDT_d.csv')

ethereum_prices = _read_daily_prices('D:/VisualStudioCode/Diploma/Binance_ETHUSDT_d.csv')

litecoin_prices = _read_daily_prices('D:/VisualStudioCode/Diploma/Binance_LTCUSDT_d.csv')


In [None]:
#changing datetype
# Parse 'time', then round-trip it through a month-day-year string so only
# the calendar date remains (the same treatment the tweet frames receive).
all_prices = (bitcoin_prices, ethereum_prices, litecoin_prices)

for prices in all_prices:
    parsed_time = pd.to_datetime(prices['time'])
    prices['time'] = pd.to_datetime(parsed_time.dt.strftime('%m-%d-%Y'))

# Show the resulting column dtypes of each price frame.
for prices in all_prices:
    print(prices.dtypes)

In [None]:
# Mean polarity of all Bitcoin tweets per calendar day, drawn as a line chart.
bitcoin_daily_polarity = bitcoin_df[["time", "Polarity"]].groupby("time").mean()
sentiment_ax = bitcoin_daily_polarity.plot(figsize=(10,6))
sentiment_ax.set_xlabel('Date')
sentiment_ax.set_ylabel('Sentiment')
sentiment_ax.set_title('Bitcoin Twitter Sentiment over time')

In [None]:
# Mean polarity of all Ethereum tweets per calendar day, drawn as a line chart.
ethereum_daily_polarity = ethereum_df[["time", "Polarity"]].groupby("time").mean()
sentiment_ax = ethereum_daily_polarity.plot(figsize=(10,6))
sentiment_ax.set_xlabel('Date')
sentiment_ax.set_ylabel('Sentiment')
sentiment_ax.set_title('Ethereum Twitter Sentiment over time')

In [None]:
# Mean polarity of all Litecoin tweets per calendar day, drawn as a line chart.
litecoin_daily_polarity = litecoin_df[["time", "Polarity"]].groupby("time").mean()
sentiment_ax = litecoin_daily_polarity.plot(figsize=(10,6))
sentiment_ax.set_xlabel('Date')
sentiment_ax.set_ylabel('Sentiment')
sentiment_ax.set_title('Litecoin Twitter Sentiment over time')

In [None]:
# Bitcoin price against date on its own figure.
price_fig, price_ax = plt.subplots(figsize=(10,6))
price_ax.plot(bitcoin_prices['time'], bitcoin_prices['price'])
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Price')
price_ax.set_title('Bitcoin price over time')


In [None]:
# Ethereum price against date on its own figure.
price_fig, price_ax = plt.subplots(figsize=(10,6))
price_ax.plot(ethereum_prices['time'], ethereum_prices['price'])
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Price')
price_ax.set_title('Ethereum price over time')

In [None]:
# Litecoin price against date on its own (wider) figure.
price_fig, price_ax = plt.subplots(figsize=(15,7))
price_ax.plot(litecoin_prices['time'], litecoin_prices['price'])
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Price')
price_ax.set_title('Litecoin price over time')

In [None]:
# Mean polarity per calendar day for each coin; the result is indexed by 'time'.
bitcoin_sent = bitcoin_df.groupby("time")[["Polarity"]].mean()

ethereum_sent = ethereum_df.groupby("time")[["Polarity"]].mean()

litecoin_sent = litecoin_df.groupby("time")[["Polarity"]].mean()

In [None]:
# Join each coin's daily mean polarity with its daily price / trade data.
# The *_sent frames come out of groupby("time") with 'time' as their index,
# while the price frames hold it as a column. Moving it back to a column
# first makes the join key an ordinary column on both sides, so the result
# reliably has a 'time' column. The left join keeps every day that has tweets.
bitcoin_complete = bitcoin_sent.reset_index().merge(bitcoin_prices, how='left', on='time')

ethereum_complete = ethereum_sent.reset_index().merge(ethereum_prices, how='left', on='time')

litecoin_complete = litecoin_sent.reset_index().merge(litecoin_prices, how='left', on='time')

In [None]:
# Twin-axis chart: daily mean tweet polarity (left, blue) vs. price (right, red).
fig, ax1 = plt.subplots(figsize = (12,6))

# ax.plot(series) uses the Series index as x, which here is a row counter
# rather than a date. Plot against the merged 'time' column when the frame
# has one (falling back to the index otherwise).
dates = bitcoin_complete['time'] if 'time' in bitcoin_complete else bitcoin_complete.index

color = 'tab:blue'
ax1.set_title('Bitcoin sentiment and price over time')
ax1.set_xlabel('date')
ax1.set_ylabel('Sentiment', color=color)
ax1.plot(dates, bitcoin_complete['Polarity'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'
ax2.set_ylabel('price', color=color)  # we already handled the x-label with ax1
ax2.plot(dates, bitcoin_complete['price'], color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()


In [None]:
# Twin-axis chart: daily mean tweet polarity (left, blue) vs. trade count (right, red).
fig, ax1 = plt.subplots(figsize = (12,6))

# ax.plot(series) uses the Series index as x, which here is a row counter
# rather than a date. Plot against the merged 'time' column when the frame
# has one (falling back to the index otherwise).
dates = bitcoin_complete['time'] if 'time' in bitcoin_complete else bitcoin_complete.index

color = 'tab:blue'
ax1.set_title('Bitcoin sentiment and trades over time')
ax1.set_xlabel('date')
ax1.set_ylabel('Sentiment', color=color)
ax1.plot(dates, bitcoin_complete['Polarity'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'
ax2.set_ylabel('number of trades', color=color)  # we already handled the x-label with ax1
ax2.plot(dates, bitcoin_complete['tradecount'], color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()


In [None]:
# Twin-axis chart: daily mean tweet polarity (left, blue) vs. price (right, red).
fig, ax1 = plt.subplots(figsize = (12,6))

# ax.plot(series) uses the Series index as x, which here is a row counter
# rather than a date. Plot against the merged 'time' column when the frame
# has one (falling back to the index otherwise).
dates = ethereum_complete['time'] if 'time' in ethereum_complete else ethereum_complete.index

color = 'tab:blue'
ax1.set_title('Ethereum sentiment and price over time')
ax1.set_xlabel('date')
ax1.set_ylabel('Sentiment', color=color)
ax1.plot(dates, ethereum_complete['Polarity'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'
ax2.set_ylabel('price', color=color)  # we already handled the x-label with ax1
ax2.plot(dates, ethereum_complete['price'], color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()


In [None]:
# Twin-axis chart: daily mean tweet polarity (left, blue) vs. trade count (right, red).
fig, ax1 = plt.subplots(figsize = (12,6))

# ax.plot(series) uses the Series index as x, which here is a row counter
# rather than a date. Plot against the merged 'time' column when the frame
# has one (falling back to the index otherwise).
dates = ethereum_complete['time'] if 'time' in ethereum_complete else ethereum_complete.index

color = 'tab:blue'
ax1.set_title('Ethereum sentiment and trades over time')
ax1.set_xlabel('date')
ax1.set_ylabel('Sentiment', color=color)
ax1.plot(dates, ethereum_complete['Polarity'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'
ax2.set_ylabel('number of trades', color=color)  # we already handled the x-label with ax1
ax2.plot(dates, ethereum_complete['tradecount'], color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()


In [None]:
# Twin-axis chart: daily mean tweet polarity (left, blue) vs. price (right, red).
fig, ax1 = plt.subplots(figsize = (12,6))

# ax.plot(series) uses the Series index as x, which here is a row counter
# rather than a date. Plot against the merged 'time' column when the frame
# has one (falling back to the index otherwise).
dates = litecoin_complete['time'] if 'time' in litecoin_complete else litecoin_complete.index

color = 'tab:blue'
ax1.set_title('Litecoin sentiment and price over time')
ax1.set_xlabel('date')
ax1.set_ylabel('Sentiment', color=color)
ax1.plot(dates, litecoin_complete['Polarity'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'
ax2.set_ylabel('price', color=color)  # we already handled the x-label with ax1
ax2.plot(dates, litecoin_complete['price'], color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()


In [None]:
# Twin-axis chart: daily mean tweet polarity (left, blue) vs. trade count (right, red).
fig, ax1 = plt.subplots(figsize = (12,6))

# ax.plot(series) uses the Series index as x, which here is a row counter
# rather than a date. Plot against the merged 'time' column when the frame
# has one (falling back to the index otherwise).
dates = litecoin_complete['time'] if 'time' in litecoin_complete else litecoin_complete.index

color = 'tab:blue'
ax1.set_title('Litecoin sentiment and trades over time')
ax1.set_xlabel('date')
ax1.set_ylabel('Sentiment', color=color)
ax1.plot(dates, litecoin_complete['Polarity'], color=color)
ax1.tick_params(axis='y', labelcolor=color)

ax2 = ax1.twinx()  # instantiate a second axes that shares the same x-axis

color = 'tab:red'
ax2.set_ylabel('number of trades', color=color)  # we already handled the x-label with ax1
ax2.plot(dates, litecoin_complete['tradecount'], color=color)
ax2.tick_params(axis='y', labelcolor=color)

fig.tight_layout()  # otherwise the right y-label is slightly clipped
plt.show()


In [None]:
# Pearson correlation between Bitcoin price and daily mean polarity.
btc_price = bitcoin_complete['price']
btc_price.corr(bitcoin_complete['Polarity'], method='pearson')

In [None]:
# Pearson correlation between Bitcoin trade count and Bitcoin daily mean
# polarity. Both series must come from bitcoin_complete: pairing the Bitcoin
# trade count with Ethereum's polarity mixes two different coins.
bitcoin_complete['tradecount'].corr(bitcoin_complete['Polarity'],method='pearson')

In [None]:
# Pearson correlation between Bitcoin trade count and Bitcoin price.
# Both series must come from bitcoin_complete: pairing the Bitcoin trade
# count with Ethereum's price mixes two different coins.
bitcoin_complete['tradecount'].corr(bitcoin_complete['price'],method='pearson')

In [None]:
# Pearson correlation between Ethereum price and daily mean polarity.
eth_price = ethereum_complete['price']
eth_price.corr(ethereum_complete['Polarity'], method='pearson')

In [None]:
# Pearson correlation between Ethereum trade count and daily mean polarity.
eth_trades = ethereum_complete['tradecount']
eth_trades.corr(ethereum_complete['Polarity'], method='pearson')

In [None]:
# Pearson correlation between Ethereum trade count and price.
eth_trades = ethereum_complete['tradecount']
eth_trades.corr(ethereum_complete['price'], method='pearson')

In [None]:
# Pearson correlation between Litecoin price and daily mean polarity.
ltc_price = litecoin_complete['price']
ltc_price.corr(litecoin_complete['Polarity'], method='pearson')

In [None]:
# Pearson correlation between Litecoin trade count and daily mean polarity.
ltc_trades = litecoin_complete['tradecount']
ltc_trades.corr(litecoin_complete['Polarity'], method='pearson')

In [None]:
# Pearson correlation between Litecoin trade count and price.
ltc_trades = litecoin_complete['tradecount']
ltc_trades.corr(litecoin_complete['price'], method='pearson')

In [None]:
import seaborn as sns 
# Correlate exactly the three columns the tick labels describe, in label
# order. A bare .corr() depends on which columns the merged frame happens to
# contain (it may also hold a datetime 'time' column) and on the installed
# pandas version's numeric_only default, so the labels could end up
# mismatched or the call could fail.
corr_columns = ['Polarity', 'price', 'tradecount']
tick_labels = ['Sentiment', 'Price', 'Number of trades']

ax = plt.axes()
sns.heatmap(bitcoin_complete[corr_columns].corr(), vmin = -1, vmax = 1,
            xticklabels = tick_labels, yticklabels = tick_labels, ax = ax)

ax.set_title('Bitcoin correlation heatmap')

In [None]:
# Correlate exactly the three columns the tick labels describe, in label
# order. A bare .corr() depends on which columns the merged frame happens to
# contain (it may also hold a datetime 'time' column) and on the installed
# pandas version's numeric_only default, so the labels could end up
# mismatched or the call could fail.
corr_columns = ['Polarity', 'price', 'tradecount']
tick_labels = ['Sentiment', 'Price', 'Number of trades']

ax = plt.axes()

sns.heatmap(ethereum_complete[corr_columns].corr(), vmin = -1, vmax = 1,
            xticklabels = tick_labels, yticklabels = tick_labels, ax = ax)

ax.set_title('Ethereum correlation heatmap')

In [None]:
# Correlate exactly the three columns the tick labels describe, in label
# order. A bare .corr() depends on which columns the merged frame happens to
# contain (it may also hold a datetime 'time' column) and on the installed
# pandas version's numeric_only default, so the labels could end up
# mismatched or the call could fail.
corr_columns = ['Polarity', 'price', 'tradecount']
tick_labels = ['Sentiment', 'Price', 'Number of trades']

ax = plt.axes()

sns.heatmap(litecoin_complete[corr_columns].corr(), vmin = -1, vmax = 1,
            xticklabels = tick_labels, yticklabels = tick_labels, ax = ax)

ax.set_title('Litecoin correlation heatmap')