# Bitcoin searches & price correlation

In [1]:
# Main
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Web
import requests
from bs4 import BeautifulSoup
#from IPython.display import display_html

# More
# Register pandas' date/time converters with matplotlib and raise the
# number of DataFrame rows shown in cell output to 400.
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
pd.set_option('display.max_rows', 400)

# PyTrends
# Single Google Trends client (default settings) shared by every request below.
from pytrends.request import TrendReq
pytrend = TrendReq()

<hr>
• Let's try five different searches related to Bitcoin.<br>
• PyTrends (an unofficial Google Trends API) lets us request up to 5 search terms at once.<br>
<hr>
* From past tests, I noticed that Google Trends returns daily scores only if the chosen period is shorter than about 8 months. That's why I split the request into 3 periods here.

In [36]:
# Keywords list (max 5 words/expressions per request)
kw_list = ['bitcoin','invest bitcoin','buy bitcoin','bittrex','bitcoin trading']

# Three consecutive, non-overlapping windows covering 2018-02-01 to 2019-09-13:
#   2018-02-01 to 2018-05-11
#   2018-05-12 to 2019-01-12
#   2019-01-13 to 2019-09-13
timeframes = ['2018-02-01 2018-05-11', '2018-05-12 2019-01-12', '2019-01-13 2019-09-13']

# One request per window, all five keywords together in each request
frames = []
for timeframe in timeframes:
    pytrend.build_payload(kw_list, cat=0, timeframe=timeframe, geo='', gprop='')
    frames.append(pytrend.interest_over_time())
gst_1, gst_2, gst_3 = frames

# Stack the windows in chronological order
gst2 = pd.concat(frames)
gst2.head()

Unnamed: 0_level_0,bitcoin,invest bitcoin,buy bitcoin,bittrex,bitcoin trading,isPartial
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-02-01,61,0,1,1,0,False
2018-02-02,81,0,1,1,0,False
2018-02-03,53,0,1,1,0,False
2018-02-04,46,0,1,1,0,False
2018-02-05,80,0,1,1,0,False


<hr>
• It works well. But the volume of 'bitcoin' searches is so high that the scores of the other searches are close to 0 (all terms in one request are scaled relative to the most popular one).<br>
• We have to request the data one by one, for each word/expression.<br>
<hr>

In [33]:
def get_trend_df(kw_list,
                 timeframes=('2018-02-01 2018-05-11',
                             '2018-05-12 2019-01-12',
                             '2019-01-13 2019-09-13'),
                 client=None):
    """Fetch Google Trends interest for each keyword on its own and combine the results.

    Every entry of ``kw_list`` is requested separately, once per window in
    ``timeframes``. The windows are stacked in the order given and the
    per-keyword series are placed side by side, one column per keyword.

    Parameters
    ----------
    kw_list : iterable
        Search expressions. Each entry may be a plain string (``'bitcoin'``)
        or a list of terms (``['bitcoin']``), which is the form
        ``build_payload`` expects.
    timeframes : sequence of str, optional
        Google Trends timeframe strings (``'YYYY-MM-DD YYYY-MM-DD'``), in
        chronological order. The default is the three consecutive windows
        covering 2018-02-01 to 2019-09-13.
    client : object, optional
        Object exposing ``build_payload`` and ``interest_over_time``.
        Defaults to the notebook-level ``pytrend`` instance.

    Returns
    -------
    pandas.DataFrame
        Date-indexed frame with one column per requested term and the
        ``isPartial`` flag removed. Columns appear in reverse request order
        (last keyword first). The frame is empty if nothing was returned.

    Notes
    -----
    NOTE(review): Google Trends scales every request to its own 0-100 range,
    so values from different windows are presumably not on a common scale
    after stacking - confirm before comparing levels across window boundaries.
    """
    if client is None:
        client = pytrend  # shared client created in the notebook's setup cell

    per_keyword = []
    for expression in kw_list:
        # build_payload expects a list of terms; wrap bare strings so that
        # 'bitcoin' and ['bitcoin'] are both accepted.
        terms = [expression] if isinstance(expression, str) else list(expression)

        windows = []
        for timeframe in timeframes:
            client.build_payload(terms, cat=0, timeframe=timeframe, geo='', gprop='')
            window = client.interest_over_time()
            # A window with no data comes back empty (and without 'isPartial');
            # skip it instead of failing on the column drop below.
            if not window.empty:
                windows.append(window)
        if not windows:
            continue

        gst = pd.concat(windows).drop(columns='isPartial', errors='ignore')
        # Guard against overlapping windows producing repeated dates.
        gst = gst[~gst.index.duplicated(keep='first')]
        per_keyword.append(gst)

    if not per_keyword:
        return pd.DataFrame()

    # Reverse so the column order matches the previous join-based
    # implementation; dates are aligned on the index (outer join).
    return pd.concat(per_keyword[::-1], axis=1)

In [44]:
# Each keyword goes in its own one-element list: get_trend_df passes every
# entry straight to build_payload, which takes a list of terms
kw_list = [
    [keyword]
    for keyword in ('bitcoin', 'invest bitcoin', 'buy bitcoin', 'bittrex', 'bitcoin trading')
]
df = get_trend_df(kw_list)

df.head()

Unnamed: 0_level_0,bitcoin trading,bittrex,buy bitcoin,invest bitcoin,bitcoin
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-02-01,77,82,78,65,61
2018-02-02,78,100,86,82,81
2018-02-03,56,90,72,60,53
2018-02-04,59,73,57,71,46
2018-02-05,82,76,74,70,80


<hr>
• Okay, looks better!<br>
• Let's get the historical bitcoin price.
<hr>

In [37]:
def get_btc_price(start_date='20170101', end_date='20190913', timeout=30):
    '''
    Scrape the historical BTC price table from coinmarketcap.com.

    Parameters
    ----------
    start_date, end_date : str
        Bounds of the requested period in ``YYYYMMDD`` form; they are placed
        in the ``start``/``end`` query parameters of the URL.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up, so the cell
        cannot hang indefinitely.

    Returns
    -------
    pandas.DataFrame
        The first ``<table class="table">`` found on the page, with its
        ``Date`` column converted to datetimes.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If the page contains no ``<table class="table">`` element.
    '''
    # Local import keeps this function self-contained within the notebook.
    from io import StringIO

    url = f'https://coinmarketcap.com/currencies/bitcoin/historical-data/?start={start_date}&end={end_date}'
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here rather than on a confusing parse error below.
    response.raise_for_status()

    tables = BeautifulSoup(response.text, 'lxml').find_all('table', class_='table')
    if not tables:
        raise IndexError(f'no <table class="table"> element found at {url}')

    # read_html takes a file-like object; passing literal HTML is deprecated
    # in recent pandas releases.
    btc = pd.read_html(StringIO(str(tables[0])))[0]
    btc['Date'] = pd.to_datetime(btc['Date'])

    return btc

In [38]:
# Scrape prices for the same span as the search data, then index the
# frame by date in ascending order
bhp = (
    get_btc_price(start_date='20180201', end_date='20190913')
    .set_index('Date')
    .sort_index()
)

bhp.head()

Unnamed: 0_level_0,Open*,High,Low,Close**,Volume,Market Cap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-02-01,10237.3,10288.8,8812.28,9170.54,9959400448,154428564694
2018-02-02,9142.28,9142.28,7796.49,8830.75,12726899712,148725283812
2018-02-03,8852.12,9430.75,8251.63,9174.91,7263790080,154540000411
2018-02-04,9175.7,9334.87,8031.22,8277.01,7073549824,139433682759
2018-02-05,8270.54,8364.84,6756.68,6955.27,9285289984,117184385122


<hr>
• I decided to keep only 'High' and 'Low'. 'Price' will be the mean of these two.<br>
• Let's join the dataframes.
<hr>

In [45]:
# Attach the daily high/low to the search-interest frame (aligned on the
# date index) and use their midpoint as the day's price
data = (
    df.join(bhp[['High', 'Low']])
    .assign(Price=lambda joined: (joined['High'] + joined['Low']) / 2)
)

data.head()

Unnamed: 0_level_0,bitcoin trading,bittrex,buy bitcoin,invest bitcoin,bitcoin,High,Low,Price
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2018-02-01,77,82,78,65,61,10288.8,8812.28,9550.54
2018-02-02,78,100,86,82,81,9142.28,7796.49,8469.385
2018-02-03,56,90,72,60,53,9430.75,8251.63,8841.19
2018-02-04,59,73,57,71,46,9334.87,8031.22,8683.045
2018-02-05,82,76,74,70,80,8364.84,6756.68,7560.76


<hr>

• Final command: `corr()`. What can we say about the correlations?


<hr>

In [46]:
# Pairwise Pearson correlation (the pandas default) between all columns
data.corr(method='pearson')

Unnamed: 0,bitcoin trading,bittrex,buy bitcoin,invest bitcoin,bitcoin,High,Low,Price
bitcoin trading,1.0,0.581449,0.440939,0.555011,0.553674,0.461823,0.418811,0.44189
bittrex,0.581449,1.0,0.50941,0.618974,0.513289,0.206085,0.178836,0.193321
buy bitcoin,0.440939,0.50941,1.0,0.748224,0.885359,-0.03758,-0.051416,-0.044286
invest bitcoin,0.555011,0.618974,0.748224,1.0,0.720649,0.129027,0.102575,0.116524
bitcoin,0.553674,0.513289,0.885359,0.720649,1.0,0.057946,0.017947,0.038822
High,0.461823,0.206085,-0.03758,0.129027,0.057946,1.0,0.993674,0.998544
Low,0.418811,0.178836,-0.051416,0.102575,0.017947,0.993674,1.0,0.998286
Price,0.44189,0.193321,-0.044286,0.116524,0.038822,0.998544,0.998286,1.0


<hr>

Here's the result:
1. 'bitcoin trading' (+0.44)
2. 'bittrex' (+0.19)
3. 'invest bitcoin' (+0.12)
4. 'bitcoin' (+0.04)
5. 'buy bitcoin' (-0.04)

**From here, what can we say?**<br>
It seems that trading-related searches have the strongest relationship with the bitcoin price (a correlation alone cannot tell us whether they have any impact on it). On the other hand, 'bitcoin' and 'buy bitcoin' seem to have no relationship with the price. These searches may come from a very broad audience that is simply curious to know more about bitcoin, and that's it. They may also come from crypto newcomers looking for information when they hear something about bitcoin.<br><br> 
These are only hypotheses, so let's stick to the facts. **Trading searches are more correlated with the bitcoin price**.