# Historical data for today's top cryptocurrencies by market cap

In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import pendulum
from sqlalchemy import create_engine
from webdriver_manager.chrome import ChromeDriverManager
import numpy as np
from splinter import Browser
from bs4 import BeautifulSoup
import os
import requests
import json

In [2]:
# Resolve the chromedriver binary (downloaded or taken from the local cache) and
# hand its path to splinter, which opens a visible (non-headless) Chrome window.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - Driver [C:\Users\jich-\.wdm\drivers\chromedriver\win32\89.0.4389.23\chromedriver.exe] found in cache






#### Context : Finance

#### Goal: Create tables showing today’s 10 best/most profitable cryptocurrencies to invest in. We will look at historical data for the previous year and build a database of their hourly and daily price changes.

#### METHOD: We do this by extracting data based on each cryptocurrency's rank and on its daily and hourly price changes.

#### FINDING DATA: Following are the sources for finding and collecting our data. Sources: Cryptocurrencies: https://coinmarketcap.com/ 
CSV files: https://www.cryptodatadownload.com/data/binance/
APIs: https://financialmodelingprep.com/api/v3/ https://rest.coinapi.io/v1/

#### Data collection tools: Splinter, APIs, BeautifulSoup

#### DATA AND CLEANUP: 
##### Cleanup process: Pandas, SQLAlchemy. Database: PostgreSQL

##### TEAM MEMBERS Juan Castaneda Elif Evrim Polat Nichole Edet

<div><h3 style="color:green;">Web Scraping - Obtaining today's list of the best cryptocurrencies at https://coinmarketcap.com</h3></div>

In [3]:
# Page whose HTML tables are read with pandas in the next step
url = "https://coinmarketcap.com/"

In [4]:
# Parse every <table> found at `url` into a list of DataFrames
tables = pd.read_html(io=url)

In [5]:
# The first parsed table is the coin ranking; preview its first five rows
df = tables[0]
df.head(5)

Unnamed: 0.1,Unnamed: 0,#,Name,Price,24h %,7d %,Market Cap,Volume(24h),Circulating Supply,Last 7 Days,Unnamed: 10
0,,1.0,Bitcoin1BTCBuy,"$62,999.18",0.15%,11.72%,"$1,176,996,381,873","$77,136,787,8871,224,409 BTC","18,682,725 BTC",,
1,,2.0,Ethereum2ETHBuy,"$2,434.61",6.46%,22.27%,"$281,126,610,234","$35,145,703,88114,435,859 ETH","115,470,846 ETH",,
2,,3.0,Binance Coin3BNBBuy,$541.39,3.70%,41.37%,"$83,662,903,213","$7,366,198,57313,606,021 BNB","154,532,785 BNB",,
3,,4.0,XRP4XRP,$1.80,2.23%,93.64%,"$81,903,181,840","$28,720,512,28115,921,566,572 XRP","45,404,028,640 XRP",,
4,,5.0,Cardano5ADA,$1.45,3.45%,21.66%,"$46,337,004,383","$9,549,138,2906,583,913,420 ADA","31,948,309,441 ADA",,


In [6]:
# Keep only the columns that match our PostgreSQL database schema
columns = ['#', 'Name', 'Price']
# Take an explicit copy so that later in-place edits act on an independent frame
# rather than on a slice of `df` (the situation pandas reports with
# SettingWithCopyWarning).
cryptos_df = df[columns].copy()
cryptos_df.head()

Unnamed: 0,#,Name,Price
0,1.0,Bitcoin1BTCBuy,"$62,999.18"
1,2.0,Ethereum2ETHBuy,"$2,434.61"
2,3.0,Binance Coin3BNBBuy,$541.39
3,4.0,XRP4XRP,$1.80
4,5.0,Cardano5ADA,$1.45


In [7]:
# Render the rank column '#' as whole-number text (e.g. 1.0 -> '1')
cryptos_df['#'] = cryptos_df['#'].astype(float).map(lambda rank: "{:.0f}".format(rank))
# Switch to the column names id / coin_name / latest_price
cryptos_df = cryptos_df.rename(
    columns={'#': 'id', 'Name': 'coin_name', 'Price': 'latest_price'}
)
# Keep only the first 10 coins
cryptos_df = cryptos_df.head(10)

In [8]:
# Preview the first five rows after renaming the columns
cryptos_df.head(5)

Unnamed: 0,id,coin_name,latest_price
0,1,Bitcoin1BTCBuy,"$62,999.18"
1,2,Ethereum2ETHBuy,"$2,434.61"
2,3,Binance Coin3BNBBuy,$541.39
3,4,XRP4XRP,$1.80
4,5,Cardano5ADA,$1.45


In [9]:
# Split the scraped name text (e.g. 'Bitcoin1BTCBuy') at its first run of digits.
# The capture group keeps the digits, so the result has three columns:
#   0 = coin name, 1 = rank digits, 2 = coin symbol (possibly followed by 'Buy').
# n=1 limits this to a single split, so digits later in the text are left alone.
# NOTE: a coin whose own name contains digits would be split inside the name.
# The pattern is a raw string so that \d is not read as an invalid string escape.
columnsplit = cryptos_df['coin_name'].str.split(r'(\d+)', n=1, expand=True)
columnsplit.head()

Unnamed: 0,0,1,2
0,Bitcoin,1,BTCBuy
1,Ethereum,2,ETHBuy
2,Binance Coin,3,BNBBuy
3,XRP,4,XRP
4,Cardano,5,ADA


###### Formatting the table to match the PostgreSQL table in our DB

In [10]:
# Split 'coin_name' at its first run of digits: 0 = name, 1 = rank digits, 2 = symbol.
# The pattern is a raw string so that \d is not read as an invalid string escape.
columnsplit = cryptos_df['coin_name'].str.split(r'(\d+)', n=1, expand=True)
# coinmarketcap appends a "Buy" label to some symbols (e.g. 'BTCBuy'); strip it only
# where it trails the symbol, with regex=True spelled out so the behaviour does not
# depend on the pandas version's default. Column 1 (the rank digits) is not needed.
cryptos_df = cryptos_df.assign(
    coin_name=columnsplit[0],
    coin_symbol=columnsplit[2].str.replace(r'Buy$', '', regex=True),
)

In [11]:
# Display the frame: id, cleaned coin_name, latest_price and the new coin_symbol column
cryptos_df

Unnamed: 0,id,coin_name,latest_price,coin_symbol
0,1,Bitcoin,"$62,999.18",BTC
1,2,Ethereum,"$2,434.61",ETH
2,3,Binance Coin,$541.39,BNB
3,4,XRP,$1.80,XRP
4,5,Cardano,$1.45,ADA
5,6,Tether,$1.00,USDT
6,7,Polkadot,$42.45,DOT
7,8,Uniswap,$36.11,UNI
8,9,Litecoin,$280.30,LTC
9,10,Chainlink,$40.23,LINK


In [12]:
# rearranging columns
columns = cryptos_df.columns.to_list()

In [13]:
# new order
columns = columns[:2] + [columns[-1]] + [columns[-2]]
columns

['id', 'coin_name', 'coin_symbol', 'latest_price']

In [14]:
# Apply the new column order and display the result
cryptos_df = cryptos_df.loc[:, columns]
cryptos_df

Unnamed: 0,id,coin_name,coin_symbol,latest_price
0,1,Bitcoin,BTC,"$62,999.18"
1,2,Ethereum,ETH,"$2,434.61"
2,3,Binance Coin,BNB,$541.39
3,4,XRP,XRP,$1.80
4,5,Cardano,ADA,$1.45
5,6,Tether,USDT,$1.00
6,7,Polkadot,DOT,$42.45
7,8,Uniswap,UNI,$36.11
8,9,Litecoin,LTC,$280.30
9,10,Chainlink,LINK,$40.23


<div><h3 style="color:green;">Downloading the CSV files, if available, from https://www.cryptodatadownload.com/data/binance/</h3></div>

In [15]:
# Obtaining symbols in the dataframe and storing in a list
symbols = cryptos_df['coin_symbol'].to_list()
symbols

['BTC', 'ETH', 'BNB', 'XRP', 'ADA', 'USDT', 'DOT', 'UNI', 'LTC', 'LINK']

###### Web scraping using BeautifulSoup

In [16]:
# URL of the page to be scraped
url = 'https://www.cryptodatadownload.com/data/binance/'

try:
    # Load the page in the Chrome session driven by splinter
    browser.visit(url)
    # Grab the page markup as plain text
    html = browser.html
finally:
    # Always close the browser, even if loading the page fails, so that no
    # Chrome/chromedriver process is left running
    browser.quit()

# Parse the HTML text into a BeautifulSoup object (html is a plain string, so
# parsing after the browser is closed is safe)
soup = BeautifulSoup(html, 'html.parser')

In [21]:
# Collect every <p> element of the parsed page
prhs = soup.find_all(name='p')

In [33]:
# Take the fourth <p> element (index 3) and print each of its child nodes in turn
bnb_cryptos = prhs[3]
for crypto in bnb_cryptos.children:
    print(crypto)



<br/>
<img height="16" src="/images/btc_logo_small.png" width="16,"/>
<b> BTC/USDT</b>
<a href="/cdd/Binance_BTCUSDT_d.csv"> [Daily]</a>
<a href="/cdd/Binance_BTCUSDT_1h.csv"> [Hourly]</a>
<a href="/cdd/Binance_BTCUSDT_minute.csv"> [Minute]</a>
 ... 
<a href="/analytics/var/BTC/">[Value at Risk]</a>


<br/>
<img height="16" src="/images/eth_logo_small.png" width="16,"/>
<b> ETH/USDT</b>
<a href="/cdd/Binance_ETHUSDT_d.csv"> [Daily]</a>
<a href="/cdd/Binance_ETHUSDT_1h.csv"> [Hourly]</a>
<a href="/cdd/Binance_ETHUSDT_minute.csv"> [Minute]</a>
 ... 
<a href="/analytics/var/ETH/">[Value at Risk]</a>


<br/>
<img height="16" src="/images/ltc_logo_small.png" width="16,"/>
<b> LTC/USDT</b>
<a href="/cdd/Binance_LTCUSDT_d.csv"> [Daily]</a>
<a href="/cdd/Binance_LTCUSDT_1h.csv"> [Hourly]</a>
<a href="/cdd/Binance_LTCUSDT_minute.csv"> [Minute]</a>
 ... 
<a href="/analytics/var/LTC/">[Value at Risk]</a>


<br/>
<img height="16" src="/images/neo_logo_small.png" width="16,"/>
<b> NEO/USDT</b>
<a