# Historical data for today's top cryptocurrencies by market cap

In [1]:
import json
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import pendulum
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from sqlalchemy import create_engine, inspect
from webdriver_manager.chrome import ChromeDriverManager

from config import api_key, password

pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# Resolve the local chromedriver path (webdriver_manager downloads it or reuses its cache)
executable_path = {'executable_path': ChromeDriverManager().install()}
# Open a visible (non-headless) Chrome window driven through splinter; it is used later for scraping
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389






[WDM] - Driver [C:\Users\jich-\.wdm\drivers\chromedriver\win32\89.0.4389.23\chromedriver.exe] found in cache


#### Context : Finance

#### Goal: Our goal is to create tables showing today’s 10 best/most profitable cryptocurrencies to invest in. We will look at their historical data for the previous year and build a database of their hourly and daily price changes.

#### Method: We do this by selecting the cryptocurrencies according to their rank and then collecting the daily and hourly price changes of each one.

#### FINDING DATA: Following are the sources for finding and collecting our data. Sources: Cryptocurrencies: https://coinmarketcap.com/ 
CSV files: https://www.cryptodatadownload.com/data/binance/
APIs: https://financialmodelingprep.com/api/v3/ https://rest.coinapi.io/v1/

#### Data collection tools: Splinter, APIs, BeautifulSoup

#### DATA AND CLEANUP: 
##### Cleanup process: Pandas, SQLAlchemy. Database: PostgreSQL

##### Team members: Juan Castaneda, Elif Evrim Polat, Nichole Edet

<div><h3 style="color:green;">Web Scraping - Obtaining today's list of top cryptocurrencies from https://coinmarketcap.com </h3></div>

In [3]:
# Page whose HTML tables will be read with pandas
url = 'https://coinmarketcap.com/'

In [4]:
# Parse every HTML <table> found at `url`; the result is indexed per table below
tables = pd.read_html(url)

In [5]:
# The first parsed table is the coin listing used for the rest of the notebook
df = tables[0]
df.head()

Unnamed: 0.1,Unnamed: 0,#,Name,Price,24h %,7d %,Market Cap,Volume(24h),Circulating Supply,Last 7 Days,Unnamed: 10
0,,1.0,Bitcoin1BTCBuy,"$63,213.45",0.31%,8.92%,"$1,181,066,707,993","$60,987,276,316964,783 BTC","18,683,787 BTC",,
1,,2.0,Ethereum2ETHBuy,"$2,513.52",3.36%,21.31%,"$290,272,278,109","$32,302,269,09412,851,405 ETH","115,484,352 ETH",,
2,,3.0,Binance Coin3BNBBuy,$542.85,0.36%,29.86%,"$83,888,388,163","$4,640,116,6728,547,669 BNB","154,532,785 BNB",,
3,,4.0,XRP4XRP,$1.76,3.04%,63.98%,"$79,796,302,302","$15,477,944,5258,806,937,367 XRP","45,404,028,640 XRP",,
4,,5.0,Cardano5ADA,$1.48,1.67%,21.31%,"$47,187,312,138","$5,332,530,1883,610,405,357 ADA","31,948,309,441 ADA",,


In [6]:
# Keep only the columns that match our PostgreSQL database schema.
# .copy() makes cryptos_df an independent frame, so the edits in the following
# cells never write into a view of the scraped `df`.
columns = ['#', 'Name', 'Price']
cryptos_df = df[columns].copy()
cryptos_df.head()

Unnamed: 0,#,Name,Price
0,1.0,Bitcoin1BTCBuy,"$63,213.45"
1,2.0,Ethereum2ETHBuy,"$2,513.52"
2,3.0,Binance Coin3BNBBuy,$542.85
3,4.0,XRP4XRP,$1.76
4,5.0,Cardano5ADA,$1.48


In [7]:
# Keep only the first 10 coins, rename the columns to the database column names,
# and store the rank as a real integer (read_html parses it as float).
# Slicing first means missing ranks further down the page cannot break the
# integer conversion; rename() without inplace keeps this cell re-runnable.
cryptos_df = (
    cryptos_df
    .head(10)
    .rename(columns={'#':'id','Name':'coin_name','Price':'latest_price'})
    .astype({'id': 'int64'})
)

In [8]:
# Strip the currency sign and thousands separators, then store the price as a float.
# The pattern is a raw string: '\$' in a normal string is an invalid escape sequence.
cryptos_df = cryptos_df.assign(
    latest_price=cryptos_df['latest_price'].replace(r'[$,]', '', regex=True).astype(float)
)
cryptos_df.head()

Unnamed: 0,id,coin_name,latest_price
0,1,Bitcoin1BTCBuy,63213.45
1,2,Ethereum2ETHBuy,2513.52
2,3,Binance Coin3BNBBuy,542.85
3,4,XRP4XRP,1.76
4,5,Cardano5ADA,1.48


In [9]:
# Preview: split the scraped Name column into coin name, rank and coin symbol,
# using the first run of digits as the delimiter.
# n=1 splits only once, so digits that appear later in the text are left untouched.
# The capture group keeps the digits as their own column (column 1).
columnsplit = cryptos_df['coin_name'].str.split(r'(\d+)', n=1, expand=True)
columnsplit.head()

Unnamed: 0,0,1,2
0,Bitcoin,1,BTCBuy
1,Ethereum,2,ETHBuy
2,Binance Coin,3,BNBBuy
3,XRP,4,XRP
4,Cardano,5,ADA


###### Formatting table to match our postgreSQL table in our DB

In [10]:
# Column 0 is the coin name, column 1 the rank digits (ignored), column 2 the symbol.
columnsplit = cryptos_df['coin_name'].str.split(r'(\d+)', n=1, expand=True)
# coinmarketcap appends a "Buy" label to the symbol of some coins; strip it only
# when it is a trailing suffix, with regex=True stated explicitly so the result
# does not depend on the pandas version's default.
cryptos_df = cryptos_df.assign(
    coin_name=columnsplit[0],
    coin_symbol=columnsplit[2].str.replace(r'Buy$', '', regex=True),
)

In [11]:
cryptos_df

Unnamed: 0,id,coin_name,latest_price,coin_symbol
0,1,Bitcoin,63213.45,BTC
1,2,Ethereum,2513.52,ETH
2,3,Binance Coin,542.85,BNB
3,4,XRP,1.76,XRP
4,5,Cardano,1.48,ADA
5,6,Tether,1.0,USDT
6,7,Polkadot,43.57,DOT
7,8,Dogecoin,0.1798,DOGE
8,9,Uniswap,37.75,UNI
9,10,Litecoin,286.86,LTC


In [12]:
# Current column order of cryptos_df as a plain Python list (starting point for the reordering)
columns = list(cryptos_df.columns)

In [13]:
# New column order, matching the `coins` table in the database.
# Written out explicitly: swapping the last two entries by position would swap
# them back again every time this cell is re-run.
columns = ['id', 'coin_name', 'coin_symbol', 'latest_price']
columns

['id', 'coin_name', 'coin_symbol', 'latest_price']

In [14]:
# Apply the column order stored in `columns`
cryptos_df = cryptos_df.loc[:, columns]
cryptos_df

Unnamed: 0,id,coin_name,coin_symbol,latest_price
0,1,Bitcoin,BTC,63213.45
1,2,Ethereum,ETH,2513.52
2,3,Binance Coin,BNB,542.85
3,4,XRP,XRP,1.76
4,5,Cardano,ADA,1.48
5,6,Tether,USDT,1.0
6,7,Polkadot,DOT,43.57
7,8,Dogecoin,DOGE,0.1798
8,9,Uniswap,UNI,37.75
9,10,Litecoin,LTC,286.86


<h3 style="color:orange;"> Loading cryptos_df into the coins table on the PostgreSQL server (pgAdmin)</h3>

<h4 style="color:pink;"> Connecting to DB</h4>

In [34]:
# Build the PostgreSQL connection URL (user postgres, localhost:5433, database cryptos_db).
# The password is percent-encoded so that characters such as '@', ':' or '/'
# cannot be mistaken for URL delimiters.
connection_string = f"postgres:{quote_plus(password)}@localhost:5433/cryptos_db"
engine = create_engine(f'postgresql://{connection_string}')

In [35]:
# List the tables in the database through the inspector API
# (Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0).
inspect(engine).get_table_names()

['coins', 'daily_Price', 'hourly_price']

In [36]:
# Insert the rows of cryptos_df into the existing `coins` table (the DataFrame index is not written).
# NOTE(review): with if_exists='append', re-running this cell inserts the same rows again — confirm this is intended.
cryptos_df.to_sql(name='coins', con=engine, if_exists='append', index=False)

In [37]:
# Read the coins table back to check that the rows were written to cryptos_db
pd.read_sql_query('select * from coins', con=engine)

Unnamed: 0,id,coin_name,coin_symbol,latest_price
0,1,Bitcoin,BTC,63213.45
1,2,Ethereum,ETH,2513.52
2,3,Binance Coin,BNB,542.85
3,4,XRP,XRP,1.76
4,5,Cardano,ADA,1.48
5,6,Tether,USDT,1.0
6,7,Polkadot,DOT,43.57
7,8,Dogecoin,DOGE,0.1798
8,9,Uniswap,UNI,37.75
9,10,Litecoin,LTC,286.86


<div><h3 style="color:green;">Downloading the CSV files, if available, from https://www.cryptodatadownload.com/data/binance/</h3></div>

In [19]:
# Collect the coin symbols of the selected coins into a plain list
symbols = cryptos_df.coin_symbol.tolist()
symbols

['BTC', 'ETH', 'BNB', 'XRP', 'ADA', 'USDT', 'DOT', 'DOGE', 'UNI', 'LTC']

###### Web scraping using BeautifulSoup

In [20]:
# URL of the page to be scraped
url = 'https://www.cryptodatadownload.com/data/binance/'
try:
    # Load the page in the Chrome window opened at the top of the notebook
    browser.visit(url)
    # Grab the rendered page content
    html = browser.html
finally:
    # Always close the browser, even when loading the page fails,
    # so no Chrome/chromedriver process is left running
    browser.quit()
# Parse the HTML text into a BeautifulSoup object
soup = BeautifulSoup(html, 'html.parser')

In [21]:
# All <p> elements of the scraped page; one of them is picked by index in the next cell
prhs = soup.find_all('p')

In [22]:
# NOTE(review): the index 3 is tied to the page layout at scraping time — confirm it still
# points at the paragraph that lists the CSV files.
bnb_cryptos = prhs[3]
# href=True skips anchors that have no href attribute, so link['href'] cannot raise KeyError
links = bnb_cryptos.find_all('a', href=True)
# Keep only the links whose href contains "cdd" and turn them into absolute URLs
complete_links = ["https://www.cryptodatadownload.com" + link['href'] for link in links if "cdd" in link['href']]

In [23]:
complete_links

['https://www.cryptodatadownload.com/cdd/Binance_BTCUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_BTCUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_BTCUSDT_minute.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_ETHUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_ETHUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_ETHUSDT_minute.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_LTCUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_LTCUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_LTCUSDT_minute.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_NEOUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_NEOUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_NEOUSDT_minute.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_BNBUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_BNBUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_BNBUSDT_minute.c

In [24]:
len(complete_links)

51

In [25]:
# Make sure the folder that receives the CSV files exists.
# os.makedirs also creates ./Resources when it is missing, and needs no extra package.
datasets_dir = "./Resources/datasets"
if not os.path.isdir(datasets_dir):
    os.makedirs(datasets_dir, exist_ok=True)
    print("Created datasets folder")

In [26]:
# For every symbol, record whether a Binance <SYMBOL>USDT CSV is listed among the scraped links.
# Matching the full "Binance_<SYMBOL>USDT_" file prefix (instead of the bare symbol) avoids
# false positives: "USDT" is a substring of every link, and a symbol may also appear as the
# quote currency of another pair. Every symbol gets an entry even when no links were scraped.
crypto_data_found = [
    (symbol, any(f"/Binance_{symbol}USDT_" in link for link in complete_links))
    for symbol in symbols
]

In [27]:
#when true it means the file is on https://www.cryptodatadownload.com/cdd/Binance, otherwise we have to use the API
crypto_data_found

[('BTC', True),
 ('ETH', True),
 ('BNB', True),
 ('XRP', True),
 ('ADA', True),
 ('USDT', False),
 ('DOT', False),
 ('DOGE', False),
 ('UNI', False),
 ('LTC', True)]

###### coinAPI setup and use

In [28]:
def get_historic_data(symbol,time_start,time_end,period="1DAY",limit=10000,timeout=30):
    """Fetch OHLCV history for ``symbol`` against USD from CoinAPI and save it as a CSV file.

    The data is written to ``./Resources/datasets/{symbol}USDT_d.csv`` for
    ``period="1DAY"`` and to ``./Resources/datasets/{symbol}USDT_1h.csv`` for
    ``period="1HRS"``. Any other period is reported and nothing is requested
    or saved.

    Parameters
    ----------
    symbol : str
        Coin symbol placed in the request URL, e.g. ``"DOGE"``.
    time_start, time_end : str
        Start and end of the requested window, passed to the API unchanged.
    period : str
        CoinAPI ``period_id``; ``"1DAY"`` or ``"1HRS"``.
    limit : int
        Maximum number of records requested.
    timeout : float
        Seconds to wait for the HTTP response before ``requests`` raises.

    Returns
    -------
    str or None
        ``"Too many requests."`` when the API answers with HTTP 429,
        otherwise ``None``.
    """
    file_suffixes = {"1DAY": "d", "1HRS": "1h"}
    if period not in file_suffixes:
        # Nothing could be saved for this period, so do not spend an API request on it
        print(f"Unsupported period '{period}' for {symbol}; nothing was saved")
        return None

    url = f"https://rest.coinapi.io/v1/ohlcv/{symbol}/USD/history?period_id={period}&limit={limit}&time_start={time_start}&time_end={time_end}"
    headers = {"X-CoinAPI-Key" : api_key}
    # A timeout keeps the notebook from hanging forever on a stalled connection
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code == 429:
        # API responses exhausted
        return "Too many requests."
    if not response.ok:
        # Report the HTTP failure itself instead of a confusing DataFrame construction error
        print(f"CoinAPI request for {symbol} ({period}) failed: HTTP {response.status_code} {response.text}")
        return None

    try:
        df = pd.DataFrame(response.json())
        df.to_csv(f"./Resources/datasets/{symbol}USDT_{file_suffixes[period]}.csv", index=False)
    except Exception as e:
        # Best effort: report the problem and let the caller continue with the next coin
        print(e)

###### Obtaining CSV files

In [29]:
csv_download_links = []

# One shared one-year window for all API requests, computed once from a single "now"
now = pendulum.now()
today = now.format("YYYY-MM-DDTHH:mm:ss")
year_ago = now.subtract(years=1).format("YYYY-MM-DDTHH:mm:ss")

for symbol,found in crypto_data_found:
    if found:
        # CSV files exist on cryptodatadownload: queue the daily and hourly files for download
        csv_download_links.append(f"https://www.cryptodatadownload.com/cdd/Binance_{symbol}USDT_d.csv")
        csv_download_links.append(f"https://www.cryptodatadownload.com/cdd/Binance_{symbol}USDT_1h.csv")
        continue

    # No CSV available: fall back to the API for daily and hourly data
    print(f"Obtaining {symbol} historical data")
    try:
        for period in ("1DAY", "1HRS"):
            result = get_historic_data(symbol,year_ago,today,period=period)
            if result == "Too many requests.":
                # Surface the rate-limit signal instead of silently ignoring it
                print(f"{symbol} ({period}): {result}")
    except Exception as e:
        print(e)

Obtaining USDT historical data
Obtaining DOT historical data
Obtaining DOGE historical data
Obtaining UNI historical data


In [30]:
csv_download_links

['https://www.cryptodatadownload.com/cdd/Binance_BTCUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_BTCUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_ETHUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_ETHUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_BNBUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_BNBUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_XRPUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_XRPUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_ADAUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_ADAUSDT_1h.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_LTCUSDT_d.csv',
 'https://www.cryptodatadownload.com/cdd/Binance_LTCUSDT_1h.csv']

In [31]:
import urllib.request

# Download every queued CSV into ./Resources/datasets, reporting each success or failure
for link in csv_download_links:
    remote_name = link.rsplit('/', 1)[-1]
    try:
        # Drop the "Binance_" prefix so the saved files share one naming scheme
        file_name = remote_name.replace("Binance_","")
        urllib.request.urlretrieve(link, f"./Resources/datasets/{file_name}")
    except Exception as e:
        print("can't download",remote_name )
        print(e)
    else:
        print(link,"Downloaded")

https://www.cryptodatadownload.com/cdd/Binance_BTCUSDT_d.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_BTCUSDT_1h.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_ETHUSDT_d.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_ETHUSDT_1h.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_BNBUSDT_d.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_BNBUSDT_1h.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_XRPUSDT_d.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_XRPUSDT_1h.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_ADAUSDT_d.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_ADAUSDT_1h.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_LTCUSDT_d.csv Downloaded
https://www.cryptodatadownload.com/cdd/Binance_LTCUSDT_1h.csv Downloaded


In [32]:
# for coin in coins:
#     read = 