# Today's cryptocurrencies historical data

In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import pendulum
from sqlalchemy import create_engine
import numpy as np
from splinter import Browser
from bs4 import BeautifulSoup
import os
import requests
import json

#### Context : Finance

#### Goal: Our goal is to create tables showing today’s 10 best/most profitable cryptocurrencies to invest in. We will look at historical data for the previous year and build a database of hourly and daily price changes.

#### METHOD: We aim to do this by extracting data based on each cryptocurrency's rank and on its daily and hourly price changes.

#### FINDING DATA: Following are the sources for finding and collecting our data. Sources: Cryptocurrencies: https://coinmarketcap.com/ 
CSV files: https://www.cryptodatadownload.com/data/binance/
APIs: https://financialmodelingprep.com/api/v3/ https://rest.coinapi.io/v1/

#### Data collection tools: Splinter, APIs, BeautifulSoup

#### DATA AND CLEANUP: 
##### Cleanup process: Pandas, SQLAlchemy. Database: PostgreSQL

##### TEAM MEMBERS: Juan Castaneda, Elif Evrim Polat, Nichole Edet

<div><h3 style="color:green;">Web Scraping - Obtaining today's list of the best cryptocurrencies at https://coinmarketcap.com </h3></div>

In [2]:
# CoinMarketCap landing page; the following cells read its HTML tables with pandas
url = 'https://coinmarketcap.com/'

In [3]:
# pd.read_html fetches the page and returns a list with one DataFrame per HTML <table> it finds
tables = pd.read_html(url)

In [4]:
# The first table on the page is the cryptocurrency ranking (rank, name, price, ...); preview it
df = tables[0]
df.head()

Unnamed: 0.1,Unnamed: 0,#,Name,Price,24h %,7d %,Market Cap,Volume(24h),Circulating Supply,Last 7 Days,Unnamed: 10
0,,1.0,Bitcoin1BTCBuy,"$62,699.82",0.68%,11.49%,"$1,171,400,299,038","$76,118,408,4241,214,013 BTC","18,682,675 BTC",,
1,,2.0,Ethereum2ETHBuy,"$2,386.04",3.59%,19.98%,"$275,515,047,230","$33,799,970,80914,165,734 ETH","115,469,714 ETH",,
2,,3.0,Binance Coin3BNBBuy,$542.95,3.23%,44.05%,"$83,903,032,672","$7,363,376,43113,561,882 BNB","154,532,785 BNB",,
3,,4.0,XRP4XRP,$1.78,3.59%,94.39%,"$80,644,705,375","$29,024,453,90816,341,148,875 XRP","45,404,028,640 XRP",,
4,,5.0,Tether5USDTBuy,$1.00,0.19%,0.09%,"$45,950,500,055","$167,842,706,526167,496,613,833 USDT","45,855,749,841 USDT",,


In [5]:
# Keep only the columns that match our PostgreSQL database schema.
# .copy() gives cryptos_df its own data, so the column assignments made in the
# following cells modify this frame unambiguously instead of a slice of df.
columns = ['#', 'Name', 'Price']
cryptos_df = df[columns].copy()
cryptos_df.head()

Unnamed: 0,#,Name,Price
0,1.0,Bitcoin1BTCBuy,"$62,699.82"
1,2.0,Ethereum2ETHBuy,"$2,386.04"
2,3.0,Binance Coin3BNBBuy,$542.95
3,4.0,XRP4XRP,$1.78
4,5.0,Tether5USDTBuy,$1.00


In [6]:
# Convert the rank column '#' to integers. read_html delivers it as floats (1.0, 2.0, ...);
# the nullable 'Int64' dtype stores real integers and keeps missing ranks as <NA>,
# instead of formatting every value into a string (which would turn NaN into 'nan').
cryptos_df['#'] = cryptos_df['#'].astype(float).astype('Int64')

In [7]:
# Check the result: the rank column should now display 1, 2, 3, ... without a trailing ".0"
cryptos_df.head()

Unnamed: 0,#,Name,Price
0,1,Bitcoin1BTCBuy,"$62,699.82"
1,2,Ethereum2ETHBuy,"$2,386.04"
2,3,Binance Coin3BNBBuy,$542.95
3,4,XRP4XRP,$1.78
4,5,Tether5USDTBuy,$1.00


In [8]:
# Preview: split the Name column (e.g. 'Bitcoin1BTCBuy') on its first run of digits.
# The capture group keeps the digits, so the result has three columns:
#   0 = coin name, 1 = the digits (the rank), 2 = coin symbol (plus an optional 'Buy').
# n=1 splits only once, so digits occurring later in the string stay untouched.
# NOTE(review): a coin whose *name* contains digits would be split inside the name - verify against the data.
# The pattern is a raw string because '\d' is not a valid escape in a normal string literal.
columnsplit = cryptos_df['Name'].str.split(r'(\d+)', n=1, expand=True)
columnsplit.head()

Unnamed: 0,0,1,2
0,Bitcoin,1,BTCBuy
1,Ethereum,2,ETHBuy
2,Binance Coin,3,BNBBuy
3,XRP,4,XRP
4,Tether,5,USDTBuy


In [9]:
# Split Name into: 0 = coin name, 1 = rank digits, 2 = coin symbol (raw-string regex, split once).
columnsplit = cryptos_df['Name'].str.split(r'(\d+)', n=1, expand=True)
# On coinmarketcap the symbol is followed by a "Buy" label for some coins; strip it only
# when it is a trailing suffix. regex=True is stated explicitly because the default of
# this argument differs between pandas versions.
# Column 1 (the rank digits) is ignored: the rank is already available in the '#' column.
cryptos_df = cryptos_df.assign(
    coin_name=columnsplit[0],
    coin_symbol=columnsplit[2].str.replace(r'Buy$', '', regex=True),
)

In [10]:
# Display the frame with the new coin_name / coin_symbol columns.
# NOTE(review): in the output below the last rows have no rank, coin_symbol is empty and
# coin_name still holds name and symbol fused together - presumably those rows were not
# fully rendered in the HTML that read_html received; confirm and handle before loading
# the data into the database.
cryptos_df

Unnamed: 0,#,Name,Price,coin_name,coin_symbol
0,1,Bitcoin1BTCBuy,"$62,699.82",Bitcoin,BTC
1,2,Ethereum2ETHBuy,"$2,386.04",Ethereum,ETH
2,3,Binance Coin3BNBBuy,$542.95,Binance Coin,BNB
3,4,XRP4XRP,$1.78,XRP,XRP
4,5,Tether5USDTBuy,$1.00,Tether,USDT
...,...,...,...,...,...
95,,SwissBorgCHSB,$0.97,SwissBorgCHSB,
96,,WazirXWRX,$3.99,WazirXWRX,
97,,BTMXBTMX,$1.26,BTMXBTMX,
98,,Paxos StandardPAX,$1.00,Paxos StandardPAX,
