# Today's cryptocurrencies historical data

In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import pendulum
from sqlalchemy import create_engine
import numpy as np
from splinter import Browser
from bs4 import BeautifulSoup
import os
import requests
import json

#### Context : Finance

#### Goal: Our goal is to create tables showing today’s 10 best/most profitable cryptocurrencies to invest in. We will look at historical data for the previous year and create a database by hourly and daily price changes. 

#### METHOD: We aim to do this by extracting data based on the rank of each cryptocurrency and on its daily and hourly price changes.

#### FINDING DATA: Following are the sources for finding and collecting our data. Sources: Cryptocurrencies: https://coinmarketcap.com/ 
CSV files: https://www.cryptodatadownload.com/data/binance/
APIs: https://financialmodelingprep.com/api/v3/ https://rest.coinapi.io/v1/

#### Data collection tools: Splinter, APIs, BeautifulSoup

#### DATA AND CLEANUP: 
##### Cleanup process: Pandas, SQLAlchemy. Database: PostgreSQL

##### TEAM MEMBERS: Juan Castaneda, Elif Evrim Polat, Nichole Edet

<div><h3 style="color:green;">Web Scraping - Obtaining today's list of the best cryptocurrencies at https://coinmarketcap.com </h3></div>

In [2]:
# CoinMarketCap landing page; its HTML tables are parsed with pandas below
url = "https://coinmarketcap.com/"

In [3]:
# read_html returns a list with one DataFrame per HTML table it can parse at the URL
tables = pd.read_html(io=url)

In [4]:
# The first parsed table is the coin ranking; preview its first five rows
df = tables[0]
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,#,Name,Price,24h %,7d %,Market Cap,Volume(24h),Circulating Supply,Last 7 Days,Unnamed: 10
0,,1.0,Bitcoin1BTCBuy,"$62,746.67",0.57%,12.28%,"$1,172,275,567,935","$76,419,960,4031,217,913 BTC","18,682,675 BTC",,
1,,2.0,Ethereum2ETHBuy,"$2,398.12",4.48%,21.87%,"$276,910,190,566","$34,052,526,63414,199,678 ETH","115,469,714 ETH",,
2,,3.0,Binance Coin3BNBBuy,$540.95,3.35%,45.94%,"$83,594,343,791","$7,354,112,35913,594,837 BNB","154,532,785 BNB",,
3,,4.0,XRP4XRP,$1.77,2.65%,99.35%,"$80,583,322,578","$29,125,065,50216,410,285,229 XRP","45,404,028,640 XRP",,
4,,5.0,Cardano5ADA,$1.45,2.95%,23.67%,"$46,416,973,080","$9,470,753,7416,518,619,184 ADA","31,948,309,441 ADA",,


In [5]:
# Keep only the columns that match our PostgreSQL database schema.
# .copy() makes cryptos_df an independent frame, so the column assignments made
# on it later are unambiguous and do not rely on SettingWithCopyWarning being
# silenced.
columns = ['#', 'Name', 'Price']
cryptos_df = df[columns].copy()
cryptos_df.head()

Unnamed: 0,#,Name,Price
0,1.0,Bitcoin1BTCBuy,"$62,746.67"
1,2.0,Ethereum2ETHBuy,"$2,398.12"
2,3.0,Binance Coin3BNBBuy,$540.95
3,4.0,XRP4XRP,$1.77
4,5.0,Cardano5ADA,$1.45


In [6]:
# Keep the first 10 coins, store the rank as a real integer (read_html parsed it
# as float) and rename the columns to the database field names.
# Built as one chain that returns a new frame: no in-place rename and no
# assignment into a slice.
# astype(int) raises if a rank is missing, which surfaces a bad scrape instead
# of silently storing the text "nan".
cryptos_df = (
    cryptos_df
    .head(10)
    .assign(**{'#': lambda top10: top10['#'].astype(float).astype(int)})
    .rename(columns={'#': 'id', 'Name': 'coin_name', 'Price': 'latest_price'})
)

In [7]:
# Preview the renamed frame (first five rows)
cryptos_df.head(n=5)

Unnamed: 0,id,coin_name,latest_price
0,1,Bitcoin1BTCBuy,"$62,746.67"
1,2,Ethereum2ETHBuy,"$2,398.12"
2,3,Binance Coin3BNBBuy,$540.95
3,4,XRP4XRP,$1.77
4,5,Cardano5ADA,$1.45


In [8]:
# Split the scraped name (e.g. "Bitcoin1BTCBuy") at the first run of digits.
# The capture group keeps the digits, so expand=True yields three columns:
#   0 = coin name, 1 = rank digits, 2 = coin symbol (with a trailing "Buy" for some coins).
# n=1 limits this to a single split, so digits later in the string are left alone.
# NOTE(review): a coin whose name itself contains digits would be split inside
# the name - verify before extending this beyond the rows shown here.
# The raw string stops \d from being read as an (invalid) Python string escape.
columnsplit = cryptos_df['coin_name'].str.split(r'(\d+)', n=1, expand=True)
columnsplit.head()

Unnamed: 0,0,1,2
0,Bitcoin,1,BTCBuy
1,Ethereum,2,ETHBuy
2,Binance Coin,3,BNBBuy
3,XRP,4,XRP
4,Cardano,5,ADA


In [9]:
# `columnsplit` (built in the previous cell) holds: 0 = coin name, 1 = rank digits,
# 2 = coin symbol. It is reused here rather than recomputed, so this cell can be
# re-run safely after coin_name has been overwritten with the clean name.
# CoinMarketCap appends a "Buy" label to some symbols; strip it only when it is a
# suffix. regex=True is spelled out because the default differs between pandas versions.
cryptos_df = cryptos_df.assign(
    coin_name=columnsplit[0],
    coin_symbol=columnsplit[2].str.replace(r'Buy$', '', regex=True),
)

In [10]:
# Show the whole frame (10 rows) with the cleaned coin_name and the new coin_symbol column
cryptos_df

Unnamed: 0,id,coin_name,latest_price,coin_symbol
0,1,Bitcoin,"$62,746.67",BTC
1,2,Ethereum,"$2,398.12",ETH
2,3,Binance Coin,$540.95,BNB
3,4,XRP,$1.77,XRP
4,5,Cardano,$1.45,ADA
5,6,Tether,$1.00,USDT
6,7,Polkadot,$41.89,DOT
7,8,Uniswap,$35.10,UNI
8,9,Litecoin,$269.67,LTC
9,10,Chainlink,$39.92,LINK


In [11]:
# rearranging columns
columns = cryptos_df.columns.to_list()

In [12]:
# Target column order: coin_symbol goes right after coin_name, before latest_price.
# The names are listed explicitly instead of swapping by position, so the cell is
# idempotent: re-running it no longer swaps the last two columns back.
columns = ['id', 'coin_name', 'coin_symbol', 'latest_price']
columns

['id', 'coin_name', 'coin_symbol', 'latest_price']

In [14]:
# Apply the new column order and display the resulting table
cryptos_df = cryptos_df.loc[:, columns]
cryptos_df

Unnamed: 0,id,coin_name,coin_symbol,latest_price
0,1,Bitcoin,BTC,"$62,746.67"
1,2,Ethereum,ETH,"$2,398.12"
2,3,Binance Coin,BNB,$540.95
3,4,XRP,XRP,$1.77
4,5,Cardano,ADA,$1.45
5,6,Tether,USDT,$1.00
6,7,Polkadot,DOT,$41.89
7,8,Uniswap,UNI,$35.10
8,9,Litecoin,LTC,$269.67
9,10,Chainlink,LINK,$39.92
