# Today's cryptocurrencies historical data

In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import pendulum
from sqlalchemy import create_engine
import numpy as np
from splinter import Browser
from bs4 import BeautifulSoup
import os
import requests
import json

#### Context : Finance

#### Goal: Our goal is to create tables showing today’s 10 best/most profitable cryptocurrencies to invest in. We will look at historical data for the previous year and build a database of hourly and daily price changes.

#### METHOD: we aim to do this by extracting data based on the ranks of the cryptocurrency and also the daily and hourly change of the cryptocurrency. 

#### FINDING DATA: Following are the sources for finding and collecting our data. Sources: Cryptocurrencies: https://coinmarketcap.com/ 
CSV files: https://www.cryptodatadownload.com/data/binance/
APIs: https://financialmodelingprep.com/api/v3/ https://rest.coinapi.io/v1/

#### Data collection tools: Splinter, APIs, BeautifulSoup

#### DATA AND CLEANUP: 
##### Cleanup process: Pandas, SQLAlchemy. Database: PostgreSQL

##### TEAM MEMBERS: Juan Castaneda, Elif Evrim Polat, Nichole Edet

<div><h3 style="color:green;">Web Scraping - Obtaining today's best cryptocurrencies list at https://coinmarketcap.com </h3></div>

In [2]:
# CoinMarketCap landing page; the HTML tables on it are read with pandas
url = "https://coinmarketcap.com/"

In [3]:
# Fetch the page and parse every HTML <table> found on it into a list of DataFrames
tables = pd.read_html(io=url)

In [4]:
# The first parsed table is the cryptocurrency ranking (rank, name, price, ...)
df = tables[0]
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,#,Name,Price,24h %,7d %,Market Cap,Volume(24h),Circulating Supply,Last 7 Days,Unnamed: 10
0,,1.0,Bitcoin1BTCBuy,"$62,258.33",0.94%,11.22%,"$1,163,152,202,111","$75,709,967,3161,216,062 BTC","18,682,675 BTC",,
1,,2.0,Ethereum2ETHBuy,"$2,351.83",3.00%,19.10%,"$271,565,043,644","$33,474,332,22014,233,318 ETH","115,469,714 ETH",,
2,,3.0,Binance Coin3BNBBuy,$539.11,2.97%,45.00%,"$83,310,372,325","$7,320,010,21213,577,920 BNB","154,532,785 BNB",,
3,,4.0,XRP4XRP,$1.74,3.35%,89.96%,"$78,959,453,845","$28,771,145,44616,544,262,254 XRP","45,404,028,640 XRP",,
4,,5.0,Tether5USDTBuy,$1.00,0.14%,0.05%,"$45,923,672,909","$166,444,451,962166,198,273,532 USDT","45,855,749,841 USDT",,


In [5]:
# Keep only the columns that match our PostgreSQL database schema.
# .copy() gives cryptos_df its own data, so the column assignments made in later
# cells modify this frame directly instead of a selection taken from df (the
# situation pandas reports with SettingWithCopyWarning).
columns = ['#', 'Name', 'Price']
cryptos_df = df[columns].copy()
cryptos_df.head()

Unnamed: 0,#,Name,Price
0,1.0,Bitcoin1BTCBuy,"$62,258.33"
1,2.0,Ethereum2ETHBuy,"$2,351.83"
2,3.0,Binance Coin3BNBBuy,$539.11
3,4.0,XRP4XRP,$1.74
4,5.0,Tether5USDTBuy,$1.00


In [6]:
# Converting the # (rank) column values to real integers.
# The ranks arrive as floats (1.0, 2.0, ...). Formatting them with "{:.0f}" would only
# produce strings, so the column is cast to the nullable Int64 dtype instead: it holds
# true integers and keeps a missing rank as <NA> rather than raising on NaN.
cryptos_df['#'] = cryptos_df['#'].astype(float).round().astype('Int64')

In [7]:
# Preview the frame again to check that the # column no longer shows a decimal part
cryptos_df.head(n=5)

Unnamed: 0,#,Name,Price
0,1,Bitcoin1BTCBuy,"$62,258.33"
1,2,Ethereum2ETHBuy,"$2,351.83"
2,3,Binance Coin3BNBBuy,$539.11
3,4,XRP4XRP,$1.74
4,5,Tether5USDTBuy,$1.00


In [26]:
# Splitting the Name column into 2 different columns which will contain the coin name and the coin symbol.
# coinmarketcap renders the cell as "<name><rank><symbol>" (optionally followed by "Buy"), so the first
# run of digits is used as the delimiter; n=1 limits the split to that first run, which leaves any digits
# further to the right (e.g. inside the symbol) untouched.
# NOTE: a coin name that itself contains digits is split at its own first digit, not at the rank.
# The pattern is a raw string so that \d is not an invalid string escape; the capture group makes the
# matched digits appear as their own column (index 1) in the expanded result.
columnsplit = cryptos_df['Name'].str.split(r'(\d+)', n=1, expand=True)
# Dropping column with index 1 (which contains the digits) and immediately renaming the columns
columnsplit = columnsplit.drop(columns=1).rename({0: "coin_name", 2: "coin_symbol"}, axis=1)
# coin_symbol still carries the trailing "Buy" recommendation where coinmarketcap shows one;
# the pattern is anchored with $ so only that suffix is removed, never text inside a symbol
columnsplit['coin_symbol'] = columnsplit['coin_symbol'].str.replace(r'Buy$', '', regex=True)
columnsplit.head()

Unnamed: 0,coin_name,coin_symbol
0,Bitcoin,BTC
1,Ethereum,ETH
2,Binance Coin,BNB
3,XRP,XRP
4,Tether,USDT
