# Today's cryptocurrencies historical data

In [32]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import pendulum
from sqlalchemy import create_engine
import numpy as np
from splinter import Browser
from bs4 import BeautifulSoup
import os
import requests
import json

#### Context : Finance

#### Goal: Our goal is to create tables showing today’s 10 best (most profitable) cryptocurrencies to invest in. We will look at historical data for the previous year and build a database of hourly and daily price changes.

#### METHOD: We aim to do this by extracting data based on each cryptocurrency's rank and on its daily and hourly price changes.

#### FINDING DATA: Following are the sources for finding and collecting our data. Sources: Cryptocurrencies: https://coinmarketcap.com/ 
CSV files: https://www.cryptodatadownload.com/data/binance/
APIs: https://financialmodelingprep.com/api/v3/ and https://rest.coinapi.io/v1/

#### Data collection tools: Splinter, APIs, BeautifulSoup

#### DATA AND CLEANUP: 
##### Cleanup process: Pandas, SQLAlchemy. Database: PostgreSQL

##### TEAM MEMBERS: Juan Castaneda, Elif Evrim Polat, Nichole Edet

<div><h3 style="color:green;">Web Scraping - Obtaining today's list of top cryptocurrencies from https://coinmarketcap.com </h3></div>

In [2]:
# CoinMarketCap landing page; the HTML tables on it are read with pandas below.
url = "https://coinmarketcap.com/"

In [3]:
# Fetch the page and parse every HTML <table> on it into a list of DataFrames.
tables = pd.read_html(io=url)

In [4]:
# The first parsed table is the cryptocurrency ranking; preview its first five rows.
df = tables[0]
df.head(5)

Unnamed: 0.1,Unnamed: 0,#,Name,Price,24h %,7d %,Market Cap,Volume(24h),Circulating Supply,Last 7 Days,Unnamed: 10
0,,1.0,Bitcoin1BTCBuy,"$62,266.15",0.76%,11.28%,"$1,163,293,150,987","$75,571,480,7971,213,685 BTC","18,682,593 BTC",,
1,,2.0,Ethereum2ETHBuy,"$2,339.94",3.15%,19.13%,"$270,189,359,555","$33,419,839,83014,282,366 ETH","115,468,639 ETH",,
2,,3.0,Binance Coin3BNBBuy,$537.32,2.42%,44.90%,"$83,033,852,574","$7,296,430,38813,579,253 BNB","154,532,785 BNB",,
3,,4.0,XRP4XRP,$1.71,3.64%,88.10%,"$77,713,921,716","$28,869,807,54116,867,062,419 XRP","45,404,028,640 XRP",,
4,,5.0,Tether5USDTBuy,$1.00,0.05%,0.03%,"$45,920,382,502","$165,845,034,969165,611,609,084 USDT","45,855,749,841 USDT",,


In [39]:
# Keep only the columns that match our PostgreSQL database schema.
columns = ['#', 'Name', 'Price']
# .copy() makes cryptos_df an independent frame, so the column assignments in
# later cells modify cryptos_df itself instead of a slice of df (this is what
# triggers pandas' SettingWithCopyWarning when the copy is omitted).
cryptos_df = df[columns].copy()
cryptos_df.head()

Unnamed: 0,#,Name,Price
0,1.0,Bitcoin1BTCBuy,"$62,266.15"
1,2.0,Ethereum2ETHBuy,"$2,339.94"
2,3.0,Binance Coin3BNBBuy,$537.32
3,4.0,XRP4XRP,$1.71
4,5.0,Tether5USDTBuy,$1.00


In [71]:
# Convert the rank column ('#') from float to a real integer dtype.
# The nullable 'Int64' dtype is used so that a missing rank stays missing (<NA>)
# instead of becoming the string "nan"; round() guards the cast against any
# non-integral float. Re-running this cell is safe: Int64 -> float -> Int64.
cryptos_df['#'] = cryptos_df['#'].astype(float).round().astype('Int64')

In [72]:
cryptos_df.head()

Unnamed: 0,#,Name,Price
0,1,Bitcoin1BTCBuy,"$62,266.15"
1,2,Ethereum2ETHBuy,"$2,339.94"
2,3,Binance Coin3BNBBuy,$537.32
3,4,XRP4XRP,$1.71
4,5,Tether5USDTBuy,$1.00


In [76]:
# Split the scraped Name column (e.g. "Bitcoin1BTCBuy") into two columns:
# the coin name and the coin symbol.
# The capture group keeps the rank digits as their own column (1) so they can be
# dropped; n=1 splits only at the first run of digits. A raw string is used so
# that \d is passed to the regex engine rather than parsed as a string escape.
columnsplit = cryptos_df['Name'].str.split(r'(\d+)', n=1, expand=True)
columnsplit = columnsplit.drop(columns=1).rename(columns={0: "coin_name", 2: "coin_symbol"})
# Some rows carry the text of the page's "Buy" button after the symbol;
# strip it only when it is the trailing suffix.
columnsplit['coin_symbol'] = columnsplit['coin_symbol'].str.replace(r'Buy$', '', regex=True)
columnsplit.head()

Unnamed: 0,coin_name,coin_symbol
0,Bitcoin,BTC
1,Ethereum,ETH
2,Binance Coin,BNB
3,XRP,XRP
4,Tether,USDT
