# CMC Data Collection

In [2]:
import pandas as pd
import os
import datetime as datetime

from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json

**CMC API DATA**

In [3]:
api_key = os.environ.get("MY_API_KEY")
api_key = "d46ce64b-a724-4e6e-a7f7-4d4a704093ac"

In [4]:
def cmc_api(url):
    parameters = {
      'start':'1',
      'limit':'5000',
      'convert':'USD'
    }
    headers = {
      'Accepts': 'application/json',
      'X-CMC_PRO_API_KEY': api_key,
    }

    session = Session()
    session.headers.update(headers)

    try:
        response = session.get(url, params=parameters)
        data = json.loads(response.text)
    except (ConnectionError, Timeout, TooManyRedirects) as e:
        print(e)
        
    # normalize json file into pd
    df = pd.json_normalize(data['data'])

    # put data into df
    df['timestamp'] = pd.to_datetime('now')

    return df

In [5]:
# top 5000 coins listed  on cmc
latest_listings = cmc_api('https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest')

In [6]:
latest_listings.columns

Index(['id', 'name', 'symbol', 'slug', 'num_market_pairs', 'date_added',
       'tags', 'max_supply', 'circulating_supply', 'total_supply', 'platform',
       'cmc_rank', 'self_reported_circulating_supply',
       'self_reported_market_cap', 'tvl_ratio', 'last_updated',
       'quote.USD.price', 'quote.USD.volume_24h',
       'quote.USD.volume_change_24h', 'quote.USD.percent_change_1h',
       'quote.USD.percent_change_24h', 'quote.USD.percent_change_7d',
       'quote.USD.percent_change_30d', 'quote.USD.percent_change_60d',
       'quote.USD.percent_change_90d', 'quote.USD.market_cap',
       'quote.USD.market_cap_dominance', 'quote.USD.fully_diluted_market_cap',
       'quote.USD.tvl', 'quote.USD.last_updated', 'platform.id',
       'platform.name', 'platform.symbol', 'platform.slug',
       'platform.token_address', 'timestamp'],
      dtype='object')

In [7]:
pd.set_option('display.max_columns', 5000)
pd.set_option('display.max_rows', 5000)
latest_listings.head(1)

Unnamed: 0,id,name,symbol,slug,num_market_pairs,date_added,tags,max_supply,circulating_supply,total_supply,platform,cmc_rank,self_reported_circulating_supply,self_reported_market_cap,tvl_ratio,last_updated,quote.USD.price,quote.USD.volume_24h,quote.USD.volume_change_24h,quote.USD.percent_change_1h,quote.USD.percent_change_24h,quote.USD.percent_change_7d,quote.USD.percent_change_30d,quote.USD.percent_change_60d,quote.USD.percent_change_90d,quote.USD.market_cap,quote.USD.market_cap_dominance,quote.USD.fully_diluted_market_cap,quote.USD.tvl,quote.USD.last_updated,platform.id,platform.name,platform.symbol,platform.slug,platform.token_address,timestamp
0,1,Bitcoin,BTC,bitcoin,9722,2013-04-28T00:00:00.000Z,"[mineable, pow, sha-256, store-of-value, state...",21000000.0,19142068.0,19142068.0,,1,,,,2022-09-05T13:05:00.000Z,19736.035626,27577270000.0,14.2138,0.129165,-0.684142,-0.920533,-14.892028,-3.408092,-33.081996,377788500000.0,38.7344,414456700000.0,,2022-09-05T13:05:00.000Z,,,,,,2022-09-05 13:08:29.576031


In [8]:
useful_columns = ['id', 'name', 'symbol', 'slug', 'num_market_pairs', 'date_added',
       'max_supply', 'circulating_supply', 'total_supply','cmc_rank', 'last_updated',
       'quote.USD.price', 'quote.USD.volume_24h','quote.USD.volume_change_24h', 
       'quote.USD.percent_change_1h', 'quote.USD.percent_change_24h', 'quote.USD.percent_change_7d',
       'quote.USD.percent_change_30d', 'quote.USD.percent_change_60d','quote.USD.percent_change_90d', 
       'quote.USD.market_cap', 'quote.USD.market_cap_dominance', 'quote.USD.fully_diluted_market_cap', 'timestamp']

In [9]:
df = latest_listings[useful_columns]

In [10]:
# rename columns
for col in useful_columns:
    if 'quote' in col:
        new_col = col.split('.')[-1]
        df = df.rename(columns={col:new_col})

In [11]:
df.columns

Index(['id', 'name', 'symbol', 'slug', 'num_market_pairs', 'date_added',
       'max_supply', 'circulating_supply', 'total_supply', 'cmc_rank',
       'last_updated', 'price', 'volume_24h', 'volume_change_24h',
       'percent_change_1h', 'percent_change_24h', 'percent_change_7d',
       'percent_change_30d', 'percent_change_60d', 'percent_change_90d',
       'market_cap', 'market_cap_dominance', 'fully_diluted_market_cap',
       'timestamp'],
      dtype='object')

In [12]:
df.to_parquet('data/output.parquet')

**MORE CMC DATA USING BS4**

In [13]:
from bs4 import BeautifulSoup
import requests

In [14]:
# trending (30 coins)
url = 'https://coinmarketcap.com/trending-cryptocurrencies/'
website = requests.get(url).content
soup = BeautifulSoup(website, 'lxml')

trending = []
ranks = list(range(1,31))

for coin in soup.find_all(class_='sc-1eb5slv-0 gGIpIK coin-item-symbol'):
    trending.append(coin.text)

zip_list = zip(ranks, trending)
trending_coins = list(zip_list)
trending_coins = pd.DataFrame(trending_coins, columns=['rank', 'symbol'])
trending_coins.to_parquet('data/trending.parquet')

## Transformations using Spark

In [94]:
# run it on azure but for now run it locally
import sys

# have my file path set to python not anaconda
os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable
os.path.dirname(sys.executable)

'C:\\Users\\Abdulkadir\\anaconda3'

In [95]:
import pyspark
from pyspark.sql import SparkSession

In [96]:
spark = SparkSession.builder \
.master("local[*]") \
.appName('project').getOrCreate()

In [97]:
df = spark.read.parquet('data/output.parquet', header=True, inferSchema=True)

In [98]:
df.printSchema()

root
 |-- id: long (nullable = true)
 |-- name: string (nullable = true)
 |-- symbol: string (nullable = true)
 |-- slug: string (nullable = true)
 |-- num_market_pairs: long (nullable = true)
 |-- date_added: string (nullable = true)
 |-- max_supply: double (nullable = true)
 |-- circulating_supply: double (nullable = true)
 |-- total_supply: double (nullable = true)
 |-- cmc_rank: long (nullable = true)
 |-- last_updated: string (nullable = true)
 |-- price: double (nullable = true)
 |-- volume_24h: double (nullable = true)
 |-- volume_change_24h: double (nullable = true)
 |-- percent_change_1h: double (nullable = true)
 |-- percent_change_24h: double (nullable = true)
 |-- percent_change_7d: double (nullable = true)
 |-- percent_change_30d: double (nullable = true)
 |-- percent_change_60d: double (nullable = true)
 |-- percent_change_90d: double (nullable = true)
 |-- market_cap: double (nullable = true)
 |-- market_cap_dominance: double (nullable = true)
 |-- fully_diluted_market_c

In [99]:
df = df.repartition(24)
df.write.parquet('data/raw/', mode='overwrite')

In [100]:
df = spark.read.parquet('raw/*', header=True, inferSchema=True)
df.createOrReplaceTempView('cmc_data')

### TOP RANKED COINS

In [117]:
spark.sql(
"""
    SELECT 
        symbol,
        name, 
        market_cap
    FROM 
        cmc_data
    ORDER BY
        cmc_rank 
    """
).show()

+------+----------------+--------------------+
|symbol|            name|          market_cap|
+------+----------------+--------------------+
|   BTC|         Bitcoin|3.805906364441949...|
|   ETH|        Ethereum|1.914939002048660...|
|  USDT|          Tether|6.755486005050185E10|
|  USDC|        USD Coin|5.187410094306044E10|
|   BNB|             BNB|4.482078903879888E10|
|  BUSD|     Binance USD|1.943410062155310...|
|   ADA|         Cardano|1.709537568886378...|
|   XRP|             XRP|1.642197102232885...|
|   SOL|          Solana|1.109399510106651...|
|  DOGE|        Dogecoin| 8.378473784006645E9|
|   DOT|        Polkadot| 8.096435092846563E9|
| MATIC|         Polygon| 7.768500583922788E9|
|  SHIB|       Shiba Inu| 7.018334441082997E9|
|   DAI|             Dai| 6.906040799139284E9|
|   TRX|            TRON|  5.86477209162805E9|
|  AVAX|       Avalanche| 5.582471428717998E9|
|   LEO|    UNUS SED LEO| 4.941050865668064E9|
|  WBTC| Wrapped Bitcoin| 4.931017581407625E9|
|   UNI|     

### PRICE FLUCTUATION OF TOP CRYPTOS

In [128]:
spark.sql(
"""
    SELECT 
        symbol,
        name, 
        price,
        percent_change_1h,
        percent_change_24h,
        percent_change_7d,
        percent_change_30d,
        percent_change_60d,
        percent_change_90d
    FROM 
        cmc_data
    ORDER BY
        cmc_rank 
    """
)

DataFrame[symbol: string, name: string, price: double, percent_change_1h: double, percent_change_24h: double, percent_change_7d: double, percent_change_30d: double, percent_change_60d: double, percent_change_90d: double]

### BEST PERFORMING COINS IN THE LAST 90 DAYS WITH AT LEAST 1B MC

In [132]:
spark.sql(
"""
    SELECT 
        symbol,
        name, 
        price,
        percent_change_90d
    FROM 
        cmc_data
    WHERE 
        market_cap > 1000000000
    ORDER BY
        percent_change_90d DESC 
    """
).show()

+------+----------------+--------------------+------------------+
|symbol|            name|               price|percent_change_90d|
+------+----------------+--------------------+------------------+
|  LUNC|   Terra Classic|2.399075135355797...|      194.88914734|
|   CHZ|          Chiliz|  0.2118642317375147|       65.84961719|
|   ETC|Ethereum Classic|   32.34002726370627|       46.02605671|
|   QNT|           Quant|   95.35640934564094|       44.10979602|
| MATIC|         Polygon|   0.894769523419626|       43.42803672|
|  ATOM|          Cosmos|  12.788097843341825|       37.06566737|
|   UNI|         Uniswap|   6.471736767635462|       22.50008665|
|  SHIB|       Shiba Inu|1.278237811759426...|        17.1303943|
|   EOS|             EOS|  1.4860804504613887|       14.90514186|
|   TON|         Toncoin|   1.473846480861599|       13.31421436|
|   BIT|          BitDAO|   0.630423018370809|        9.68925969|
|   LEO|    UNUS SED LEO|   5.179547643101104|        2.92137599|
|  YOUC|  

### PRICE

In [135]:
spark.sql(
"""
    SELECT 
        symbol,
        name, 
        price
    FROM 
        cmc_data
    ORDER BY
        cmc_rank
    """
).show()

+------+----------------+--------------------+
|symbol|            name|               price|
+------+----------------+--------------------+
|   BTC|         Bitcoin|  19883.185575976662|
|   ETH|        Ethereum|  1566.5482625927207|
|  USDT|          Tether|  1.0000659647715846|
|  USDC|        USD Coin|  0.9999594030196143|
|   BNB|             BNB|   277.8080446884254|
|  BUSD|     Binance USD|  1.0000451004998463|
|   ADA|         Cardano|  0.5001273654825863|
|   XRP|             XRP|  0.3307780718316204|
|   SOL|          Solana|   31.71163822893916|
|  DOGE|        Dogecoin| 0.06315237443772934|
|   DOT|        Polkadot|   7.270472559852171|
| MATIC|         Polygon|   0.894769523419626|
|  SHIB|       Shiba Inu|1.278237811759426...|
|   DAI|             Dai|  1.0000863568350948|
|   TRX|            TRON| 0.06348611107785139|
|  AVAX|       Avalanche|  18.956469819450298|
|   LEO|    UNUS SED LEO|   5.179547643101104|
|  WBTC| Wrapped Bitcoin|  19878.358290412696|
|   UNI|     

### TOP GAINERS (Vol > 50000USD & Rank < 100) ON CMC

In [146]:
spark.sql(
"""
    SELECT 
        symbol,
        name, 
        percent_change_24h
    FROM 
        cmc_data
    WHERE 
        volume_24h > 50000 AND cmc_rank <= 100
    ORDER BY
        percent_change_24h DESC
    """
).show()

+------+--------------------+------------------+
|symbol|                name|percent_change_24h|
+------+--------------------+------------------+
|   RVN|           Ravencoin|        28.9196728|
|  ANKR|                Ankr|        5.48881997|
|  LINK|           Chainlink|        5.22336399|
|   KSM|              Kusama|        4.97553894|
|  SAND|         The Sandbox|        4.16581293|
|  SHIB|           Shiba Inu|        4.08841562|
|   VET|             VeChain|        3.95098481|
| MIOTA|                IOTA|        3.81880969|
|   LRC|            Loopring|        3.72841622|
|   ADA|             Cardano|        3.65592751|
|   BAT|Basic Attention T...|        3.56960804|
|   NEO|                 Neo|        3.40138545|
|   GMT|               STEPN|        3.18055867|
|  MANA|        Decentraland|        3.17400425|
|   HOT|                Holo|        2.82774424|
|   ENJ|          Enjin Coin|         2.7368382|
|   UNI|             Uniswap|        2.66229856|
|   QNT|            

### TOP LOSERS (Vol > 50000USD & Rank < 100) ON CMC


In [145]:
spark.sql(
"""
    SELECT 
        symbol,
        name, 
        percent_change_24h
    FROM 
        cmc_data
    WHERE 
        volume_24h > 50000 AND cmc_rank <= 100
    ORDER BY
        percent_change_24h ASC
    """
).show()

+------+----------------+------------------+
|symbol|            name|percent_change_24h|
+------+----------------+------------------+
|   AXS|   Axie Infinity|       -4.53596768|
|   FIL|        Filecoin|       -3.66847451|
|   YFI|   yearn.finance|       -2.98563186|
|   EOS|             EOS|       -2.92437485|
| 1INCH|   1inch Network|        -2.6218788|
|   LTC|        Litecoin|       -2.56960339|
|   CHZ|          Chiliz|       -1.68964785|
|   LDO|        Lido DAO|       -1.60479661|
|   CEL|         Celsius|       -1.09245306|
|   LEO|    UNUS SED LEO|       -0.95029925|
|   OKB|             OKB|       -0.68771638|
|   XEC|           eCash|       -0.67905169|
|   BCH|    Bitcoin Cash|       -0.63249372|
|    AR|         Arweave|        -0.3875203|
|  EGLD|          Elrond|       -0.34233447|
|   MKR|           Maker|       -0.29529886|
|   DOT|        Polkadot|       -0.22912453|
|   BTT|  BitTorrent-New|       -0.13332799|
|  HBAR|          Hedera|       -0.11712464|
|   ETC|Et