## Import Libraries

In [2]:
import pandas as pd # For Data Transformation
import requests # to establish connection with the web

## Extracting data from API

In [5]:
# Endpoint that the notebook requests the asset data from
url = 'https://api.coincap.io/v2/assets'

In [7]:
# Request the asset list from the API.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports a 4xx/5xx reply here instead of letting it surface
# later as a confusing failure when the body is parsed.
response = requests.get(url, timeout=10)
response.raise_for_status()
response

<Response [200]>

In [8]:
# Decode the JSON body of the response into Python objects
responsedata = response.json()
responsedata

{'data': [{'id': 'bitcoin',
   'rank': '1',
   'symbol': 'BTC',
   'name': 'Bitcoin',
   'supply': '19658368.0000000000000000',
   'maxSupply': '21000000.0000000000000000',
   'marketCapUsd': '1355165909044.6780711946191232',
   'volumeUsd24Hr': '12006398229.6472977917543412',
   'priceUsd': '68935.8297211995457199',
   'changePercent24Hr': '-1.2559745009359602',
   'vwap24Hr': '69867.2512524566142959',
   'explorer': 'https://blockchain.info/'},
  {'id': 'ethereum',
   'rank': '2',
   'symbol': 'ETH',
   'name': 'Ethereum',
   'supply': '120077426.1898597300000000',
   'maxSupply': None,
   'marketCapUsd': '421345628047.0649051305278377',
   'volumeUsd24Hr': '6982423467.7926447631014626',
   'priceUsd': '3508.9495287886725271',
   'changePercent24Hr': '-1.8886614233219535',
   'vwap24Hr': '3564.9111712248007983',
   'explorer': 'https://etherscan.io/'},
  {'id': 'tether',
   'rank': '3',
   'symbol': 'USDT',
   'name': 'Tether',
   'supply': '103618183617.7160800000000000',
   'maxSup

In [None]:
## Transformation (Restructuring and Data Cleaning)

In [11]:
# Flatten the list of records stored under the "data" key into a table,
# then preview the first rows.
data = pd.json_normalize(responsedata, record_path="data")
data.head()

Unnamed: 0,id,rank,symbol,name,supply,maxSupply,marketCapUsd,volumeUsd24Hr,priceUsd,changePercent24Hr,vwap24Hr,explorer
0,bitcoin,1,BTC,Bitcoin,19658368.0,21000000.0,1355165909044.678,12006398229.647297,68935.82972119954,-1.2559745009359602,69867.25125245661,https://blockchain.info/
1,ethereum,2,ETH,Ethereum,120077426.18985972,,421345628047.0649,6982423467.7926445,3508.9495287886725,-1.8886614233219532,3564.911171224801,https://etherscan.io/
2,tether,3,USDT,Tether,103618183617.71608,,103601221120.51807,28072783650.227947,0.9998362980646274,-0.0492353998425023,0.9999288321105012,https://www.omniexplorer.info/asset/31
3,binance-coin,4,BNB,BNB,166801148.0,166801148.0,95086174456.69368,477060035.9600064,570.0570745274108,-1.370008373430419,575.8235705316241,https://etherscan.io/token/0xB8c77482e45F1F44d...
4,solana,5,SOL,Solana,443884942.24731934,,81882191088.50537,1416695336.7724361,184.46715194696347,-2.323518224278232,186.7407969824341,https://explorer.solana.com/


In [12]:
data.info() # full summary of the dataset: columns, non-null counts and dtypes

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   id                 100 non-null    object
 1   rank               100 non-null    object
 2   symbol             100 non-null    object
 3   name               100 non-null    object
 4   supply             100 non-null    object
 5   maxSupply          55 non-null     object
 6   marketCapUsd       100 non-null    object
 7   volumeUsd24Hr      100 non-null    object
 8   priceUsd           100 non-null    object
 9   changePercent24Hr  100 non-null    object
 10  vwap24Hr           100 non-null    object
 11  explorer           98 non-null     object
dtypes: object(12)
memory usage: 9.5+ KB


## Convert Categorical Data into Numerical Data

In [14]:
def convert_columns_to_datatypes(df,column_datatypes):
    """Return a copy of ``df`` with the requested columns cast to new dtypes.

    The input frame is left untouched, so re-running the calling cell (or
    inspecting the original frame afterwards) gives the same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should be converted.
    column_datatypes : dict
        Mapping of column name -> target dtype (anything ``astype`` accepts).
        Names that are not columns of ``df`` are ignored.

    Returns
    -------
    pandas.DataFrame
        A new frame with the conversions applied; all other columns are
        carried over unchanged.

    Raises
    ------
    ValueError, TypeError
        Propagated from ``astype`` when a value cannot be cast to the
        requested dtype.
    """
    # Keep only the casts that apply to this frame; DataFrame.astype raises
    # KeyError for names that are not columns.
    applicable = {
        column: datatype
        for column, datatype in column_datatypes.items()
        if column in df.columns
    }
    if not applicable:
        # Nothing to convert: still hand back a separate frame so the caller
        # always receives an object independent of the input.
        return df.copy()
    return df.astype(applicable)

In [18]:
# Target dtype per column: `rank` becomes an integer, every other listed
# column becomes a float.
column_datatypes = {
    'rank': int,
    **dict.fromkeys(
        (
            'supply',
            'maxSupply',
            'marketCapUsd',
            'volumeUsd24Hr',
            'priceUsd',
            'changePercent24Hr',
            'vwap24Hr',
        ),
        float,
    ),
}

# Apply the casts listed in column_datatypes and rebind `data` to the returned frame
data = convert_columns_to_datatypes(data,column_datatypes)

In [19]:
# Re-inspect the frame's dtypes and non-null counts
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 100 non-null    object 
 1   rank               100 non-null    int32  
 2   symbol             100 non-null    object 
 3   name               100 non-null    object 
 4   supply             100 non-null    float64
 5   maxSupply          55 non-null     float64
 6   marketCapUsd       100 non-null    float64
 7   volumeUsd24Hr      100 non-null    float64
 8   priceUsd           100 non-null    float64
 9   changePercent24Hr  100 non-null    float64
 10  vwap24Hr           100 non-null    float64
 11  explorer           98 non-null     object 
dtypes: float64(7), int32(1), object(4)
memory usage: 9.1+ KB


## Dealing with Missing Data

In [20]:
# Placeholder written into each column wherever a value is missing:
# 0 for the numeric columns, the string 'na' for the explorer URL.
fill_values = {"maxSupply": 0, "vwap24Hr": 0, "explorer": 'na'}
for column_name, placeholder in fill_values.items():
    data[column_name] = data[column_name].fillna(placeholder)

# Show the summary again so the non-null counts can be checked
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 100 non-null    object 
 1   rank               100 non-null    int32  
 2   symbol             100 non-null    object 
 3   name               100 non-null    object 
 4   supply             100 non-null    float64
 5   maxSupply          100 non-null    float64
 6   marketCapUsd       100 non-null    float64
 7   volumeUsd24Hr      100 non-null    float64
 8   priceUsd           100 non-null    float64
 9   changePercent24Hr  100 non-null    float64
 10  vwap24Hr           100 non-null    float64
 11  explorer           100 non-null    object 
dtypes: float64(7), int32(1), object(4)
memory usage: 9.1+ KB
