## How to apply `if...else...` on column values



In [1]:
import pandas as pd
import numpy as np
from defillama2 import DefiLlama

### Data Prep

In [2]:
# Create a DefiLlama client instance.
obj = DefiLlama()
# Get the fundamentals table for all protocols (one row per protocol).
# NOTE(review): presumably this queries the live DefiLlama API, so the row
#   counts shown in the outputs below will drift between runs - confirm.
df = obj.get_protocols_fundamentals()
# Summarize columns, dtypes and non-null counts to spot missing data early.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3524 entries, 0 to 3523
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         3524 non-null   object 
 1   symbol       3524 non-null   object 
 2   chain        3524 non-null   object 
 3   category     3524 non-null   object 
 4   chains       3524 non-null   object 
 5   tvl          3524 non-null   float64
 6   change_1d    2887 non-null   float64
 7   change_7d    2822 non-null   float64
 8   mcap         1455 non-null   float64
 9   forked_from  2675 non-null   object 
dtypes: float64(4), object(6)
memory usage: 275.4+ KB


In [3]:
# Frequency table: number of protocols in each category, most common first
category_cnts = df['category'].value_counts()
category_cnts

Dexes                       1079
Yield                        458
Lending                      313
Derivatives                  175
Services                     166
Liquid Staking               127
Reserve Currency             123
Yield Aggregator             122
Algo-Stables                 111
CDP                          108
Farm                          91
Indexes                       52
Bridge                        47
Options                       44
Launchpad                     41
Gaming                        39
CEX                           37
Synthetics                    35
NFT Marketplace               34
Prediction Market             34
RWA                           31
Liquidity manager             28
NFT Lending                   28
SoFi                          28
Cross Chain                   27
Insurance                     25
Leveraged Farming             22
Staking Pool                  17
Payments                      16
Chain                         16
Privacy   

In [4]:
# Pick the most frequent category labels. `category_cnts` is sorted in
# descending order, so its leading index entries are the top categories.
top10 = category_cnts.index[:10]
top5 = top10[:5]  # the top 5 are simply the head of the top 10
print(top5, "\n\n")
print(top10, "\n\n")

Index(['Dexes', 'Yield', 'Lending', 'Derivatives', 'Services'], dtype='object') 


Index(['Dexes', 'Yield', 'Lending', 'Derivatives', 'Services',
       'Liquid Staking', 'Reserve Currency', 'Yield Aggregator',
       'Algo-Stables', 'CDP'],
      dtype='object') 




### Use `.where()` to perform `if...else...`

In [5]:
# Build a coarser category column that keeps only the 5 most frequent labels:
#   category in top5 -> keep the original value
#   anything else    -> 'Other'
# Remark: `col.where(cond, other=value)` keeps the values of `col` wherever
#   `cond` is True and substitutes `value` wherever `cond` is False.
df['category2'] = df.category.where(df.category.isin(top5), other='Other')
df['category2']

0          Other
1          Other
2          Other
3          Other
4          Other
          ...   
3519       Dexes
3520    Services
3521       Other
3522       Other
3523    Services
Name: category2, Length: 3524, dtype: object

### What if we want to do `if...elif...else`?

In [6]:
# Build a three-tier category column from the category frequencies:
#   category in top5  -> keep the original value
#   category in top10 -> 'top6 ~ top10'
#   anything else     -> 'not in top10'
# np.select checks the conditions in order and uses the first one that is
# True, so the top10 test only decides rows that already failed the top5 test.
arr = np.select(
    condlist=[df.category.isin(top5), df.category.isin(top10)],
    choicelist=[df.category, 'top6 ~ top10'],
    default='not in top10',
)
print(arr, '\n\n')
df['category3'] = arr
print(df['category3'])

['not in top10' 'top6 ~ top10' 'not in top10' ... 'not in top10'
 'not in top10' 'Services'] 


0       not in top10
1       top6 ~ top10
2       not in top10
3       not in top10
4       not in top10
            ...     
3519           Dexes
3520        Services
3521    not in top10
3522    not in top10
3523        Services
Name: category3, Length: 3524, dtype: object


### Sanity Check

In [7]:
# Side-by-side counts for the top-5 categories: `category2` must preserve them.
# Both Series are renamed explicitly because pandas >= 2.0 names every
# value_counts() result 'count', which would otherwise produce the ambiguous
# columns 'count_x' / 'count_y'. `.iloc` makes the positional slice explicit.
# The (default) inner join on the index drops 'Other', which has no
# counterpart among the top-5 labels.
pd.merge(
    df['category2'].value_counts().rename('category2'),
    category_cnts.iloc[:5].rename('category'),
    left_index=True,
    right_index=True,
)

Unnamed: 0,category2,category
Dexes,1079,1079
Yield,458,458
Lending,313,313
Derivatives,175,175
Services,166,166


In [8]:
# Every protocol outside the top-5 categories must have been relabelled 'Other'
assert category_cnts.iloc[5:].sum() == df['category2'].value_counts().loc['Other']

In [9]:
# Side-by-side counts for the top-5 categories: `category3` must preserve them.
# Both Series are renamed explicitly because pandas >= 2.0 names every
# value_counts() result 'count', which would otherwise produce the ambiguous
# columns 'count_x' / 'count_y'. `.iloc` makes the positional slice explicit.
# The (default) inner join on the index drops the two bucket labels
# ('top6 ~ top10', 'not in top10'), which have no counterpart in the top 5.
pd.merge(
    df['category3'].value_counts().rename('category3'),
    category_cnts.iloc[:5].rename('category'),
    left_index=True,
    right_index=True,
)

Unnamed: 0,category3,category
Dexes,1079,1079
Yield,458,458
Lending,313,313
Derivatives,175,175
Services,166,166


In [10]:
# Protocols whose category ranks 6th-10th must all be in the 'top6 ~ top10' bucket
assert category_cnts.iloc[5:10].sum() == df['category3'].value_counts().loc['top6 ~ top10']

In [11]:
# Protocols whose category ranks below 10th must all be in the 'not in top10' bucket
assert category_cnts.iloc[10:].sum() == df['category3'].value_counts().loc['not in top10']