In [19]:
import numpy as np
import pandas as pd
import os
import requests
from difflib import SequenceMatcher
from tqdm import tqdm_notebook
from time import sleep
import ast

In [20]:
def loadCoinData():
    dataList = []
    tokenFileList = os.listdir('./tokenData')
    
    for token in tqdm_notebook(tokenFileList):
        tokenData = pd.read_csv('./tokenData/'+token, skiprows=1).set_index('id')
        dataList.append(tokenData)
        
    return pd.concat(dataList, axis=1).T
    

In [21]:
def hasCategory(x):
    if len(ast.literal_eval(x)) > 0:
        return 1
    else:
        return 0

In [22]:
def hasSpecificCategory(x, category):
    if category in x:
        return 1
    else:
        return 0

In [23]:
def hasBaseCategory(x, catArr):
    
#     print(set(catArr), set(ast.literal_eval(x)))
#     print(set(catArr).intersection(ast.literal_eval(x)))
#     print(any(x in catArr for x in ast.literal_eval(x)))

    if len(set(catArr).intersection(ast.literal_eval(x))) > 0:
        return 1
    else:
        return 0

In [24]:
def getTokensWithCategory(coinDF):
    coinDF['has_category'] = coinDF['categories'].map(lambda x: hasCategory(x))
    return coinDF[coinDF['has_category'] == 1].drop(columns=['has_category'])


In [25]:
def getAllCategories(coinDF):
    cat_list = [ast.literal_eval(x) for x in list(coinDF['categories'].values)]
    all_categories = set([item for sublist in cat_list for item in sublist])
    return all_categories

In [26]:
def getCategoryCounts(coinDF, all_categories):
    categoryTotals = []
    for cat in all_categories:
        categoryCount = coinDF['categories'].map(lambda x: hasSpecificCategory(x, cat))
        categoryTotals.append(categoryCount.sum())
        
    return pd.Series(categoryTotals, index=all_categories).sort_values(ascending=False)

In [27]:
def getCoinsWithCategory(coinDF, category):
    coinDF['has_given_category'] = coinDF['categories'].map(lambda x: hasSpecificCategory(x, category))
    return coinDF[coinDF['has_given_category'] > 0].drop(columns=['has_given_category'])


In [28]:
def getCoinsInBaseCategory(coinDF, catArr):
    coinDF['has_given_category'] = coinDF['categories'].map(lambda x: hasBaseCategory(x, catArr))
    return coinDF[coinDF['has_given_category'] > 0].drop(columns=['has_given_category'])


In [29]:
def mapTrustScore(score):
    if score == 'green':
        return 1
    if score == 'yellow':
        return 0
    if score == 'red':
        return -1

In [30]:
def getNumericalQualitative(CC, useColumns):
    DF = CC[useColumns].copy()
    DF['trust_score'] = DF['trust_score'].map(lambda x: mapTrustScore(x))
    return DF

In [31]:
def describeCategory(catCoinDF, category, useColumns):
    CC = getCoinsInBaseCategory(catCoinDF, baseCategories[category])
    NQ = getNumericalQualitative(CC, useColumns)
    print('Number of tokens:', CC.shape[0])
    return NQ.astype('float').fillna(0).describe()

### run scripts

In [32]:
coinDF = loadCoinData()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for token in tqdm_notebook(tokenFileList):


HBox(children=(FloatProgress(value=0.0, max=5713.0), HTML(value='')))




In [33]:
catCoinDF = getTokensWithCategory(coinDF)

In [34]:
coinDF.shape

(5713, 46)

In [35]:
catCoinDF.shape

(2269, 45)

In [36]:
all_categories = getAllCategories(catCoinDF)

In [37]:
categoryTotals = getCategoryCounts(coinDF, all_categories)

In [38]:
pd.set_option('display.max_rows', 100)

In [48]:
categoryTotals.shape

(107,)

In [39]:
categoryTotals[categoryTotals > 10]

Decentralized Finance (DeFi)          308
Polygon Ecosystem                     209
Non-Fungible Tokens (NFT)             199
Finance / Banking                     165
Cryptocurrency                        152
Yield Farming                         140
Exchange-based Tokens                 137
Binance Smart Chain Ecosystem         132
Decentralized Exchange Token (DEX)    102
Governance                            101
Number                                101
RealT Tokens                           98
Business Services                      97
Asset-backed Tokens                    88
Business Platform                      87
Entertainment                          72
Infrastructure                         59
Polkadot Ecosystem                     58
Protocol                               54
Meme Tokens                            51
xDAI Ecosystem                         45
Stablecoins                            42
Centralized Exchange Token (CEX)       39
Smart Contract Platform           

In [40]:
baseCategories = {
    'entertainment': ['Non-Fungible Tokens (NFT)', 'Entertainment'],
    'social': ['Gaming', 'Communication', 'Fan Token', 'Media', 'Sports', 'Tourism', 'Charity', 'Collectible', 'Social Money'],
    'defi': ['Launchpad', 'Lending/Borrowing', 'Automated Market Maker (AMM)', 'Seigniorage'],
    'business': ['Business Platform', 'Energy', 'Big Data', 'Software', 'Insurance', 'Retail', 'Real Estate'],
    'blockchain': ['Smart Contract Platform','Protocol','Oracle'],
    'meme': ['Meme Tokens']
}

In [41]:
useColumns = ['sentiment_votes_up_percentage', 'sentiment_votes_down_percentage','market_cap_rank',\
              'coingecko_rank', 'coingecko_score', 'developer_score', 'community_score', 'liquidity_score', \
              'public_interest_score', 'facebook_likes', 'twitter_followers', \
              'reddit_subscribers','reddit_accounts_active_48h', 'telegram_channel_user_count', \
              'mean', 'std', 'kurt', 'trust_score']

#### describe category

In [42]:
for k, v in baseCategories.items():
    CC = getCoinsInBaseCategory(catCoinDF, v)
    CC.to_csv('./categoryLists/'+k+'_token_list.csv')


### entertainment

In [43]:
describeCategory(catCoinDF, 'entertainment', useColumns)

Number of tokens: 257


id,sentiment_votes_up_percentage,sentiment_votes_down_percentage,market_cap_rank,coingecko_rank,coingecko_score,developer_score,community_score,liquidity_score,public_interest_score,facebook_likes,twitter_followers,reddit_subscribers,reddit_accounts_active_48h,telegram_channel_user_count,mean,std,kurt,trust_score
count,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0,257.0
mean,52.57642,13.960545,1054.136187,2762.715953,13.906039,3.499537,11.72135,10.413381,0.014724,0.0,26369.058366,793.669261,15.420233,7864.287938,6.334631,225.847874,89.130659,0.431907
std,42.313589,22.207831,880.590649,1912.846783,8.799194,11.194509,9.819131,14.250599,0.049794,0.0,60823.210031,4457.718044,166.346807,15875.829084,99.927069,3586.303736,191.686342,0.634485
min,0.0,0.0,0.0,58.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.035895,0.021854,0.130123,-1.0
25%,0.0,0.0,111.0,1400.0,9.33,0.0,7.331,1.0,0.001,0.0,1060.0,0.0,0.0,184.0,0.000833,0.105324,6.058439,0.0
50%,66.67,0.0,1023.0,2421.0,12.922,0.0,8.979,1.0,0.003,0.0,8987.0,0.0,0.0,1947.0,0.00963,0.155115,17.877359,1.0
75%,90.91,20.0,1843.0,3268.0,18.436,0.0,10.861,17.547,0.008,0.0,24104.0,0.0,0.0,7887.0,0.020503,0.22001,69.615963,1.0
max,100.0,100.0,2819.0,8326.0,50.102,66.72,52.681,63.503,0.43,0.0,681856.0,48127.0,2645.0,124801.0,1602.028292,57494.434224,1288.0,1.0


### social

In [44]:
describeCategory(catCoinDF, 'social', useColumns)

Number of tokens: 166


id,sentiment_votes_up_percentage,sentiment_votes_down_percentage,market_cap_rank,coingecko_rank,coingecko_score,developer_score,community_score,liquidity_score,public_interest_score,facebook_likes,twitter_followers,reddit_subscribers,reddit_accounts_active_48h,telegram_channel_user_count,mean,std,kurt,trust_score
count,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0,166.0
mean,40.497229,13.117229,934.253012,3103.26506,12.838066,5.654741,11.575548,9.85138,0.015867,0.0,174264.4,655.277108,3.301205,5375.138554,20896830000000.0,358935300000000.0,136.670857,0.174699
std,44.141656,25.89889,944.344285,2094.419292,9.560229,14.113416,9.953629,14.087166,0.051885,0.0,1080527.0,3401.946073,14.36767,11444.772368,269236900000000.0,4624558000000000.0,228.788559,0.721672
min,0.0,0.0,0.0,59.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.02514584,0.0,0.0,-1.0
25%,0.0,0.0,0.0,1426.5,5.10675,0.0,6.74175,1.0,0.0,0.0,549.0,0.0,0.0,0.0,0.005092983,0.1158821,13.070207,0.0
50%,0.0,0.0,692.0,2654.5,12.09,0.0,8.7845,1.0,0.001,0.0,5071.5,0.0,0.0,677.5,0.01303104,0.1658554,42.476834,0.0
75%,86.2575,17.5625,1727.75,4349.75,18.247,0.0,13.16675,17.2885,0.0075,0.0,22751.0,0.0,0.0,4449.75,0.06958373,0.7440676,139.713251,1.0
max,100.0,100.0,2880.0,8238.0,49.933,65.218,43.877,63.503,0.43,0.0,10059860.0,33809.0,169.0,70503.0,3468874000000000.0,5.958326e+16,1386.896486,1.0


### defi

In [45]:
describeCategory(catCoinDF, 'defi', useColumns)

Number of tokens: 127


id,sentiment_votes_up_percentage,sentiment_votes_down_percentage,market_cap_rank,coingecko_rank,coingecko_score,developer_score,community_score,liquidity_score,public_interest_score,facebook_likes,twitter_followers,reddit_subscribers,reddit_accounts_active_48h,telegram_channel_user_count,mean,std,kurt,trust_score
count,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0,127.0
mean,61.689921,19.412441,866.094488,1819.708661,19.747638,9.863488,11.522165,21.722024,0.070016,0.0,48745.905512,979.212598,6.669291,11622.559055,0.042663,0.754095,47.512117,0.76378
std,36.980122,23.711687,812.875305,1505.648181,11.38586,20.470293,9.175562,19.005945,0.237269,0.0,108204.227639,5199.639097,31.368535,17559.374835,0.225724,4.008871,124.024058,0.511094
min,0.0,0.0,0.0,33.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.038561,0.009533,0.346538,-1.0
25%,50.0,0.0,194.0,765.0,12.6385,0.0,8.1065,1.255,0.003,0.0,4948.5,0.0,0.0,1529.0,-0.000303,0.092511,5.451322,1.0
50%,76.19,13.64,699.0,1327.0,18.871,0.0,9.222,17.193,0.016,0.0,17027.0,0.0,0.0,4778.0,0.00349,0.137273,12.022957,1.0
75%,89.615,27.415,1283.0,2500.5,23.918,0.0,10.5375,33.9215,0.053,0.0,42439.0,0.0,0.0,14907.0,0.014084,0.200409,31.918782,1.0
max,100.0,100.0,2906.0,7706.0,56.009,73.946,45.991,70.119,2.306,0.0,944515.0,48119.0,230.0,99593.0,1.837488,39.099211,927.84801,1.0


### blockchain

In [46]:
describeCategory(catCoinDF, 'blockchain', useColumns)

Number of tokens: 108


id,sentiment_votes_up_percentage,sentiment_votes_down_percentage,market_cap_rank,coingecko_rank,coingecko_score,developer_score,community_score,liquidity_score,public_interest_score,facebook_likes,twitter_followers,reddit_subscribers,reddit_accounts_active_48h,telegram_channel_user_count,mean,std,kurt,trust_score
count,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0,108.0
mean,51.31213,19.058241,628.898148,1597.037037,24.63437,23.891324,22.00725,23.410269,0.013463,0.0,40451.490741,4633.361111,23.518519,10635.027778,inf,3.711868,102.598595,0.703704
std,40.142695,25.432925,703.681626,1880.283678,14.957121,30.110642,13.495543,19.337944,0.059043,0.0,77394.812328,12749.251643,47.760595,20928.422918,,27.754256,241.404343,0.630372
min,0.0,0.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.011955,0.0,0.0,-1.0
25%,0.0,0.0,0.0,262.5,14.93225,0.0,8.68325,7.625,0.001,0.0,4770.75,0.0,0.0,1489.5,0.002077,0.086492,7.451221,1.0
50%,67.425,11.48,366.5,1031.0,21.042,0.0,24.588,20.739,0.0035,0.0,12394.0,296.5,6.0,4235.0,0.006378,0.121054,15.759496,1.0
75%,85.8,27.595,1089.75,2027.5,34.44,57.64875,33.2355,41.85175,0.009,0.0,33794.25,2858.5,21.0,9803.0,0.015863,0.20461,52.77582,1.0
max,100.0,100.0,2499.0,8149.0,62.936,84.683,47.453,73.649,0.606,0.0,495631.0,89213.0,295.0,155342.0,inf,285.347638,1270.482933,1.0


### business

In [49]:
describeCategory(catCoinDF, 'business', useColumns)

Number of tokens: 186


id,sentiment_votes_up_percentage,sentiment_votes_down_percentage,market_cap_rank,coingecko_rank,coingecko_score,developer_score,community_score,liquidity_score,public_interest_score,facebook_likes,twitter_followers,reddit_subscribers,reddit_accounts_active_48h,telegram_channel_user_count,mean,std,kurt,trust_score
count,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0,186.0
mean,33.837957,14.011505,870.403226,2905.677419,14.586984,10.341785,14.440995,10.15407,0.003747,0.0,14709.94086,1051.623656,6.370968,5432.33871,5.90809e+28,7.089708e+29,187.363095,0.193548
std,43.166668,28.702127,933.0498,2252.427004,11.124302,17.91582,11.137464,14.119079,0.007165,0.0,24703.040831,4827.999767,18.570742,12573.839613,8.057560999999999e+29,9.669073e+30,263.10958,0.82865
min,0.0,0.0,0.0,25.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.02078912,0.02074884,0.274368,-1.0
25%,0.0,0.0,0.0,1172.5,5.71125,0.0,7.3425,1.0,0.0,0.0,1546.0,0.0,0.0,187.25,0.004782525,0.1196583,15.520548,-1.0
50%,0.0,0.0,599.5,2376.0,13.113,0.0,8.883,1.0,0.001,0.0,5680.0,0.0,0.0,1965.5,0.01417327,0.2028535,57.524682,0.0
75%,79.6425,13.355,1600.5,4112.5,19.9315,13.6365,24.4075,17.41575,0.003,0.0,15505.25,219.75,6.0,4287.5,0.09894632,1.021757,255.340724,1.0
max,100.0,100.0,2934.0,8281.0,56.991,89.368,43.487,60.55,0.05,0.0,158551.0,58011.0,169.0,90058.0,1.098905e+31,1.318686e+32,1175.524164,1.0


### meme

In [47]:
describeCategory(catCoinDF, 'meme', useColumns)

Number of tokens: 51


id,sentiment_votes_up_percentage,sentiment_votes_down_percentage,market_cap_rank,coingecko_rank,coingecko_score,developer_score,community_score,liquidity_score,public_interest_score,facebook_likes,twitter_followers,reddit_subscribers,reddit_accounts_active_48h,telegram_channel_user_count,mean,std,kurt,trust_score
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,74.276667,21.801765,342.392157,3495.745098,10.71302,0.365039,16.73098,16.353627,0.021608,0.0,84080.35,46200.31,136.313725,23053.196078,0.013336,0.254365,85.730319,0.235294
std,22.220538,16.847929,631.544807,1997.939317,9.018925,2.606901,15.21532,17.307375,0.046546,0.0,298249.3,295591.4,646.554514,44436.229522,0.038157,0.697619,201.559615,0.472789
min,0.0,0.0,0.0,125.0,0.2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.016649,0.015225,0.289747,-1.0
25%,65.425,8.71,0.0,1553.0,3.6005,0.0,7.6,1.0,0.002,0.0,2169.0,0.0,0.0,1496.5,-0.000327,0.054093,11.370824,0.0
50%,77.78,21.0,0.0,3646.0,7.621,0.0,9.243,10.618,0.004,0.0,7623.0,0.0,0.0,5669.0,0.001848,0.079133,22.259937,0.0
75%,89.345,33.33,440.5,4951.5,17.5025,0.0,24.99,24.532,0.0155,0.0,20776.0,337.5,4.0,18909.0,0.004253,0.122357,53.744994,0.5
max,100.0,75.0,2193.0,7695.0,43.92,18.617,71.223,70.897,0.211,0.0,1930095.0,2106870.0,3827.0,192985.0,0.169055,4.582374,1346.030462,1.0
