In [9]:
from bs4 import BeautifulSoup
import urllib.request
from lxml import etree
import pickle
import os
import glob
import re
import sys

In [10]:
crypto = r'bitcoin'

In [11]:
def getUrl(crypto_name):
    url = r'https://www.coingecko.com/en/coins/' + crypto_name + r'/developer'
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    html = urllib.request.urlopen(req).read()
    soup = BeautifulSoup(html, 'lxml')
    try:
        base_url = soup.find('a', href=True, text='Code Repository')['href']
        return base_url
    except TypeError:
        return None
        

In [12]:
def getNames():
    url = r'https://coinmarketcap.com/coins/views/all/'
    response = urllib.request.urlopen(url)
    soup = BeautifulSoup(response, 'lxml')
    names_divs = soup.findAll('a', { 'class' : 'currency-name-container' })
    names_text = [div.text for div in names_divs]
    return names_text

In [13]:
crypto_currency_names = [name.replace(' ','-').lower() for name in getNames()]
N = len(crypto_currency_names)

print('There are ' + str(N) + ' crypto-currencies.   -source coinmarketcap.com')
print(crypto_currency_names[0:5]) # (These are the top five)

There are 916 crypto-currencies.   -source coinmarketcap.com
['bitcoin', 'ethereum', 'bitcoin-cash', 'ripple', 'bitcoin-gold']


In [14]:
# Create dict of  {cryto-currencey: github-url,} eg {'bitcoin: https://github.com/bitcoin/bitcoin}
cryptoName_gitUrl_dict = {}

for i,name in enumerate(crypto_currency_names):
    try: 
        git_url = getUrl(name)
    except:
        git_url = None

    cryptoName_gitUrl_dict[name] = git_url
    
    # Prints progress (%) to standard output
    if (i % 90 == 0):
        print(str("\r{0}".format(round((float(i)/N)*100),0)) + r'% ...')

0% ...
10% ...
20% ...
29% ...
39% ...
49% ...
59% ...
69% ...
79% ...
88% ...
98% ...


In [15]:
# Check our results of assigning a Github repo to each repository
for name in crypto_currency_names[0:5]:
    print(cryptoName_gitUrl_dict[name])

https://github.com/bitcoin/bitcoin
https://github.com/ethereum/go-ethereum
https://github.com/Bitcoin-ABC/bitcoin-abc
https://github.com/ripple/rippled
https://github.com/BTCGPU/BTCGPU


In [None]:
cryptoName_gitUrl_dict['bitdeal']

In [8]:
# Pickle dict to current directory. {'bitcoin: https://github.com/bitcoin/bitcoin}
with open('./cryptoName_gitUrl_dict.pickle', 'wb') as handle:
    pickle.dump(cryptoName_gitUrl_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [9]:
nones = {k:v for k,v in cryptoName_gitUrl_dict.items() if v is None}
print('Number of crypto-currencies without a Github repo: ' + str(len(nones)))

Number of crypto-currencies without a Github repo: 479


In [10]:
# Inspect the crypto-currencies that DON'T have a GitHub repository.
for crypto in nones:
    print(crypto)

stellar-lumens
byteball-bytes
metaverse-etp
vechain
b3coin
cryptonex
kucoin-shares
i/o-coin
atbcoin
decent
crown
e-coin
peerplays
the-champcoin
bitcloud
compcoin
voxels
sphere
pascal-coin
credence-coin
kore
global-curren...
myriad
thegcccoin
neutron
artbyte
equitrader
europecoin
bela
internet-of-p...
hempcoin
xgox
xp
condensate
e-gulden
putincoin
atmos
zennies
zoin
supercoin
mao-zedong
draftcoin
growers-inter...
zephyr
greencoin
eboost
bytecent
pure
helleniccoin
netko
hicoin
kekcoin
briacoin
linx
happycoin
allsafe
sacoin
footy-cash
unify
shield
zero
gcoin
solaris
42-coin
inflationcoin
smileycoin
elementrem
blakestar
universal-cur...
tychocoin
altcommunity-...
goldreserve
808coin
valorbit
atomic-coin
litebar
postcoin
scorecoin
kilocoin
ecocoin
prototanium
devcoin
elcoin
chancoin
reecoin
evotion
wyvern
womencoin
prcoin
francs
coinonatx
pascal-lite
virta-unique-...
tristar-coin
doubloon
litecoin-plus
nevacoin
beatcoin
eryllium
printerium
bitcurrency
morningstar
magnum
selfiecoin
project-x

By inspection of the crypt-currencies that are do not have a github repository it can be seen that these crypt-currencies are those with the lower market caps.<br>
For example in the top 50 crypto-currencies by marketcap only 3 do not have an active git repository.
<br><br>

*These are:*
* Stellar-Lumens (no. 17 by market cap)
* Byteball-btyes (no. 31 by market cap)
* Metaverse-etp (no. 38by market cap)
* Vechain (no. 47 by market cap)

<br>
**Note that over 95% of  the marketcap  of crypto-currencies is in the top 10 crypto currencies**

In [11]:
# We only want the crypto-currencies that have a Github repository.
# Hence we remove the crypto-currencies without a Github repo from the dictionary.
cryptoName_gitUrl_dict = {k:v for k,v in cryptoName_gitUrl_dict.items() if v is not None}

In [12]:
print('Number of crypto-currencies WITH a Github repo: ' + str(len(cryptoName_gitUrl_dict)))
print('')
print('')
print('Example entry:')
print(list(cryptoName_gitUrl_dict.items())[-0])

Number of crypto-currencies WITH a Github repo: 423


Example entry:
('bitcoin', 'https://github.com/bitcoin/bitcoin')


In [13]:
# Re-pickle dict to current directory without "None" values() {'bitcoin: https://github.com/bitcoin/bitcoin}
with open('./cryptoName_gitUrl_dict.pickle', 'wb') as handle:
    pickle.dump(cryptoName_gitUrl_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

We are now  ready to carry out our inital analysis

In [14]:
print(cryptoName_gitUrl_dict)

{'bitcoin': 'https://github.com/bitcoin/bitcoin', 'ethereum': 'https://github.com/ethereum/go-ethereum', 'bitcoin-cash': 'https://github.com/Bitcoin-ABC/bitcoin-abc', 'ripple': 'https://github.com/ripple/rippled', 'litecoin': 'https://github.com/litecoin-project/litecoin', 'dash': 'https://github.com/dashpay/dash', 'neo': 'https://github.com/neo-project/neo', 'monero': 'https://github.com/monero-project/bitmonero', 'iota': 'https://github.com/iotaledger/wallet', 'nem': 'https://github.com/NewEconomyMovement/NemCommunityClient', 'ethereum-classic': 'https://github.com/ethereumproject/go-ethereum', 'lisk': 'https://github.com/LiskHQ/lisk', 'qtum': 'https://github.com/qtumproject/qtum', 'zcash': 'https://github.com/zcash/zcash', 'hshare': 'https://github.com/HcashOrg/Hshare', 'cardano': 'https://github.com/input-output-hk/cardano-sl', 'bitconnect': 'https://github.com/bitconnectcoin/bitconnectcoin', 'waves': 'https://github.com/wavesplatform/Waves', 'stratis': 'https://github.com/stratisp

In [2]:
# Re-pickle dict to current directory without "None" values() {'bitcoin: https://github.com/bitcoin/bitcoin}
with open('./cryptoName_gitUrl_dict.pickle', 'rb') as handle:
    d = pickle.load(handle)

In [8]:
d['bitdeal']

'/bitdeal/'