In [1]:
import bs4 as bs
import urllib.request
from pymongo import MongoClient
from flask import Flask, request, jsonify
import pprint
app = Flask(__name__)


In [2]:
client = MongoClient('localhost', 27017)
db = client.test_database

In [3]:
def clean(inp):
    numbers = -10000
    if '$' in inp:
        inp = inp.strip('').split('$')[1]
    if ',' in inp:
        numbers = inp.strip('').split(',')
    if '?' in inp or inp == 'Low Vol':
        return numbers
    if(type(numbers) is int):
        return float(inp)
    else:
        new_inp = ''
        for number in numbers:
            new_inp += number
        return int(new_inp)    

In [5]:
def extract_features(category):
    url_opener = urllib.request.urlopen('https://coinmarketcap.com/' + category + '/views/all/')   #replace coins with all or tokens(tokens also contain an extar field called platform)
    html_code = bs.BeautifulSoup(url_opener,'lxml')
    links = html_code.findAll('a')
    td_tags = html_code.findAll('td')
    curr_name = []
    for link in links:
        if(link.has_attr("class")):
            if("currency-name-container" in link["class"]):
                curr_name.append(link.contents[0])
    market_cap = []
    for tag in td_tags:
        if(tag.has_attr("class")):
            if("market-cap" in tag["class"]):
                market_cap.append(tag.contents[0].strip(' ').split('\n')[1])   
    prices = []
    for link in links:
        if(link.has_attr("class")):
            if("price" in link["class"]):
                prices.append(link.contents[0])

    circ_supply = []
    mineable = []
    for tag in td_tags:
        if(tag.has_attr("class")):
            if("circulating-supply" in tag["class"]):
                child = tag.findChildren()
                mineable.append(True if '*' in tag.contents[-1] else False)
                if('\n' in child[0].contents[0]):
                    circ_supply.append(child[0].contents[0].split('\n')[1])
                else:
                    circ_supply.append(child[0].contents[0])
    volume = []
    for link in links:
        if(link.has_attr("class")):
            if("volume" in link["class"]):
                volume.append(link.contents[0])

    new_market_cap = [clean(i) for i in market_cap]
    new_prices = [clean(i) for i in prices]
    new_circ_supply = [clean(i) for i in circ_supply]
    new_volume = [clean(i) for i in volume]                                   

    headers = ['Currency_Name', 'Market-Capital', 'Price', 'Circulating Supply', 'Mineable', 'Volume']
    entries = []
    for entry in range(len(curr_name)):
        dct = {}
        dct[headers[0]] = curr_name[entry]
        dct[headers[1]] = new_market_cap[entry]
        dct[headers[2]] = new_prices[entry]
        dct[headers[3]] = new_circ_supply[entry]
        dct[headers[4]] = mineable[entry]
        dct[headers[5]] = new_volume[entry]   
        entries.append(dct)
    return entries                  

In [6]:
@app.route('/',methods=['GET'])
def hello():
    return jsonify({'hello':'world'})

In [7]:
@app.route('/download', methods = ['GET'])
def download():
    category = request.args.get('category', 'all')
    entries = extract_features(category)
    db.crypto.insert_many(entries)
    return jsonify({"extracted" : "done"}), 200

In [8]:
@app.route('/show', methods = ['GET'])
def show():
    entries = db.crypto.find()
    c = 0
    for entry in entries:
        c += 1
        if(c == 10):
            return jsonify({"entries" : "shown"})
        pprint.pprint(entry)
    return jsonify({"entries" : "shown"})    

In [None]:
if __name__ == '__main__':
    app.run(port = 5000)

 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [11/Jun/2018 14:54:59] "GET /show HTTP/1.1" 200 -


{'Circulating Supply': 17087075,
 'Currency_Name': 'Bitcoin',
 'Market-Capital': 116450466574,
 'Mineable': False,
 'Price': 6815.12,
 'Volume': 5764220000,
 '_id': ObjectId('5b1e3f5a50e51018ac83807d')}
{'Circulating Supply': 100004196,
 'Currency_Name': 'Ethereum',
 'Market-Capital': 53432241789,
 'Mineable': False,
 'Price': 534.3,
 'Volume': 2287810000,
 '_id': ObjectId('5b1e3f5a50e51018ac83807e')}
{'Circulating Supply': 39244312603,
 'Currency_Name': 'Ripple',
 'Market-Capital': 22916559367,
 'Mineable': True,
 'Price': 0.583946,
 'Volume': 491792000,
 '_id': ObjectId('5b1e3f5a50e51018ac83807f')}
{'Circulating Supply': 17177400,
 'Currency_Name': 'Bitcoin Cash',
 'Market-Capital': 16172848471,
 'Mineable': False,
 'Price': 941.52,
 'Volume': 779455000,
 '_id': ObjectId('5b1e3f5a50e51018ac838080')}
{'Circulating Supply': 896149492,
 'Currency_Name': 'EOS',
 'Market-Capital': 10082577936,
 'Mineable': True,
 'Price': 11.25,
 'Volume': 2468430000,
 '_id': ObjectId('5b1e3f5a50e51018ac8