In [6]:
#!/usr/bin/python3
# coding: utf-8

import csv
import datetime
import os
import time

import requests
from bs4 import BeautifulSoup
from scrapy import Selector

#pip3 install --user bs4
#pip3 install --user scrapy

# Seconds to wait for the server before treating an attempt as failed.
_REQUEST_TIMEOUT = 30

# (CSV column name, XPath relative to one table row) in output column order.
_COLUMNS = (
    ('Nom', ".//a[contains(@class, 'cmc-table__column-name--name')]/text()"),
    ('Symbole', ".//td[contains(@class, 'col-symbol')]/text()"),
    ('Cap. marche', ".//td[contains(@class, 'market-cap')]/text()"),
    ('Prix', ".//a[@class='price']/@data-usd"),
    # The two selectors below were swapped in the previous version, so the
    # supply column received the volume and vice versa.
    ('Offre en circulation', ".//td[contains(@class, 'circulating-supply')]/@data-sort"),
    ('Volume (24h)', ".//a[@class='volume']/@data-usd"),
    ('% 1h', ".//td[@data-timespan='1h']/@data-sort"),
    ('% 24h', ".//td[@data-timespan='24h']/@data-sort"),
    ('7 j', ".//td[@data-timespan='7d']/@data-sort"),
)


def _parse_rows(html):
    """Yield one ``{column name: cleaned value or None}`` dict per table row."""
    soup = BeautifulSoup(html, features='html.parser')
    sel = Selector(text=soup.prettify())

    rows = sel.xpath("//tr[contains(@class, 'cmc-table-row')]")
    print("cryptos list: '" + str(rows.extract()) + "'")

    for row in rows:
        print("found a crypto: " + str(row.extract()))
        record = {}
        for column, xpath in _COLUMNS:
            # Relative XPath on the row selector: no need to re-parse the row.
            value = row.xpath(xpath).extract_first()
            if value:
                value = value.strip().replace('\n', '')
            record[column] = value
        yield record


def _write_csv(html, output_dir):
    """Parse ``html`` and write the rows to a dated, tab-separated CSV file."""
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(
        output_dir,
        "cryptocurrencies_{}.csv".format(str(datetime.date.today())),
    )
    # newline='' is what the csv module requires to avoid blank lines on
    # platforms that translate line endings.
    with open(path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(
            f, fieldnames=[column for column, _ in _COLUMNS], delimiter='\t')
        writer.writeheader()
        for record in _parse_rows(html):
            writer.writerow(record)


def extract(url, max_retries=10, retry_delay=1.0, output_dir="crypto"):
    """
    Download a Coinmarketcap listing page and export its table to a CSV file.

    The page is requested up to ``max_retries`` times. On a 200 response the
    table rows are written to ``<output_dir>/cryptocurrencies_<today>.csv``
    (tab-separated). A 404 response stops immediately without retrying; any
    other status code or a connection error triggers another attempt.

    NOTE (from the original author): screen-scraping is against the terms of
    coinmarketcap.com and the site's HTML is obfuscated and changed
    regularly, so the selectors used here may no longer match anything.
    Using an official API with a developer key is the sustainable option.

    Arguments:
        url (str):
            url of the aimed Coinmarketcap page
        max_retries (int):
            maximum number of download attempts
        retry_delay (float):
            seconds to wait between two attempts
        output_dir (str):
            directory receiving the .csv file (created if missing)
    Returns:
        None after a 200 or 404 response, ``[]`` when every attempt failed.
    """
    print("Export all cryptodata from cryptomarketcap.com")
    start = datetime.datetime.now()

    # The context manager closes the session's connections on exit.
    with requests.session() as session:
        for attempt in range(1, max_retries + 1):
            try:
                response = session.get(url=url, timeout=_REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # Site unreachable or timed out: report and try again.
                print("Can't load page.")
                print(exc)
                response = None

            if response is not None:
                print("response is: ")
                print(response.headers)
                print("-- STATUS CODE --")
                print(response.status_code)
                print("now do parsing")

                if response.status_code == 200:
                    print("response code is good")
                    _write_csv(response.text, output_dir)

                    # amount of time elapsed
                    time_elapsed = str(datetime.datetime.now() - start)
                    print('\n')
                    print('-- TIME ELAPSED --')
                    print(time_elapsed)
                    return None

                if response.status_code == 404:
                    # The page does not exist; retrying cannot help.
                    print("Page indisponible")
                    return None

                print("Can't load page.")

            if attempt < max_retries:
                time.sleep(retry_delay)

    return []


def main():
    """Export the Coinmarketcap "all currencies" listing via ``extract``."""
    # French-locale variant of the same page:
    #   https://coinmarketcap.com/fr/all/views/all/
    target = "https://coinmarketcap.com/all/views/all/"
    print("url: '" + target + "'")
    extract(target)


# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()



url: 'https://coinmarketcap.com/all/views/all/'
Export all cryptodata from cryptomarketcap.com
response is: 
{'Content-Type': 'text/html; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Date': 'Sat, 14 May 2022 15:58:51 GMT', 'Server': 'Tengine', 'bnc-cache-proxy-expire-time': '1652543986', 'bnc-cache-proxy-rewrite': '', 'bnc-cache-proxy-type': 'redis-hit', 'Cache-Control': 'private, no-cache, no-store, max-age=0, must-revalidate', 'Content-Encoding': 'gzip', 'Content-Language': 'en', 'ETag': '"86769-AczYbCkOj/blyiFO1ERMKAsV7OA"', 'x-envoy-upstream-service-time': '0', 'x-traefik-route': 'coinmarketcap-next', 'x-envoy-decorator-operation': 'cache-proxy.cache-proxy.svc.cluster.local:80/*', 'X-Permitted-Cross-Domain-Policies': 'none', 'Content-Security-Policy': "frame-ancestors 'self' https://ss.datasconsole.com;", 'X-Frame-Options': 'SAMEORIGIN', 'X-Xss-Protection': '1; mode=block', 'X-Content-Type-Options': 'nosniff', 'Referrer-Policy': 'origin-when-cross-or

found a crypto: <tr class="s395gx-1 eChPfw cmc-table-row">
              <td>
              </td>
              <td class="name-cell">
               <span class="image-placeholder">
               </span>
               <a class="cmc-link" href="/currencies/1inch/">
                1inch Network
               </a>
              </td>
              <td colspan="999" style="height:44px">
              </td>
             </tr>
found a crypto: <tr class="s395gx-1 eChPfw cmc-table-row">
              <td>
              </td>
              <td class="name-cell">
               <span class="image-placeholder">
               </span>
               <a class="cmc-link" href="/currencies/kava/">
                Kava
               </a>
              </td>
              <td colspan="999" style="height:44px">
              </td>
             </tr>
found a crypto: <tr class="s395gx-1 eChPfw cmc-table-row">
              <td>
              </td>
              <td class="name-cell">
               