# Web scraping of the Algerian MSPRH COVID-19 Dashboard on arcgis.com

API reference: [https://developers.arcgis.com/labs/rest/query-a-feature-layer/](https://developers.arcgis.com/labs/rest/query-a-feature-layer/)

## URL to fetch

In [78]:
import requests
import json
import pandas as pd

In [1]:
# Full query URL for the feature layer: endpoint first, then one query
# parameter per line (adjacent literals are joined into a single string)
url_cumul_global = (
    'https://services8.arcgis.com/yhz7DEAMzdabE4ro/arcgis/rest/services/'
    'COVID_Death_Cumul/FeatureServer/2/query'
    '?f=json'
    '&where=1%3D1'
    '&returnGeometry=false'
    '&spatialRel=esriSpatialRelIntersects'
    '&outFields=*'
    '&orderByFields=Report%20asc'
    '&outSR=102100'
    '&resultOffset=0'
    '&resultRecordCount=1000'
    '&cacheHint=true'
)

In [14]:
# Send the GET request. The explicit timeout (in seconds) keeps the cell from
# hanging indefinitely if the server does not answer.
cumul_global_response = requests.get(url_cumul_global, timeout = 30)

## Check response

In [15]:
# Quick success check: `ok` is True when the status code is below 400
cumul_global_response.ok

True

In [16]:
# HTTP status code of the response; 200 (OK) is expected for a successful request
cumul_global_response.status_code

200

In [None]:
# Response body decoded as text (str)
cumul_global_response.text

In [None]:
# Raw response body as bytes
cumul_global_response.content

## Handling JSON

In [20]:
# Decode the JSON response body into a Python object (a dict for this endpoint)
cumul_global = cumul_global_response.json()

In [23]:
# Top-level keys of the parsed response
cumul_global.keys()

dict_keys(['objectIdFieldName', 'uniqueIdField', 'globalIdFieldName', 'geometryType', 'spatialReference', 'fields', 'features'])

In [25]:
# Pretty-print the full parsed payload (4-space indent) to inspect its structure
print(json.dumps(
    cumul_global,
    indent=4,
))

{
    "objectIdFieldName": "OBJECTID",
    "uniqueIdField": {
        "name": "OBJECTID",
        "isSystemMaintained": true
    },
    "globalIdFieldName": "GlobalID",
    "geometryType": "esriGeometryPoint",
    "spatialReference": {
        "wkid": 102100,
        "latestWkid": 3857
    },
    "fields": [
        {
            "name": "OBJECTID",
            "type": "esriFieldTypeOID",
            "alias": "OBJECTID",
            "sqlType": "sqlTypeOther",
            "domain": null,
            "defaultValue": null
        },
        {
            "name": "NOM_WILAYA",
            "type": "esriFieldTypeString",
            "alias": "NOM_WILAYA",
            "sqlType": "sqlTypeOther",
            "length": 30,
            "domain": null,
            "defaultValue": null
        },
        {
            "name": "GlobalID",
            "type": "esriFieldTypeGlobalID",
            "alias": "GlobalID",
            "sqlType": "sqlTypeOther",
            "length": 38,
            "domain"

In [29]:
# Number of feature records in the response
len(cumul_global['features'])

39

In [54]:
# Flatten the payload: keep only the 'attributes' dict of each feature.
# (No need to pre-initialize the list; the comprehension builds it in one go.)
new_cases = [feature['attributes'] for feature in cumul_global['features']]
# Inspect one record (index 15) to see which fields are populated
new_cases[15]

{'OBJECTID': 16,
 'NOM_WILAYA': 'ANNABA',
 'GlobalID': '1c354592-4a7d-4145-8891-e1c77d2ade50',
 'CreationDate': 1585127268468,
 'Creator': 'covid19dz',
 'EditDate': 1585400540828,
 'Editor': 'covid19dz',
 'Report': 1584831600000,
 'Cumul': 201,
 'Death_cumul': 17,
 'gueris': 23,
 'an': None,
 'unquatorze': None,
 'vingtquatre': None,
 'quaranteneuf': None,
 'cinquanteneuf': None,
 'soixante': None,
 'Féminin': None,
 'Masculin': None,
 'Testpositif': None,
 'Testnegatif': None,
 'NP': None,
 'Straitem': None,
 'Reanim': None,
 'New_cases': 62}

In [57]:
# Read the 'New_cases' field of the record at index 15
new_cases[15]['New_cases']

62

# URL with params

In [66]:
# Query endpoint of the feature layer (the query string is supplied separately)
base_url = 'https://services8.arcgis.com/yhz7DEAMzdabE4ro/arcgis/rest/services/COVID_Death_Cumul/FeatureServer/2/query?'

# Query parameters, one per line; values are plain strings and are
# URL-encoded by requests when the request is sent
params = {
    'f': 'json',
    'where': '1=1',
    'returnGeometry': 'false',
    'spatialRel': 'esriSpatialRelIntersects',
    'outFields': '*',
    'orderByFields': 'Report asc',
    'outSR': '102100',
    'resultOffset': '0',
    'resultRecordCount': '1000',
    'cacheHint': 'true',
}

In [67]:
# Let requests build the query string from the dict (passed by keyword for
# clarity); the timeout (in seconds) prevents the cell from blocking forever
# if the server does not answer
r = requests.get(base_url, params = params, timeout = 30)

In [68]:
# HTTP status code of the parameterized GET request
r.status_code

200

In [71]:
# Decode the JSON response body into a Python object
cumul = r.json()

In [75]:
# Number of feature records returned by the parameterized request
len(cumul['features'])

39

In [77]:
# Inspect one raw feature (index 15); its fields sit under the 'attributes' key
cumul['features'][15]

{'attributes': {'OBJECTID': 16,
  'NOM_WILAYA': 'ANNABA',
  'GlobalID': '1c354592-4a7d-4145-8891-e1c77d2ade50',
  'CreationDate': 1585127268468,
  'Creator': 'covid19dz',
  'EditDate': 1585400540828,
  'Editor': 'covid19dz',
  'Report': 1584831600000,
  'Cumul': 201,
  'Death_cumul': 17,
  'gueris': 23,
  'an': None,
  'unquatorze': None,
  'vingtquatre': None,
  'quaranteneuf': None,
  'cinquanteneuf': None,
  'soixante': None,
  'Féminin': None,
  'Masculin': None,
  'Testpositif': None,
  'Testnegatif': None,
  'NP': None,
  'Straitem': None,
  'Reanim': None,
  'New_cases': 62}}

# Importing to Pandas

In [None]:
# Pretty-print only the list of features (4-space indent)
print(json.dumps(
    cumul_global['features'],
    indent=4,
))

In [94]:
# Field names available on a single record
cumul['features'][15]['attributes'].keys()

dict_keys(['OBJECTID', 'NOM_WILAYA', 'GlobalID', 'CreationDate', 'Creator', 'EditDate', 'Editor', 'Report', 'Cumul', 'Death_cumul', 'gueris', 'an', 'unquatorze', 'vingtquatre', 'quaranteneuf', 'cinquanteneuf', 'soixante', 'Féminin', 'Masculin', 'Testpositif', 'Testnegatif', 'NP', 'Straitem', 'Reanim', 'New_cases'])

In [113]:
# Collect the 'attributes' dict of every feature into a flat list of records
attributes = [feature['attributes'] for feature in cumul['features']]

In [119]:
# Build the table: one row per record, dict keys become the column names
df = pd.DataFrame(data = attributes)

In [121]:
# Column names of the DataFrame as a NumPy array
df.columns.values

array(['OBJECTID', 'NOM_WILAYA', 'GlobalID', 'CreationDate', 'Creator',
       'EditDate', 'Editor', 'Report', 'Cumul', 'Death_cumul', 'gueris',
       'an', 'unquatorze', 'vingtquatre', 'quaranteneuf', 'cinquanteneuf',
       'soixante', 'Féminin', 'Masculin', 'Testpositif', 'Testnegatif',
       'NP', 'Straitem', 'Reanim', 'New_cases'], dtype=object)

In [115]:
# Preview the first five rows
df.head()

Unnamed: 0,OBJECTID,NOM_WILAYA,GlobalID,CreationDate,Creator,EditDate,Editor,Report,Cumul,Death_cumul,...,cinquanteneuf,soixante,Féminin,Masculin,Testpositif,Testnegatif,NP,Straitem,Reanim,New_cases
0,1,ADRAR,a2c41c84-1e1e-44f9-93fe-b91d0d4c7251,1585127000000.0,covid19dz,1585402000000.0,covid19dz,1582660800000,1,0,...,,,,,,,,,,
1,2,CHLEF,17b3e898-7574-4ef4-9dd0-d99e0630cba2,1585127000000.0,covid19dz,1585402000000.0,covid19dz,1582930800000,3,0,...,,,,,,,,,,2.0
2,3,LAGHOUAT,fc47e77a-fc64-41d1-82ad-9b03cf370e0c,1585127000000.0,covid19dz,1585402000000.0,covid19dz,1583190000000,5,0,...,,,,,,,,,,3.0
3,4,OUM EL BOUAGHI,37d4c6b3-1248-416a-8c83-36595ae22cb3,1585127000000.0,covid19dz,1585402000000.0,covid19dz,1583276400000,17,0,...,,,,,,,,,,12.0
4,5,BATNA,302927cd-98df-477f-af5d-404a36081c9d,1585127000000.0,covid19dz,1585402000000.0,covid19dz,1583535600000,19,0,...,,,,,,,,,,2.0


In [None]:
# save the data

In [None]:
# Write the table to disk with a header row and without the positional index
df.to_csv(
    'cumul_covid19-dz.csv',
    header = True,
    index = False,
)

# POST request

In [152]:
# Query parameters to send in the POST body, one per line
recipe = {
    'f': 'json',
    'where': '1=1',
    'returnGeometry': 'false',
    'spatialRel': 'esriSpatialRelIntersects',
    'outFields': '*',
    'orderByFields': 'Report asc',
    'outSR': '102100',
    'resultOffset': '0',
    'resultRecordCount': '1000',
    'cacheHint': 'true',
}

In [158]:
# Request headers for the POST call.
# - Content-Type describes the body we send (form-encoded parameters).
# - Accept describes what we want back: the query asks for f=json, so the
#   expected response type is JSON, not form-encoded data.
headers = {'Content-Type' : 'application/x-www-form-urlencoded',
           'Accept' : 'application/json'}

In [159]:
# Send the parameters as a form-encoded body (`data=`) so the payload matches
# the 'application/x-www-form-urlencoded' Content-Type header; `json=` would
# serialize them as a JSON document instead. The timeout (in seconds) keeps
# the cell from hanging if the server does not answer.
p = requests.post(base_url, headers = headers, data = recipe, timeout = 30)

In [160]:
# HTTP status code of the POST request
p.status_code

200

In [None]:
# Response body as text, to check what the server actually returned
p.text