## Exercise 2. Update Metadata on a Dataset
Let's use the [Metadata API](https://socratametadataapi.docs.apiary.io/#) to add more information to our dataset.

## Import Libraries

In [3]:
import json
import os
import pandas as pd
import requests

## Setup Authentication
### Enter Education Credentials

Also, it is pow possible to set up API Keys and pass key id and secret values as Socrata user name and password or [api keys](https://socrataapikeys.docs.apiary.io) respectively

In [4]:
domain = 'macondointernal.demo.socrata.com'

user_name = ''
password = ''

# how you would pass credentials stored in environment variables
# user_name = os.environ['SOCRATA_KEY_ID']
# password = os.environ['SOCRATA_KEY_SECRET']

# Enter dataset id created from Exercise 1
dataset_id = 'nump-7qcy'

# URL to metadata for any asset
meta_url = 'https://' + domain + '/api/views/metadata/v1/' + dataset_id

dataset_description = 'Median Household Income - American Community Survey 5 Year Estimates for all Arizona Places from 2011-2017'

## Make HTTP request for existing metadata
- Include authentication because dataset is private by default

In [5]:
req = requests.get(meta_url, auth=(user_name,password))

meta = req.text
print(meta)

{
  "id" : "nump-7qcy",
  "name" : "Arizona Places Median Household Income",
  "attribution" : null,
  "attributionLink" : null,
  "category" : null,
  "createdAt" : "2019-04-08T00:22:30+0000",
  "dataUpdatedAt" : "2019-04-08T00:23:16+0000",
  "dataUri" : "https://macondointernal.demo.socrata.com/resource/nump-7qcy",
  "description" : "source = pandas",
  "domain" : "macondointernal.demo.socrata.com",
  "externalId" : null,
  "hideFromCatalog" : false,
  "hideFromDataJson" : false,
  "license" : null,
  "metadataUpdatedAt" : "2019-04-08T00:23:15+0000",
  "provenance" : "OFFICIAL",
  "updatedAt" : "2019-04-08T00:23:15+0000",
  "webUri" : "https://macondointernal.demo.socrata.com/d/nump-7qcy",
  "approvals" : [ {
    "notes" : "",
    "state" : "pending",
    "submissionId" : 343475,
    "submissionObject" : "public_audience_request",
    "submissionOutcome" : "change_audience",
    "submittedAt" : 1554682997,
    "workflowId" : 504,
    "submissionDetails" : {
      "permissionType" : "

## Make HTTP Patch request to update dataset description
- Create the JSON and encode it

In [6]:
# update Description using Patch method which allows updates for selected properties
payload = {"description": dataset_description}
json_data = json.dumps(payload)

req_update = requests.patch(meta_url, json_data, auth=(user_name,password))
meta_new = req_update.text
print(meta_new)

{
  "action" : "modify",
  "metadata" : {
    "id" : "nump-7qcy",
    "name" : "Arizona Places Median Household Income",
    "attribution" : null,
    "attributionLink" : null,
    "category" : null,
    "createdAt" : "2019-04-08T00:22:30+0000",
    "dataUpdatedAt" : "2019-04-08T00:23:16+0000",
    "dataUri" : "https://macondointernal.demo.socrata.com/resource/nump-7qcy",
    "description" : "Median Household Income - American Community Survey 5 Year Estimates for all Arizona Places from 2011-2017",
    "domain" : "macondointernal.demo.socrata.com",
    "externalId" : null,
    "hideFromCatalog" : false,
    "hideFromDataJson" : false,
    "license" : null,
    "metadataUpdatedAt" : "2019-04-08T00:34:33+0000",
    "provenance" : "OFFICIAL",
    "updatedAt" : "2019-04-08T00:34:33+0000",
    "webUri" : "https://macondointernal.demo.socrata.com/d/nump-7qcy",
    "approvals" : [ {
      "notes" : "",
      "state" : "pending",
      "submissionId" : 343475,
      "submissionObject" : "publ

## Update multiple metadata properties by iterating through a file
- `/data/dataset_metadata.xlsx` contains dataset id and other metadata properties for a dataset that can be suitable  for bulk updates
- Set NaNs to blanks

Let's update https://alicia.data.socrata.com/dataset/Arizona-Places-Median-Household-Income/9abs-ubh5 with many properties.

In [9]:
metadata = pd.read_excel('../data/dataset_metadata.xlsx')
metadata = metadata.fillna('')
metadata.head()

Unnamed: 0,dataset_id,name,category,description,attribution,attributionLink,tags
0,nump-7qcy,Arizona Places Median Household Income,Demographics,Median Household Income - American Community S...,Census,https://www.census.gov/programs-surveys/acs/,
1,ebv5-4bfy,Arizona Places Median Household Income,Business,Median Household Income - American Community S...,Census,https://www.census.gov/programs-surveys/acs/,income
2,cfvy-xet3,Arizona Places Median Household Income,Demographics,Median Household Income - American Community S...,,,"income, household income, prosperity"


In [10]:
for idx,row in metadata.iterrows():
    dataset_id = row['dataset_id']
    meta_url = 'https://' + domain + '/api/views/metadata/v1/' + dataset_id
    
    payload = dict()
    payload['name'] = row['name']
    payload['description'] = row['description']
    payload['category'] = row['category']

    # if there are multiple keywords, split them by comma
    if(row['tags'] != ''):
        payload['tags'] = row['tags'].split(',')
    else:
        payload['tags'] = []
    
    # empty links return validation error
    if(row['attributionLink']!=''):
        payload['attributionLink'] = row['attributionLink']
    
    # encode json
    json_data = json.dumps(payload)
    # print(json_data)
    
    req_update = requests.patch(meta_url, json_data, auth=(user_name,password))
    meta_new = req_update.text
    print(meta_new)

{
  "action" : "modify",
  "metadata" : {
    "id" : "nump-7qcy",
    "name" : "Arizona Places Median Household Income",
    "attribution" : null,
    "attributionLink" : "https://www.census.gov/programs-surveys/acs/",
    "category" : "Demographics",
    "createdAt" : "2019-04-08T00:22:30+0000",
    "dataUpdatedAt" : "2019-04-08T00:23:16+0000",
    "dataUri" : "https://macondointernal.demo.socrata.com/resource/nump-7qcy",
    "description" : "Median Household Income - American Community Survey 5 Year Estimates for all Arizona Places from 2011-2017",
    "domain" : "macondointernal.demo.socrata.com",
    "externalId" : null,
    "hideFromCatalog" : false,
    "hideFromDataJson" : false,
    "license" : null,
    "metadataUpdatedAt" : "2019-04-08T00:38:24+0000",
    "provenance" : "OFFICIAL",
    "updatedAt" : "2019-04-08T00:38:24+0000",
    "webUri" : "https://macondointernal.demo.socrata.com/d/nump-7qcy",
    "approvals" : [ {
      "notes" : "",
      "state" : "pending",
      "subm