# IPDS API Sample

<hr>

In [1]:
from bs4 import BeautifulSoup
import requests
from requests_ntlm import HttpNtlmAuth
import getpass

#### IPDS URLs 

In [2]:
# Listdata.svc endpoint for the CostCenters list on ipds.usgs.gov.
cost_center_url = 'https://ipds.usgs.gov/_vti_bin/Listdata.svc/CostCenters()'

# InformationProduct list restricted by a URL-encoded $filter to
# ProductTypeValue eq 'Data Release'.
data_releases_url = 'https://ipds.usgs.gov/_vti_bin/Listdata.svc/InformationProduct()?$filter=ProductTypeValue%20eq%20%27Data%20Release%27'

### Authentication

In [3]:
# Prompt for the password interactively so it is never stored in the notebook.
p = getpass.getpass()
# A single session carries the NTLM credentials for every request made with it.
s = requests.Session()
# Need to change this to your AD username (the 'GS\\bserna' string, in
# DOMAIN\\username form).
s.auth = HttpNtlmAuth('GS\\bserna', p, s)

········


In [4]:
# Module-level accumulator: get_ipds_records adds one dict per feed entry to
# this list, and get_dois is later called with it. It is never cleared, so
# re-running the fetch adds the same records again.
record_list = []


def get_center_list(session, cost_center_url):
    """Fetch the cost-center feed and return its entries as dicts.

    Args:
        session: Object with a ``get(url)`` method whose response exposes
            ``.text`` (the notebook passes its authenticated session).
        cost_center_url: URL of the cost-center feed.

    Returns:
        A list with one ``{'name': ..., 'id': int}`` dict per ``<entry>``
        element found in the response.
    """
    response = session.get(cost_center_url)
    feed = BeautifulSoup(response.text, "lxml-xml")
    return [
        {
            'name': node.find('Name').string,
            'id': int(node.find('Id').string),
        }
        for node in feed.find_all('entry')
    ]


def get_ipds_records(session, data_releases_url):
    """Fetch the data-release feed and collect one record dict per entry.

    Each ``<entry>`` element of the response is turned into a dict whose
    values are the ``.string`` of the corresponding child element, or
    ``None`` when that child element is absent from the entry.

    Args:
        session: Object with a ``get(url)`` method whose response exposes
            ``.text`` (the notebook passes its authenticated session).
        data_releases_url: URL of the data-release feed.

    Returns:
        The list of record dicts parsed by this call. The same records are
        also added to the module-level ``record_list``, which later cells
        read; calling this function again adds to that list rather than
        replacing it.
    """
    # (record key, feed element name) pairs.
    fields = (
        ('working_title', 'WorkingTitle'),
        ('ip_number', 'IPNumber'),
        ('final_title', 'FinalTitle'),
        ('abstract', 'Abstract'),
        ('DOI', 'DigitalObjectIdentifier'),
        ('IPDS_state', 'IPDSReviewProcessStateValue'),
        ('citation', 'Citation'),
        ('mission_area', 'USGSMissionAreaValue'),
        ('created', 'Created'),
        ('summary', 'ProductSummary'),
    )

    def element_text(entry, tag):
        # find() returns None for a missing element; avoid the
        # AttributeError that ``None.string`` would raise.
        node = entry.find(tag)
        return node.string if node is not None else None

    response = session.get(data_releases_url)
    feed = BeautifulSoup(response.text, "lxml-xml")

    records = []
    for entry in feed.find_all('entry'):
        record = {}
        for key, tag in fields:
            record[key] = element_text(entry, tag)
        records.append(record)

    # Keep feeding the shared accumulator that the rest of the notebook uses,
    # and also hand the parsed records back so the caller's assignment is
    # meaningful instead of None.
    record_list.extend(records)
    return records

In [5]:
# Fetch the cost centers with the authenticated session.
center_list = get_center_list(s, cost_center_url)
# get_ipds_records fills the module-level record_list as a side effect;
# the following cells read record_list rather than ipds_records.
ipds_records = get_ipds_records(s, data_releases_url)

#### Get DOIs

In [6]:
def get_dois(record_list):
    """Return the DOI of every record that has one.

    Args:
        record_list: Iterable of dicts, each with a ``'DOI'`` key.

    Returns:
        A list of the truthy ``'DOI'`` values, in input order; records whose
        DOI is ``None`` or empty are left out.
    """
    return [record['DOI'] for record in record_list if record['DOI']]
            
# Collect the DOIs from the records accumulated in the module-level record_list.
all_dois = get_dois(record_list)

#### Clean DOI Formatting

In [7]:
# Lower-case prefixes that may precede the bare DOI. 'http:/dx.doi.org/' is
# the single-slash typo form that the data contains.
_DOI_PREFIXES = (
    'https://dx.doi.org/',
    'http://dx.doi.org/',
    'http:/dx.doi.org/',
    'https://doi.org/',
    'http://doi.org/',
    'doi:',
)


def doi_cleaning(all_dois):
    """Normalize raw DOI strings to the bare ``10.xxxx/suffix`` form.

    For each input string, in order:

    1. surrounding whitespace is stripped;
    2. one leading ``doi:`` label or ``doi.org`` / ``dx.doi.org`` resolver
       URL prefix is removed (matched case-insensitively);
    3. trailing periods are removed.

    All steps are applied to every DOI, so a value such as
    ``'doi: 10.5066/ABC.'`` loses both its label and its trailing period.
    URLs on any other host are left as they are rather than being sliced at
    a fixed offset.

    Args:
        all_dois: Iterable of DOI strings. It is not modified.

    Returns:
        A new list of cleaned DOIs in input order. Entries that are empty
        after cleaning are omitted.
    """
    cleaned = []

    for raw in all_dois:
        doi = raw.strip()

        lowered = doi.lower()
        for prefix in _DOI_PREFIXES:
            if lowered.startswith(prefix):
                # lstrip covers the 'doi: 10...' form with a space after the label.
                doi = doi[len(prefix):].lstrip()
                break

        # A sentence-ending period is not part of the identifier.
        doi = doi.rstrip('.').rstrip()

        if doi:
            cleaned.append(doi)

    return cleaned

In [8]:
cleaned = doi_cleaning(all_dois)
# Bare expression so the notebook shows the list as the cell output.
cleaned

[u'10.5066/F7862DJ2',
 u'10.5066/F7KD1VZ9',
 u'10.5066/F7Q52MNK',
 u'10.5066/F7PG1PT2',
 u'10.5066/F7G44NB6',
 u'10.5066/F78W3BC8',
 u'10.5066/F7HX19RJ',
 u'10.5066/F7RX9954',
 u'10.5066/F78K7749',
 u'10.5066/F7NK3C4V',
 u'10.5066/F7JW8BXT',
 u'10.5066/F7RF5S3P',
 u'10.5066/F74747XS',
 u'10.5066/F74747XS',
 u'10.5066/F7Z60M4M',
 u'10.5066/F798853M',
 u'10.5066/F75H7DBF',
 u'10.5066/F77H1GP3',
 u'10.5066/F76971PM',
 u'10.5066/F7D798GR',
 u'10.5066/F7VX0DMQ',
 u'10.5066/F7542KP9',
 u'10.5066/F7K35RRS']

#### Verify which DOIs are resolving successfully to sciencebase.gov

In [9]:
def sb_hosted(cleaned, timeout=30):
    """Return the landing URLs of the DOIs that resolve to sciencebase.gov.

    Each DOI is requested through ``https://dx.doi.org/`` and the final URL
    of the response (after any redirects) is inspected.

    Args:
        cleaned: Iterable of bare DOI strings.
        timeout: Seconds to wait on each request before giving up. Without a
            timeout a single unresponsive server would block the whole run.

    Returns:
        A list of final URLs that start with
        ``https://www.sciencebase.gov/``, in input order. DOIs whose request
        fails (connection error, timeout, too many redirects, ...) are
        skipped, since they did not resolve successfully.
    """
    in_sb = []

    for doi in cleaned:
        get_url = 'https://dx.doi.org/' + doi

        try:
            r = requests.get(get_url, timeout=timeout)
        except requests.RequestException:
            # One unreachable DOI should not abort the remaining checks.
            continue

        url = r.url
        if url.startswith('https://www.sciencebase.gov/'):
            in_sb.append(url)
    return in_sb

In [10]:
# Issues one HTTP request per cleaned DOI.
sb_dois = sb_hosted(cleaned)
# Bare expression so the notebook shows the list as the cell output.
sb_dois

[u'https://www.sciencebase.gov/catalog/item/56c6303ee4b0946c65228796',
 u'https://www.sciencebase.gov/catalog/item/55f998b7e4b05d6c4e50146f',
 u'https://www.sciencebase.gov/catalog/item/5547a069e4b0a658d794d1f1',
 u'https://www.sciencebase.gov/catalog/item/567ad1d7e4b0a04ef490fd8b',
 u'https://www.sciencebase.gov/catalog/item/564bafdce4b0ebfbef0d3322',
 u'https://www.sciencebase.gov/catalog/item/560997dae4b058f706e53544',
 u'https://www.sciencebase.gov/catalog/item/56c235f5e4b0946c651fc290',
 u'https://www.sciencebase.gov/catalog/item/56a79216e4b0b28f1184d8d8']