In [5]:
import requests
import json
import sqlite3
import time
import pandas as pd
import tabulate
import re

In [6]:
def locationToAddress(raw_location, raw_state):
    clean_location = (raw_location + ' ' + raw_state).replace(' ', '%20')
    location_url = f'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input={clean_location}&inputtype=textquery&fields=formatted_address,place_id&key=AIzaSyC51Ve7T4wNc_VmP9WwBzrNXwXHA-V6Hak'
    print(location_url)
    req = requests.get(location_url)
    data = json.loads(req.text)
    try:
        final_address = data['candidates'][0]['formatted_address']
        place_id = data['candidates'][0]['place_id']
        phone_number = locationToPhoneNumber(place_id)
    except:
        final_address = '128 Graylyn Drive, Chapel Hill, NC 27516'
        phone_number = '+1 678-793-7360'
    
    return [final_address, phone_number]

def locationToPhoneNumber(place_id):
    phone_url = f'https://maps.googleapis.com/maps/api/place/details/json?place_id={place_id}&fields=international_phone_number&key=AIzaSyC51Ve7T4wNc_VmP9WwBzrNXwXHA-V6Hak'
    req = requests.get(phone_url)
    data = json.loads(req.text)
    if 'international_phone_number' in data['result'].keys():
        return data['result']['international_phone_number']  
    else:
        return '+1 678-793-7360'

def locationToCoords(raw_location, raw_state):
    refined_location = re.sub('[^A-Za-z0-9 ]+', '', raw_location)
    refined_location = (refined_location + ' ' + raw_state).replace(' ', '+')
    url = f'https://maps.googleapis.com/maps/api/geocode/json?address={refined_location}&key=AIzaSyC51Ve7T4wNc_VmP9WwBzrNXwXHA-V6Hak'
    req = requests.get(url)
    data = json.loads(req.text)
    lat = data['results'][0]['geometry']['location']['lat']
    lng = data['results'][0]['geometry']['location']['lng']
    return lat, lng

def conditionsToString(conditions_list):
    conditions_string = ""
    for i in range (0, len(conditions_list)):
        if i == len(conditions_list) - 1:
            conditions_string += conditions_list[i]
        else:
            conditions_string += (conditions_list[i] + ", ")
    return conditions_string

def getData(nct_id, fields):
    url = f'https://clinicaltrials.gov/api/query/study_fields?expr={nct_id}&fields=NCTId%2CBriefTitle%2CCondition%2COrgFullName%2CBriefSummary%2CLocationCity%2CLocationContactEMail%2CLocationContactName%2CLocationContactPhone%2CLocationContactPhoneExt%2CLocationContactRole%2CLocationCountry%2CLocationFacility%2CLocationState%2CLocationStatus%2CLocationZip&min_rnk=1&max_rnk=&fmt=json'
    req = requests.get(url)
    data = json.loads(req.text)
    res = {}
    for field in fields:
        tmp = data["StudyFieldsResponse"]["StudyFields"][0][field]
        res[field] = tmp[0] if tmp else ""
    res['website'] = f'https://clinwiki.org/{nct_id}' 
    if res['LocationContactPhone']:
        res['phoneNum'] = res['LocationContactPhone']
    if res['LocationFacility'] != '':
        address_and_phone = locationToAddress(res['LocationFacility'], res['LocationState'])
        res['address'] = address_and_phone[0]
        res['phoneNum'] = address_and_phone[1]
        res['latitude'], res['longitude'] = locationToCoords(res['LocationFacility'], res['LocationState'])
    else:
        res['address'] = '128 Graylyn Drive, Chapel Hill, NC 27516'
        res['phoneNum'] = '+1 678-793-7360'
        res['latitude'], res['longitude'] = 35.882250, -79.071450 
    res['condition'] = conditionsToString(data['StudyFieldsResponse']['StudyFields'][0]['Condition'])
    res['facilityName'] = res['OrgFullName']
    res['description'] = res['BriefTitle']
    return res



In [7]:
fields = [
         "NCTId",
         "BriefTitle",
         "Condition",
         "OrgFullName",
         "BriefSummary",
         #"LocationCity",
         #"LocationContactEMail",
         #"LocationContactName",
         "LocationContactPhone",
         #"LocationContactPhoneExt",
         #"LocationContactRole",
         #"LocationCountry",
         "LocationFacility",
         "LocationState",
         #"LocationStatus",
         #"LocationZip"
      ]
df = pd.DataFrame(columns = fields)
#ids = ["NCT00001208","NCT01874691"]
with open ('/Users/adamkim/Desktop/nctid_and_conditions.json') as f:
    data = json.load(f)

counter = 0
for id, conditions in data.items():
    print(f'Fetching {id}')
    new_row = getData(id, fields)
    df = df.append(new_row, ignore_index=True)
    counter += 1
    if counter > 5:
        break

cols = ['NCTId', 'BriefTitle', 'Condition', 'OrgFullName', 'BriefSummary', 'LocationContactPhone', 
        'LocationFacility', 'LocationState']
df = df.drop(cols, axis=1)

print(df.to_markdown())
df.to_csv('test.csv', encoding='utf-8', index=False)

Fetching NCT00000105
https://maps.googleapis.com/maps/api/place/findplacefromtext/json?input=Division%20of%20Hematology,%20Oncology,%20and%20Transplantation%20420%20Delaware%20St.,%20SE,%20Box%20806%20Mayo%20Minnesota&inputtype=textquery&fields=formatted_address,place_id&key=AIzaSyC51Ve7T4wNc_VmP9WwBzrNXwXHA-V6Hak
Fetching NCT00000111
Fetching NCT00000118
Fetching NCT00000134
Fetching NCT00000135
Fetching NCT00000136
|    | address                                                  | condition                                                                     | description                                                                                            | facilityName                                    |   latitude |   longitude | phoneNum        | website                          |
|---:|:---------------------------------------------------------|:------------------------------------------------------------------------------|:----------------------------------------------------

In [101]:
print(locationToAddress('University of Michigan'))


University of Michigan
{'a': '500 S State St, Ann Arbor, MI 48109, United States', 'b': '+1 734-764-1817'}
['500 S State St, Ann Arbor, MI 48109, United States', '+1 734-764-1817']


In [69]:
place_id = 'ChIJARfe5ziuPIgR45fpeFFOoVs'
hours_url = f'https://maps.googleapis.com/maps/api/place/details/json?place_id={place_id}&fields=name&key=AIzaSyC51Ve7T4wNc_VmP9WwBzrNXwXHA-V6Hak'
hours_req = requests.get(hours_url)
hours_data = json.loads(req.text)
print(hours_data)

{'candidates': [{'formatted_address': '140 George St, The Rocks NSW 2000, Australia'}], 'status': 'OK'}
