In [2]:
import requests
import json

def fetch_and_process_persons():
    url = 'https://experts.illinois.edu/ws/api/524/persons'

    # Adjusted fields parameter to include details explicitly
    fields = 'uuid,externalId,name.firstName,name.lastName,staffOrganisationAssociations.organisationalUnit.name.text.value,profileInformations.value.text.value'

    # Headers to request a JSON response
    headers = {'Accept': 'application/json'}

    # Initialize the list to hold all refined person information
    all_refined_info = []

    # Initial parameters for pagination
    params = {
        'apiKey': '5fb8492f-6462-49fd-b3dd-69c896549ff8',
        'size': 500,  # Adjust as per the API's limits and requirements
        'offset': 0,
        'fields': fields
    }

    total_persons = 2971  
    while params['offset'] < total_persons:
        response = requests.get(url, headers=headers, params=params)
        if response.status_code == 200:
            data = response.json()
            items = data.get('items', [])

            for item in items:
                # Construct full name and extract information
                full_name = f"{item.get('name', {}).get('firstName', '')} {item.get('name', {}).get('lastName', '')}".strip()
                organisational_units = [ou.get('organisationalUnit', {}).get('name', {}).get('text', [{}])[0].get('value', 'N/A')
                                        for ou in item.get('staffOrganisationAssociations', [])
                                        if ou.get('organisationalUnit')]

                research_interests = "N/A"
                profile_info = item.get('profileInformations', [])
                if profile_info:
                    research_interests = profile_info[0].get('value', {}).get('text', [{}])[0].get('value', 'N/A')

                person_info = {
                    'uuid': item.get('uuid', 'N/A'),
                    'email': item.get('externalId', 'N/A'),
                    'name': full_name,
                    'organization': organisational_units if organisational_units else ['N/A'],
                    'about': research_interests
                }

                all_refined_info.append(person_info)

            params['offset'] += params['size']
        else:
            print(f"Failed to retrieve data. Status code: {response.status_code}, Response content: {response.text}")
            break

    # Convert and save or print the refined person information
    refined_json = json.dumps(all_refined_info, indent=4)
    filename = 'researchers.json'
    with open(filename, 'w') as f:
        f.write(refined_json)
    print(f"Refined person data saved to {filename}")

# Call the function to start fetching and processing person information
fetch_and_process_persons()


Refined person data saved to researchers.json


In [24]:
import pandas as pd
import requests
import json

In [35]:
file_path = 'researchers.json'
with open(file_path, 'r') as file:
    data = json.load(file)

df = pd.DataFrame(data)

df['organization'] = df['organization'].apply(lambda units: list(set(units)))

df.head()

Unnamed: 0,uuid,email,name,organization,about
0,11b4c73a-d90d-4ef5-85d1-b37da6a64270,abbamont@illinois.edu,Peter Michael Abbamonte,"[Materials Research Lab, Physics]",
1,ff337faa-24e5-4cb1-be6b-07285ec7b49a,ahdyson@illinois.edu,Anne Haas Dyson,"[Education Policy, Organization and Leadership]",<p>Anne Haas Dyson is a former teacher of youn...
2,6f5808b8-fe1c-49a9-90b8-0cf4980d371b,agewirth@illinois.edu,Andrew A Gewirth,"[Materials Research Lab, Chemistry]",<p><span>Professor Andrew A. Gewirth received ...
3,395244cc-d551-417a-b3b5-85b455a2dcc9,aiwillis@illinois.edu,Arlette I Willis,[Curriculum and Instruction],
4,094081a5-552c-48ab-b076-0c9f316002c5,aleggett@illinois.edu,Anthony J Leggett,[Physics],


In [36]:
name_counts = {}

def update_name(name):
    if name in name_counts:
        name_counts[name] += 1
        return f"{name} {name_counts[name]}"
    else:
        name_counts[name] = 1
        return name

df['name'] = df['name'].apply(update_name)

In [41]:
df[df['email'] == 'mem12@illinois.edu']

Unnamed: 0,uuid,email,name,organization,about


In [38]:
df['organization'] = df['organization'].apply(lambda x: ', '.join(x) if isinstance(x, list) else x)
df['about'] = df['about'].str.replace('<[^>]+>', '', regex=True)

df.head()

Unnamed: 0,uuid,email,name,organization,about
0,11b4c73a-d90d-4ef5-85d1-b37da6a64270,abbamont@illinois.edu,Peter Michael Abbamonte,"Materials Research Lab, Physics",
1,ff337faa-24e5-4cb1-be6b-07285ec7b49a,ahdyson@illinois.edu,Anne Haas Dyson,"Education Policy, Organization and Leadership",Anne Haas Dyson is a former teacher of young c...
2,6f5808b8-fe1c-49a9-90b8-0cf4980d371b,agewirth@illinois.edu,Andrew A Gewirth,"Materials Research Lab, Chemistry",Professor Andrew A. Gewirth received his A.B. ...
3,395244cc-d551-417a-b3b5-85b455a2dcc9,aiwillis@illinois.edu,Arlette I Willis,Curriculum and Instruction,
4,094081a5-552c-48ab-b076-0c9f316002c5,aleggett@illinois.edu,Anthony J Leggett,Physics,


In [39]:
file_path = 'people.tsv'
df.to_csv(file_path, sep='\t', index=False)

In [29]:
df['articles'] = pd.Series([[] for _ in range(len(df))])

In [30]:
df

Unnamed: 0,uuid,email,name,organization,about,articles
0,11b4c73a-d90d-4ef5-85d1-b37da6a64270,abbamont@illinois.edu,Peter Michael Abbamonte,"Materials Research Lab, Physics",,[]
1,ff337faa-24e5-4cb1-be6b-07285ec7b49a,ahdyson@illinois.edu,Anne Haas Dyson,"Education Policy, Organization and Leadership",Anne Haas Dyson is a former teacher of young c...,[]
2,6f5808b8-fe1c-49a9-90b8-0cf4980d371b,agewirth@illinois.edu,Andrew A Gewirth,"Materials Research Lab, Chemistry",Professor Andrew A. Gewirth received his A.B. ...,[]
3,395244cc-d551-417a-b3b5-85b455a2dcc9,aiwillis@illinois.edu,Arlette I Willis,Curriculum and Instruction,,[]
4,094081a5-552c-48ab-b076-0c9f316002c5,aleggett@illinois.edu,Anthony J Leggett,Physics,,[]
...,...,...,...,...,...,...
2966,3d8b467e-e0f3-48aa-b46f-71ebc40af089,hungn@illinois.edu,Tan Thai Hung Nguyen,Earth Science and Environmental Change,,[]
2967,ede16d00-5c54-479a-8f83-402e7806cf77,mkumar41@illinois.edu,Manish Kumar,Applied Research Institute,,[]
2968,a262ace2-f4a0-437e-baf5-872fbc5b73a2,rwarne@illinois.edu,Robin William Warne,Prairie Research Institute,,[]
2969,1b25d1c1-026a-4f76-be51-5d9e3a9d2da1,raovnv@illinois.edu,Vimal V Rao,"Educational Psychology, Statistics",,[]


In [2]:
df = pd.read_csv('dataframe.tsv', sep='\t')

In [33]:
base_url = 'https://experts.illinois.edu/ws/api/524/persons'

headers = {'Accept': 'application/json'}
api_key = '5fb8492f-6462-49fd-b3dd-69c896549ff8' 


for index, row in df.iterrows():
    if index < 2971:
        continue
        
    person_uuid = row['uuid']
    url = f"{base_url}/{person_uuid}/research-outputs"

    params = {
        'apiKey': api_key,
        'size': 1000,
        'offset': 0,
    }

    response = requests.get(url, headers=headers, params=params)

    if response.status_code == 200:
        data = response.json()
        research_output_uuids = [item['uuid'] for item in data.get('items', [])]
        df.at[index, 'articles'] = research_output_uuids

In [9]:
file_path = 'final.tsv'
df.to_csv(file_path, sep='\t', index=False)

In [34]:
df

Unnamed: 0,uuid,email,name,organization,about,articles
0,11b4c73a-d90d-4ef5-85d1-b37da6a64270,abbamont@illinois.edu,Peter Michael Abbamonte,"Materials Research Lab, Physics",,[]
1,ff337faa-24e5-4cb1-be6b-07285ec7b49a,ahdyson@illinois.edu,Anne Haas Dyson,"Education Policy, Organization and Leadership",Anne Haas Dyson is a former teacher of young c...,[]
2,6f5808b8-fe1c-49a9-90b8-0cf4980d371b,agewirth@illinois.edu,Andrew A Gewirth,"Materials Research Lab, Chemistry",Professor Andrew A. Gewirth received his A.B. ...,[]
3,395244cc-d551-417a-b3b5-85b455a2dcc9,aiwillis@illinois.edu,Arlette I Willis,Curriculum and Instruction,,[]
4,094081a5-552c-48ab-b076-0c9f316002c5,aleggett@illinois.edu,Anthony J Leggett,Physics,,[]
...,...,...,...,...,...,...
2966,3d8b467e-e0f3-48aa-b46f-71ebc40af089,hungn@illinois.edu,Tan Thai Hung Nguyen,Earth Science and Environmental Change,,[]
2967,ede16d00-5c54-479a-8f83-402e7806cf77,mkumar41@illinois.edu,Manish Kumar,Applied Research Institute,,[]
2968,a262ace2-f4a0-437e-baf5-872fbc5b73a2,rwarne@illinois.edu,Robin William Warne,Prairie Research Institute,,[]
2969,1b25d1c1-026a-4f76-be51-5d9e3a9d2da1,raovnv@illinois.edu,Vimal V Rao,"Educational Psychology, Statistics",,[]


In [None]:
def clean_organization_list(org_list_str):
    org_list = ast.literal_eval(org_list_str)
    cleaned_list = list(set(org_list))
    return cleaned_list

people['organization'] = people['organization'].apply(clean_organization_list)


In [10]:
data = pd.read_csv('final.tsv', sep='\t')

In [22]:
data.head(100)

Unnamed: 0,uuid,email,name,organization,about,articles
0,11b4c73a-d90d-4ef5-85d1-b37da6a64270,abbamont@illinois.edu,Peter Michael Abbamonte,"Materials Research Lab, Physics",,[]
1,ff337faa-24e5-4cb1-be6b-07285ec7b49a,ahdyson@illinois.edu,Anne Haas Dyson,"Education Policy, Organization and Leadership",Anne Haas Dyson is a former teacher of young c...,[]
2,6f5808b8-fe1c-49a9-90b8-0cf4980d371b,agewirth@illinois.edu,Andrew A Gewirth,"Materials Research Lab, Chemistry",Professor Andrew A. Gewirth received his A.B. ...,[]
3,395244cc-d551-417a-b3b5-85b455a2dcc9,aiwillis@illinois.edu,Arlette I Willis,Curriculum and Instruction,,[]
4,094081a5-552c-48ab-b076-0c9f316002c5,aleggett@illinois.edu,Anthony J Leggett,Physics,,[]
...,...,...,...,...,...,...
95,c9fee833-b303-4f6b-99fa-5d58d12d5d23,livtd@illinois.edu,Liv Solveig Thorstensson Davila,"European Union Center, Women & Gender in Globa...","My research focusses on languages, literacies ...",[]
96,37327e28-57f7-4bbc-90be-5527ff815132,liora@illinois.edu,Liora Bresler,Curriculum and Instruction,Bresler’s interdisciplinary work focuses on th...,[]
97,683c93b6-abe7-49f1-a1cf-064a35f1230d,luting@illinois.edu,Ting Lu,"Bioengineering, National Center for Supercompu...",,[]
98,0fef1d11-2d43-4fc2-8099-d2da9f0a4040,m-nelson@illinois.edu,Mark E Nelson,Molecular and Integrative Physiology,"Research Topics\nComputational Biology, Neurob...",[]


In [23]:
df

Unnamed: 0,uuid,email,name,organization,about,articles
0,11b4c73a-d90d-4ef5-85d1-b37da6a64270,abbamont@illinois.edu,Peter Michael Abbamonte,"Materials Research Lab, Physics",,[]
1,ff337faa-24e5-4cb1-be6b-07285ec7b49a,ahdyson@illinois.edu,Anne Haas Dyson,"Education Policy, Organization and Leadership",Anne Haas Dyson is a former teacher of young c...,[]
2,6f5808b8-fe1c-49a9-90b8-0cf4980d371b,agewirth@illinois.edu,Andrew A Gewirth,"Materials Research Lab, Chemistry",Professor Andrew A. Gewirth received his A.B. ...,[]
3,395244cc-d551-417a-b3b5-85b455a2dcc9,aiwillis@illinois.edu,Arlette I Willis,Curriculum and Instruction,,[]
4,094081a5-552c-48ab-b076-0c9f316002c5,aleggett@illinois.edu,Anthony J Leggett,Physics,,[]
...,...,...,...,...,...,...
2966,3d8b467e-e0f3-48aa-b46f-71ebc40af089,hungn@illinois.edu,Tan Thai Hung Nguyen,Earth Science and Environmental Change,,"[4e96f7ba-c0a2-4fb3-a674-01de2ea5a337, ec2721f..."
2967,ede16d00-5c54-479a-8f83-402e7806cf77,mkumar41@illinois.edu,Manish Kumar,Applied Research Institute,,[]
2968,a262ace2-f4a0-437e-baf5-872fbc5b73a2,rwarne@illinois.edu,Robin William Warne,Prairie Research Institute,,"[20b8fe9b-cf3a-426b-8624-b4759f3709a7, f5cc349..."
2969,1b25d1c1-026a-4f76-be51-5d9e3a9d2da1,raovnv@illinois.edu,Vimal V Rao,"Educational Psychology, Statistics",,"[9587ce9f-162e-4b01-9c5a-5b99a8aa1618, d36571c..."
