In [None]:
# install System's public api client

!pip install --extra-index-url https://pypi.fury.io/systeminc api_client

In [None]:
# Alternatively, the datasets produced by the cells below can be downloaded directly from:

# https://healthinclimate-ai-hackathon-system-datasets.s3.us-east-1.amazonaws.com/system_climate_topics.csv
# https://healthinclimate-ai-hackathon-system-datasets.s3.us-east-1.amazonaws.com/system_climate_relationships.csv
# https://healthinclimate-ai-hackathon-system-datasets.s3.us-east-1.amazonaws.com/system_climate_findings.csv

In [None]:
import json
import os

import pandas as pd
import requests

import api_client

# Prefer a key supplied through the SYSTEM_API_KEY environment variable so that
# personal credentials never need to be written into the notebook.
# NOTE(review): the fallback literal is the key that was already committed
# here (presumably a shared hackathon key -- confirm). If it is meant to be
# private, rotate it and remove the fallback.
API_KEY = os.environ.get("SYSTEM_API_KEY") or "t700pcvEia4mCOGYQnNuE3GQAdgAY1P2328Jr4Qn"

# System API

API documentation: https://api-docs.system.com/

In [None]:
# Client shared by the endpoint wrappers below, configured with the API key
# under the "x-api-key" header name.
system_client = api_client.ApiClient(
    header_name="x-api-key",
    header_value=API_KEY,
)

# Endpoint wrappers used by the rest of the notebook.
topics_api = api_client.api.topics_api.TopicsApi(system_client)
relationships_api = api_client.api.relationships_api.RelationshipsApi(system_client)

# 24 Climate-related Topics, kept as one comma-separated string of topic ids.
topic_list = ",".join((
    "ak7lpHYuXE",
    "P6IzhScRdx",
    "alMLXtyJaf",
    "icvzjWGzKD",
    "Nw53jOQJ-3",
    "DC_F0GO0WQ",
    "36gde7OoEQ",
    "jtzZPZ8DQh",
    "dqIh31-nYb",
    "oG8Da1gCE9",
    "wpUeTjUbAc",
    "4FaXcuNJRt",
    "hLVvosw9nG",
    "IBrypEE5vL",
    "LQWjUNDY2O",
    "qIJ-WYI1TN",
    "rRpdh5vkse",
    "VbhaEHYy0f",
    "KORxmct3X4",
    "imJDq4beGb",
    "UBLYDhO7ul",
    "Ya_mN0YSXB",
    "SaxqJzugQe",
    "MtUI-7H2Qe",
))

# Collect System Topics

In [None]:
# Collect Topics
# Each id in `topic_list` is fetched individually and stored as the dict
# obtained by JSON-decoding the response's `to_json()` output.

topics_in_json = [
    json.loads(topics_api.get_topic_by_id(topic_id=topic_id).to_json())
    for topic_id in topic_list.split(",")
]

# Keep only the identifying columns of each topic in the resulting table.
topic_columns = ['id', 'name', 'wikidata_id', 'category']
topic_df = pd.DataFrame.from_dict(topics_in_json, orient='columns')[topic_columns]

# Collect System Relationships

In [None]:
# Collect Relationships between Climate-related Topics and any other System Topic

# Fields requested for every relationship record.
relationship_fields = "id,num_findings,num_studies,num_significant_findings,highest_cited,median_effect_size"

relationships = []

for topic in topics_in_json:
    topic_id = topic.get("id")

    # The first request is used only for its `total`, i.e. how many
    # relationships this topic has.
    total_response = topics_api.get_topic_relationships(topic_id=topic_id, include_total=True)

    if total_response.total != 0:
        # The second request uses that total as the limit so all records come
        # back in a single call.
        rel_response = topics_api.get_topic_relationships(
            topic_id=topic_id,
            fields=relationship_fields,
            limit=total_response.total,
        )

        # Store each record as a plain dict decoded from its JSON form.
        relationships.extend(json.loads(rel.to_json()) for rel in rel_response.data)

relationship_collection = []

# Statistics copied unchanged from each relationship record into its row.
_RELATIONSHIP_STAT_FIELDS = (
    "num_findings",
    "num_studies",
    "num_significant_findings",
    "highest_cited",
    "median_effect_size",
)

for rel in relationships:
    # The id is split on "|"; the first two parts are written back onto the
    # record as its source and target topic ids.
    topic_ids = rel.get("id").split("|")
    rel['source_topic'] = topic_ids[0]
    rel['target_topic'] = topic_ids[1]

    # Build the output row in a fixed column order: id, statistics, topic ids.
    row = {"relationship_id": rel.get("id")}
    row.update((field, rel.get(field)) for field in _RELATIONSHIP_STAT_FIELDS)
    row["source_topic"] = rel.get("source_topic")
    row["target_topic"] = rel.get("target_topic")

    relationship_collection.append(row)

relationship_df = pd.DataFrame(relationship_collection)

# Collect System Findings (for one Relationship)

In [None]:
# Get a set of findings related to a single System Relationship

# Relationship IDs are formatted as: "<system topic id>|<system topic id>"

relationship_id = "4jVFGkHWOi|MtUI-7H2Qe"

# Step 1: request the relationship's findings with `include_total=True` and
# read the reported total from the response.
rel_total_response = relationships_api.get_relationship_statistical_findings(
    relationship_id=relationship_id,
    include_total=True,
)
rel_total = rel_total_response.total

# Step 2: request the findings again, using that total as the limit.
rel_response = relationships_api.get_relationship_statistical_findings(
    relationship_id=relationship_id,
    limit=rel_total,
)

this_relationship = []

for finding in rel_response.data:
    study = finding.study

    # Render the publish date as month/day/year (no zero padding); when the
    # study has no publish date, use X placeholders instead.
    published = study.publish_date
    if published:
        published_text = f"{published.month}/{published.day}/{published.year}"
    else:
        published_text = "XX/XX/XXXX"

    # One flat row per finding: the finding itself, both topic/variable pairs,
    # the reported statistics, and details of the originating study.
    this_relationship.append({
        "id": finding.id,
        "summary": finding.summary,
        "topic_1_id": finding.topic_1.id,
        "topic_1_name": finding.topic_1.name,
        "variable_1_id": finding.variable_1.id,
        "variable_1_name": finding.variable_1.name,
        "topic_2_id": finding.topic_2.id,
        "topic_2_name": finding.topic_2.name,
        "variable_2_id": finding.variable_2.id,
        "variable_2_name": finding.variable_2.name,
        "statistic_type": finding.statistic_type.name,
        "statistic_value": finding.statistic_value,
        "effect_size": finding.effect_size,
        "ci_upper": finding.ci_upper,
        "ci_lower": finding.ci_lower,
        "p_value": finding.p_value,
        "study_id": study.id,
        "study_doi": study.doi,
        "study_journal": study.journal,
        # NOTE(review): the key says "published_data"; presumably
        # "published_date" was intended. Kept as-is because it is the name of
        # the output column.
        "study_published_data": published_text,
        "study_population": study.population,
        "study_sample_size": study.sample_size,
        "study_cited_by": study.cited_by,
    })

finding_df = pd.DataFrame(this_relationship)