In [1]:
!pip install wattslab-atlas
!pip install --upgrade wattslab-atlas

Collecting wattslab-atlas
  Downloading wattslab_atlas-1.2.0-py3-none-any.whl.metadata (4.6 kB)
Collecting types-requests>=2.31.0 (from wattslab-atlas)
  Downloading types_requests-2.32.4.20250913-py3-none-any.whl.metadata (2.0 kB)
Downloading wattslab_atlas-1.2.0-py3-none-any.whl (13 kB)
Downloading types_requests-2.32.4.20250913-py3-none-any.whl (20 kB)
Installing collected packages: types-requests, wattslab-atlas
Successfully installed types-requests-2.32.4.20250913 wattslab-atlas-1.2.0


In [2]:
"""
# Atlas SDK - Jupyter Demo

This notebook shows how to use the Atlas SDK in Jupyter.
"""

from wattslab_atlas import AtlasClient
from wattslab_atlas.models import FeatureCreate

In [3]:
# Initialize client
client = AtlasClient()


# Login - will reuse saved credentials if available
client.login("caroltu@seas.upenn.edu")

Atlas SDK version: 1.2.0


{'message': 'SDK token sent. Check your email.'}

In [4]:
# If you received a magic link, validate it here
# comment out if not needed
client.validate_magic_link(input("Enter the token you received in your email: ").strip())

Enter the token you received in your email: ZQ9VCPAJnCFG5ni97O89KnGYZl_pSbEy4xjobDe9wNFpF-gSGtvNSbo4g6mF8LMU6HQqA96nIVl_UcLQE0C4bQ


{'message': 'Magic link validated.',
 'email': 'caroltu@seas.upenn.edu',
 'credits': 5000}

In [None]:
# List features
features = client.list_features()
print(f"Found {len(features)} features\n")

# Display in a nice format
for f in features[:5]:
    print(f"{f.feature_name}")
    print(f"   {f.feature_description}")
    print(f"   Type: {f.feature_type} | ID: {f.id[:8]}...")
    print()

Found 105 features

📋 paper
   features of a paper.
   Type: array | ID: 67531047...

📋 title
   The title of the paper.
   Type: string | ID: 67531047...

📋 experiments
   The experiments in a paper.
   Type: array | ID: 67531048...

📋 name
   The name of the experiments in the study.
   Type: string | ID: 67531048...

📋 description
   Description of the found experiment.
   Type: string | ID: 67531048...



In [None]:
# List your papers
papers = client.list_papers(page=1, page_size=5)
print(f"Total papers: {papers.total_papers}\n")

for p in papers.papers:
    print(f"{p.title or p.file_name} (ID: {p.id[:8]}...)")

Total papers: 6

📄 nomad.pdf (ID: 68cd9fd7...)
📄 96227087-47db-43be-862e-99e0a0f1c6fa.pdf (ID: 68d2d598...)
📄 Analyzing_large_scale_human_mobility_dat.pdf (ID: 68d2d5db...)
📄 3106774.pdf (ID: 68d2e238...)
📄 Conduct an Ethnographic Interview.pdf (ID: 68daf28a...)


In [16]:
projects = client.list_projects()
for project in projects:
    print(f"{project.title}: {len(project.papers)} papers")

# Get specific project results
project_id = "68a8661d599246127d859169"
results = client.get_project_results(project_id)

# Process results
for result in results["results"]:
    paper_id = result.get("_paper_id")
    version = result.get("_version", 1)
    # Access extracted features from the result
    for key, value in result.items():
        if not key.startswith("_"):  # Skip metadata fields
            print(f"{key}: {value}")

testproject: 4 papers
paper: [{'title': 'Temporal understanding of human mobility: A multi-time scale analysis', 'data_sample': [{'dataset_type': 'mobility', 'provider_name': 'carrier', 'filters': [{'filter_type': 'geographical', 'filter_expression': '("city","==","Xi\'an")', 'filter_key': 'city'}, {'filter_type': 'temporal', 'filter_expression': '("duration","==","1 week")', 'filter_key': 'duration'}], 'completeness_statistic': [{'completeness_statistic_name': 'sampling rate', 'completeness_statistic_value': 'mean,188s'}]}, {'dataset_type': 'mobility', 'provider_name': 'carrier', 'filters': [{'filter_type': 'geographical', 'filter_expression': '("city","==","Shenyang")', 'filter_key': 'city'}, {'filter_type': 'temporal', 'filter_expression': '("duration","==","5 weeks")', 'filter_key': 'duration'}], 'completeness_statistic': []}, {'dataset_type': 'mobility', 'provider_name': 'carrier', 'filters': [{'filter_type': 'geographical', 'filter_expression': '("city","==","Urumqi")', 'filter_k

In [22]:
def flatten_object(obj, paths_to_collapse=None, current_path=""):
    if paths_to_collapse is None:
        paths_to_collapse = []

    def join_path(p, k):
        return f"{p}.{k}" if p else k

    def prefix_keys(row, prefix):
        """Prefix every key of `row` with `prefix`"""
        return {f"{prefix} {k}": v for k, v in row.items()}

    if isinstance(obj, list):
        result = []
        for item in obj:
            result.extend(flatten_object(item, paths_to_collapse, current_path))
        return result

    rows = [{}]  # start with one empty row

    for key, value in obj.items():
        full_path = join_path(current_path, key)

        if isinstance(value, list):
            if full_path in paths_to_collapse:
                # collapse ⇒ one cell with the count
                for r in rows:
                    r[key] = f"{len(value)} {key}"
            else:
                # expand ⇒ one row per element
                if len(value) == 0:
                    child_rows = [{}]
                else:
                    child_rows = []
                    for elem in value:
                        child_rows.extend(flatten_object(elem, paths_to_collapse, full_path))

                new_rows = []
                for r in rows:
                    for cr in child_rows:
                        new_rows.append({**r, **prefix_keys(cr, key)})
                rows = new_rows
            continue

        if isinstance(value, dict):
            child_rows = flatten_object(value, paths_to_collapse, full_path)
            new_rows = []
            for r in rows:
                for cr in child_rows:
                    new_rows.append({**r, **prefix_keys(cr, key)})
            rows = new_rows
            continue

        for r in rows:
            r[key] = value

    return rows


def nest_flat_keys(flat_obj):
    nested = {}

    for key, val in flat_obj.items():
        segments = key.split(" ")
        curr = nested

        for i, segment in enumerate(segments):
            # Remove "_truth" suffix if present
            if segment.endswith("_truth"):
                segment = segment[:-6]

            if i == len(segments) - 1:
                curr[segment] = val
            else:
                if segment not in curr or curr[segment] is None:
                    curr[segment] = {}
                curr = curr[segment]

    return nested

In [23]:
colab_project = client.get_project_by_id("68a8661d599246127d859169")

r = colab_project.get_results()['results']
flatten_object(r)

[{'paper title': 'Temporal understanding of human mobility: A multi-time scale analysis',
  'paper data_sample dataset_type': 'mobility',
  'paper data_sample provider_name': 'carrier',
  'paper data_sample filters filter_type': 'geographical',
  'paper data_sample filters filter_expression': '("city","==","Xi\'an")',
  'paper data_sample filters filter_key': 'city',
  'paper data_sample completeness_statistic completeness_statistic_name': 'sampling rate',
  'paper data_sample completeness_statistic completeness_statistic_value': 'mean,188s',
  'paper experiments domain': 'data validation',
  'paper experiments code_location': '',
  'paper experiments description': 'Comparison of MFR and CDR data sets to validate MFR as a finer-grained proxy for human mobility by analyzing distributions of daily records, interevent times, and interevent distances.',
  'paper experiments mobility_metric temporal_aggregation': 'day',
  'paper experiments mobility_metric spatial_aggregation': 'cell_tower'

In [None]:
# Create a new feature
new_feature = FeatureCreate(
    feature_name="Study Duration",
    feature_description="Duration of the study in months",
    feature_identifier="study_duration",
    feature_type="integer",
)

created = client.create_feature(new_feature)
print(f"Created: {created.feature_name}")
print(f"   ID: {created.id}")

APIError: API error: {"error":"2 validation errors for FeatureCreate\nfeature_type\n  Input should be 'text', 'number', 'boolean', 'enum' or 'parent' [type=literal_error, input_value='integer', input_type=str]\n    For further information visit https://errors.pydantic.dev/2.11/v/literal_error\nfeature_prompt\n  Field required [type=missing, input_value={'feature_name': 'Study D... [], 'is_shared': False}, input_type=dict]\n    For further information visit https://errors.pydantic.dev/2.11/v/missing"}