This notebook pulls the metadata and the file download links for a single project hosted on the Open Science Framework (OSF), using the public OSF v2 REST API.

In [10]:
import requests
import pandas as pd
import os


In [11]:
# Root of the OSF REST API (version 2); the node endpoint URLs used in this
# notebook are built by appending a path to it.
BASE_URL = "https://api.osf.io/v2"

def get_osf_project_metadata(project_id, timeout=30):
    """
    Fetch the node record for a given OSF project ID.

    Parameters
    ----------
    project_id : str
        Identifier of the OSF project (node); it is inserted into the
        ``/nodes/{project_id}/`` endpoint path.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        ``requests.get``). Defaults to 30 so a stalled connection cannot hang
        the notebook kernel indefinitely.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no response arrives within ``timeout``.
    """
    url = f"{BASE_URL}/nodes/{project_id}/"
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors here instead of trying to decode an error body below.
    response.raise_for_status()
    return response.json()

def list_osf_files(project_id, timeout=30):
    """
    Fetch the ``/nodes/{project_id}/files/`` listing for an OSF project.

    Parameters
    ----------
    project_id : str
        Identifier of the OSF project (node).
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        ``requests.get``). Defaults to 30 so a stalled connection cannot hang
        the notebook kernel indefinitely.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no response arrives within ``timeout``.
    """
    url = f"{BASE_URL}/nodes/{project_id}/files/"
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors here instead of trying to decode an error body below.
    response.raise_for_status()
    return response.json()

def get_file_links(storage_url, timeout=30):
    """
    Fetch the complete file listing of a storage location (like osfstorage).

    The listing endpoint is paginated, so a single request can return only
    part of a larger folder. This function keeps requesting the URL found
    under ``links -> next`` until there is none, and appends every page's
    ``data`` entries to the first page's ``data`` list.

    Parameters
    ----------
    storage_url : str
        URL of the file listing to fetch.
    timeout : float or tuple, optional
        Seconds to wait for each request (passed straight to
        ``requests.get``). Defaults to 30 so a stalled connection cannot hang
        the notebook kernel indefinitely.

    Returns
    -------
    The decoded JSON body of the first page, with ``data`` extended by the
    entries of all following pages. The other top-level keys (for example
    ``links``) still describe the first page only.

    Raises
    ------
    requests.HTTPError
        If any page request answers with a 4xx/5xx status.
    requests.Timeout
        If a page does not arrive within ``timeout``.
    """
    response = requests.get(storage_url, timeout=timeout)
    response.raise_for_status()
    listing = response.json()

    # Pagination only applies to the usual {"data": [...], "links": {...}}
    # shape; any other payload is returned exactly as received.
    if not isinstance(listing, dict) or not isinstance(listing.get("data"), list):
        return listing

    next_url = (listing.get("links") or {}).get("next")
    while next_url:
        response = requests.get(next_url, timeout=timeout)
        response.raise_for_status()
        page = response.json()
        listing["data"].extend(page.get("data") or [])
        next_url = (page.get("links") or {}).get("next")

    return listing


In [12]:
# OSF project to inspect. "86ume" is a public example project; substitute the
# ID of the project you want to query.
project_id = "86ume"

# Download the project's node record and show its attribute dictionary.
metadata = get_osf_project_metadata(project_id)
print(metadata["data"]["attributes"])


{'title': 'Heat Watch Charlotte NC', 'description': 'Results from the 2024 urban heat mapping campaign.', 'category': 'project', 'custom_citation': None, 'date_created': '2024-07-18T16:45:40.927166', 'date_modified': '2025-02-06T18:11:29.159012', 'registration': False, 'preprint': False, 'fork': False, 'collection': False, 'tags': [], 'access_requests_enabled': True, 'node_license': None, 'analytics_key': 'e72755bc2bde3ea7c065347dacbcec168c38b4430e7e253b6646d67163c70fc578f9f4df86c70a1c61ebc204eebee771379ac9e9e9b0b91dc9769a0bd85e2dc79b0bae10572609589664f20fb37f263d97089247674fae7a4dadc4fc239d5086262ac9295484148512562f74cad51bd783bccabff25ec9d3cf0899f0fcedd6a4b1fb960ce7d45bef8a0db4af1af5b7ff', 'current_user_can_comment': False, 'current_user_permissions': ['read'], 'current_user_is_contributor': False, 'current_user_is_contributor_or_group_member': False, 'wiki_enabled': True, 'public': True, 'subjects': []}


In [13]:
def get_osfstorage_file_list_url(files_info):
    """
    Extract the osfstorage file listing URL from a project's files payload.

    The entry whose ``attributes -> provider`` equals ``'osfstorage'`` is
    preferred, so a project that lists another storage provider first still
    yields the osfstorage listing. When no entry declares that provider, the
    first entry is used.

    Parameters
    ----------
    files_info : dict
        Decoded payload with a ``data`` list of storage entries.

    Returns
    -------
    str or None
        The value at 'relationships' -> 'files' -> 'links' -> 'related' ->
        'href' of the chosen entry, or None when the payload is empty,
        malformed, or missing any of those keys.
    """
    try:
        providers = files_info['data']

        chosen = None
        for entry in providers:
            attributes = entry.get('attributes') if isinstance(entry, dict) else None
            if isinstance(attributes, dict) and attributes.get('provider') == 'osfstorage':
                chosen = entry
                break

        if chosen is None:
            # No entry is tagged as osfstorage: fall back to the first one.
            chosen = providers[0]

        return chosen['relationships']['files']['links']['related']['href']
    except (KeyError, IndexError, TypeError):
        # TypeError covers a None payload or a non-subscriptable level.
        return None


In [14]:
# Fetch the project's storage listing. The lookup lives in the same cell that
# uses it so the cell still works after Restart Kernel -> Run All; `files_info`
# is not defined by any earlier cell.
files_info = list_osf_files(project_id)

# Resolve the URL that lists the files held in the project's storage
storage_url = get_osfstorage_file_list_url(files_info)

if storage_url:
    files = get_file_links(storage_url)

    # Print one "name - download URL" line per listed entry
    for file_entry in files['data']:
        # .get(): an entry without a "download" link (presumably a folder;
        # confirm against the OSF API) must not abort the listing with KeyError
        download_url = file_entry["links"].get("download", "(no download link)")
        print(file_entry["attributes"]["name"], "-", download_url)
else:
    print("Could not find osfstorage file list URL.")


ReadMeFileNotation.txt - https://osf.io/download/vrh5z/
Español_Summary Report_Heat Watch Charlotte NC_CAPA 020625.pdf - https://osf.io/download/zctms/
traverses_chw_charlotte_north_carolina_082324.zip - https://osf.io/download/7dar9/
Summary Report_Heat Watch Charlotte NC_CAPA 112224.pdf - https://osf.io/download/9ayhs/
rasters_chw_charlotte_north_carolina_080624.zip - https://osf.io/download/y5m7k/
