In [70]:
import requests

# Search all objects with medium = "Textiles"
search_url = "https://collectionapi.metmuseum.org/public/collection/v1/search"
params = {
    "medium": "Textiles",
    "q": "*"
}
response = requests.get(search_url, params=params)
data = response.json()

# List of object IDs returned
print(data["total"], "objects found")
print(data["objectIDs"][:10])  # Show first 10 object IDs


33437 objects found
[229636, 229605, 229642, 229639, 229667, 229692, 229693, 229694, 229695, 229504]


In [72]:
import requests
import json
import time
from datetime import datetime

# Get the search results first
search_url = "https://collectionapi.metmuseum.org/public/collection/v1/search"
params = {
    "medium": "Textiles",
    "q": "*"
}
response = requests.get(search_url, params=params)
search_data = response.json()

object_ids = search_data["objectIDs"]
print(f"Found {len(object_ids)} textile objects")

# Function to get object details with retry logic
def get_object_details(object_id, max_retries=3):
    detail_url = f"https://collectionapi.metmuseum.org/public/collection/v1/objects/{object_id}"
    
    for attempt in range(max_retries):
        try:
            response = requests.get(detail_url)
            if response.status_code == 200:
                return response.json()
            elif response.status_code == 403:
                print(f"Rate limited on object {object_id}, waiting 5 seconds...")
                time.sleep(5)  # Wait longer on rate limit
                continue
            else:
                print(f"Failed to get object {object_id}: {response.status_code}")
                return None
        except Exception as e:
            print(f"Error getting object {object_id}: {e}")
            time.sleep(2)
    
    return None

# Download all object details with much slower rate
all_objects = []
batch_size = 50  # Save more frequently
failed_ids = []

print("Starting download with rate limiting...")
start_time = time.time()

for i, object_id in enumerate(object_ids):
    if i % 10 == 0:  # Print progress every 10 items
        elapsed = time.time() - start_time
        rate = i / elapsed if elapsed > 0 else 0
        print(f"Progress: {i+1}/{len(object_ids)} ({(i+1)/len(object_ids)*100:.1f}%) - Rate: {rate:.1f} req/sec")
    
    object_data = get_object_details(object_id)
    if object_data:
        all_objects.append(object_data)
    else:
        failed_ids.append(object_id)
    
    # Save progress every batch_size objects
    if (i + 1) % batch_size == 0:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"met_textiles_batch_{i+1}_{timestamp}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(all_objects, f, indent=2, ensure_ascii=False)
        print(f"Saved {len(all_objects)} objects to {filename}")
    
    # Much slower rate - about 10 requests per second
    time.sleep(0.15)  # 150ms delay = ~6.7 requests/second

# Save final results
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
final_filename = f"met_textiles_complete_{timestamp}.json"

with open(final_filename, 'w', encoding='utf-8') as f:
    json.dump(all_objects, f, indent=2, ensure_ascii=False)

total_time = time.time() - start_time
print(f"\nDownload complete!")
print(f"Total time: {total_time/3600:.1f} hours")
print(f"Successfully downloaded: {len(all_objects)} objects")
print(f"Failed downloads: {len(failed_ids)} objects")
print(f"Success rate: {len(all_objects)/(len(all_objects)+len(failed_ids))*100:.1f}%")
print(f"Final file saved as: {final_filename}")

# Save failed IDs and summary
if failed_ids:
    with open(f"failed_ids_{timestamp}.json", 'w') as f:
        json.dump(failed_ids, f)

summary = {
    "total_found": len(object_ids),
    "successfully_downloaded": len(all_objects),
    "failed_downloads": len(failed_ids),
    "download_time_hours": total_time/3600,
    "download_timestamp": timestamp,
    "search_parameters": params
}

with open(f"download_summary_{timestamp}.json", 'w') as f:
    json.dump(summary, f, indent=2)

Found 33437 textile objects
Starting download with rate limiting...
Progress: 1/33437 (0.0%) - Rate: 0.0 req/sec
Progress: 11/33437 (0.0%) - Rate: 2.5 req/sec
Progress: 21/33437 (0.1%) - Rate: 2.5 req/sec
Rate limited on object 229602, waiting 5 seconds...
Rate limited on object 229602, waiting 5 seconds...
Rate limited on object 229602, waiting 5 seconds...
Progress: 31/33437 (0.1%) - Rate: 1.1 req/sec
Rate limited on object 229560, waiting 5 seconds...
Rate limited on object 229560, waiting 5 seconds...
Rate limited on object 229560, waiting 5 seconds...
Rate limited on object 229561, waiting 5 seconds...
Rate limited on object 229561, waiting 5 seconds...
Rate limited on object 229561, waiting 5 seconds...
Rate limited on object 229562, waiting 5 seconds...
Progress: 41/33437 (0.1%) - Rate: 0.6 req/sec
Saved 47 objects to met_textiles_batch_50_20250704_200045.json
Progress: 51/33437 (0.2%) - Rate: 0.7 req/sec
Rate limited on object 229497, waiting 5 seconds...
Rate limited on object

KeyboardInterrupt: 