In [None]:
import requests
import json
import pandas as pd
import time

# Endpoint on the Algolia host that the search requests are POSTed to.
url = "https://igsyv1z1xi-dsn.algolia.net/1/indexes/*/queries"

# Headers sent with every request.
# NOTE(review): the API key and application id are hardcoded; this looks like
# a browser-side search key (the agent string says "Browser") — confirm it is
# safe to commit before sharing the notebook.
headers = dict([
    ("x-algolia-agent", "Algolia for JavaScript (5.0.0); Browser"),
    ("x-algolia-api-key", "6658746ce52e30dacfdd8ba5f8e8cf18"),
    ("x-algolia-application-id", "IGSYV1Z1XI"),
    ("content-type", "application/json"),
])

# Facets requested alongside the hits.
facet_names = [
    "availability",
    "language",
    "learning_type",
    "level",
    "partner",
    "product",
    "program_type",
    "skills.skill",
    "subject",
]

# A single search against the "product" index with an empty query string.
# "page" starts at 0 and is overwritten by the fetch loop on each iteration.
search_request = {
    "indexName": "product",
    "clickAnalytics": True,
    "facets": facet_names,
    "hitsPerPage": 150,
    "page": 0,
    "query": "",
}

# Request body: a list of search requests (only one is sent here).
payload = {"requests": [search_request]}

# Upper bound (seconds) on how long a single request may wait for the server.
# Without a timeout, requests can block indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30
# Pause (seconds) between consecutive page fetches.
CRAWL_DELAY_SECONDS = 10


def _course_to_record(course):
    """Flatten one search hit into the flat record stored in ``all_courses``.

    Args:
        course: dict describing a single hit from the search response.

    Returns:
        dict with one entry per exported column. Fields absent from the hit
        fall back to ``"Missing"`` (scalar columns) or ``["Missing"]``
        (list columns). ``skill`` is the list of ``"skill"`` values found in
        the hit's ``skills`` entries, or ``["Missing"]`` when ``skills`` is
        not a list.
    """
    # Guard against "ai_languages" being present but null / not a mapping;
    # chaining .get() on such a value would raise AttributeError.
    ai_languages = course.get("ai_languages")
    if not isinstance(ai_languages, dict):
        ai_languages = {}

    skills = course.get("skills", [])
    if isinstance(skills, list):
        # Skip entries that are not dicts or that carry no "skill" key
        # instead of failing the whole scrape with a KeyError.
        skill_names = [
            s["skill"] for s in skills if isinstance(s, dict) and "skill" in s
        ]
    else:
        skill_names = ["Missing"]

    return {
        "title": course.get("title", "Missing"),
        "partner": course.get("partner", ["Missing"]),
        "primary_description": course.get("primary_description", "Missing"),
        "secondary_description": course.get("secondary_description", "Missing"),
        "tertiary_description": course.get("tertiary_description", "Missing"),
        "availability": course.get("availability", ["Missing"]),
        "subject": course.get("subject", ["Missing"]),
        "level": course.get("level", ["Missing"]),
        "language": course.get("language", ["Missing"]),
        "product": course.get("product", "Missing"),
        "program_type": course.get("program_type", ["Missing"]),
        "staff": course.get("staff", ["Missing"]),
        "translation_language": ai_languages.get("translation_languages", ["Missing"]),
        "transcription_language": ai_languages.get("transcription_languages", ["Missing"]),
        "recent_enrollment_count": course.get("recent_enrollment_count", "Missing"),
        "marketing_url": course.get("marketing_url", "Missing"),
        "weeks_to_complete": course.get("weeks_to_complete", "Missing"),
        "skill": skill_names,
    }


# Accumulates one flat record per hit across all pages.
all_courses = []
page = 0

# Learned from the first response ("nbPages"); None until then.
total_pages = None

while True:
    print(f"[INFO] Fetching page {page}...")
    payload["requests"][0]["page"] = page
    response = requests.post(
        url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_SECONDS
    )
    # Fail loudly on 4xx/5xx rather than hitting an opaque KeyError below.
    response.raise_for_status()
    data = response.json()

    # Only one request is sent per call, so only the first result is read.
    result = data["results"][0]
    hits = result["hits"]
    if total_pages is None:
        total_pages = result.get("nbPages", 1)
        print(f"[INFO] Total pages available: {total_pages}")

    # An empty page means there is nothing more to collect.
    if not hits:
        break

    for course in hits:
        record = _course_to_record(course)
        all_courses.append(record)

    page += 1
    if page >= total_pages:
        break

    # Sleep only between pages, never after the last one.
    print(f"[INFO] Sleeping {CRAWL_DELAY_SECONDS} seconds to respect crawl delay...")
    time.sleep(CRAWL_DELAY_SECONDS)


# Build the table in one step from the collected records, then write it to
# disk without the positional index column.
csv_path = "edx_courses.csv"
df = pd.DataFrame(data=all_courses)
df.to_csv(csv_path, index=False)
print("[DONE] Saved to edx_courses.csv")

[INFO] Fetching page 0...
[INFO] Total pages available: 7
[INFO] Sleeping 10 seconds to respect crawl delay...
[INFO] Fetching page 1...
[INFO] Sleeping 10 seconds to respect crawl delay...
[INFO] Fetching page 2...
[INFO] Sleeping 10 seconds to respect crawl delay...
[INFO] Fetching page 3...
[INFO] Sleeping 10 seconds to respect crawl delay...
[INFO] Fetching page 4...
[INFO] Sleeping 10 seconds to respect crawl delay...
[INFO] Fetching page 5...
[INFO] Sleeping 10 seconds to respect crawl delay...
[INFO] Fetching page 6...
[DONE] Saved to edx_courses.csv


In [21]:
# Reload the scraped table from disk. CSV has no list type, so list-valued
# columns (partner, subject, skill, ...) come back as their string
# representation, e.g. "['edX']".
saved_csv = "edx_courses.csv"
df = pd.read_csv(saved_csv)
df

Unnamed: 0,title,partner,primary_description,secondary_description,tertiary_description,availability,subject,level,language,product,program_type,staff,translation_language,transcription_language,recent_enrollment_count,marketing_url,weeks_to_complete,skill
0,How to Learn Online,['edX'],<p>This course will prepare you with strategie...,"<ul>\n<li>History, benefits, and foundational ...",<p>This course harnesses science-backed techni...,['Available now'],['Education & Teacher Training'],['Introductory'],['English'],Course,[],[],"['Arabic', 'English', 'Spanish (Latin America)...","['Portuguese - Brazil', 'Indonesian', 'Arabic'...",47714,https://www.edx.org/learn/how-to-learn/edx-how...,2.0,['Learning Design']
1,Corporate Finance,['Columbia University'],Learn both the sound theoretical principles of...,<p>You will learn how to value a firm over the...,Understand both the sound theoretical principl...,['Available now'],"['Business & Management', 'Economics & Finance']",['Introductory'],['English'],Program,['Professional Certificate'],['daniel-wolfenzon'],[],[],11648,https://www.edx.org/certificates/professional-...,,"['Cost Of Capital', 'Finance', 'Investment Ban..."
2,The Science of Happiness,"['University of California, Berkeley']",<p>The first MOOC to teach positive psychology...,<ul>\n<li>What happiness really means and why ...,"<p><em>""A free eight-week Science of Happiness...","['Upcoming', 'Available now']",['Social Sciences'],['Introductory'],['English'],Course,[],"['dacher-keltner', 'emiliana-simon-thomas']","['Arabic', 'English', 'Spanish (Latin America)...","['Russian', 'Indonesian', 'Spanish', 'Arabic',...",16821,https://www.edx.org/learn/happiness/university...,11.0,"['Empathy', 'Evolutionary Biology', 'Psychology']"
3,Supply Chain Management,['Massachusetts Institute of Technology'],Earn a MicroMasters credential from MIT’s #1 r...,"<p><img style=""max-height:175px"" align=""right""...","To apply core methodologies (probability, stat...","['Archived', 'Upcoming', 'Available now']","['Business & Management', 'Engineering']","['Advanced', 'Introductory', 'Intermediate']",['English'],Program,['MicroMasters'],"['yossi-sheffi', 'james-blayney-rice-jr', 'jar...",[],[],30033,https://www.edx.org/masters/micromasters/mitx-...,,"['Supply Chain Management', 'Innovation', 'Sup..."
4,Remote Work Revolution for Everyone,['Harvard University'],"<p>In Remote Work Revolution for Everyone, you...",<ul>\n<li>Understand the key elements of remot...,<p>How are you thriving or surviving in your r...,['Available now'],['Business & Management'],['Introductory'],['English'],Course,['Professional Certificate'],['tsedal-neeley'],"['Arabic', 'English', 'Spanish (Latin America)...","['Spanish', 'Arabic', 'Thai', 'Korean', 'Telug...",45954,https://www.edx.org/learn/remote-work/harvard-...,3.0,"['Telecommuting', 'Customer Relationship Build..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,"Eaux, villes et changements climatiques",['Université de Montréal'],<p>Vous vous intéressez aux enjeux de l’eau en...,<p>Dans ce cours vous apprendrez à :</p>\n<p>•...,<p>Vous avez à cœur la protection de l’eau? Vo...,['Available now'],"['Engineering', 'Environmental Studies']",['Introductory'],['French'],Course,[],"['louise-millette-2', 'veronique-gisondi']",[],[],372,https://www.edx.org/learn/engineering/universi...,6.0,[]
996,Fixed Income Portfolio Management,['New York Institute of Finance'],"<p><span lang=""EN-CA"">Learn how to construct a...",<ul>\n<li>Review of Fixed Income Concepts</li>...,<p>Fixed Income Portfolio Management is a comp...,['Available now'],['Economics & Finance'],['Advanced'],['English'],Course,['Professional Certificate'],"['chris-thomas', 'jack-farmer']","['Arabic', 'English', 'Spanish (Latin America)...",[],371,https://www.edx.org/learn/portfolio-management...,4.0,"['Portfolio Management Professional', 'Portfol..."
997,Artes na Educação,['Fundação para a Ciência e a Tecnologia'],<p>A Educação encontra-se num profundo process...,<ul>\n<li>Identificar os principais contributo...,<p>Sabemos que as gerações mais novas estão su...,['Available now'],"['Education & Teacher Training', 'Art & Culture']",['Introductory'],['Portuguese'],Course,[],"['teresa-eca', 'cesar-israel-paulo']","['Arabic', 'English', 'Spanish (Latin America)...",[],371,https://www.edx.org/learn/education-teacher-tr...,6.0,[]
998,Strategic Communication for Sustainability Lea...,['University of Maryland Center for Environmen...,<p>Communicating science effectively is a crit...,<ul>\r\n<li>How to identify and critique featu...,<p>Effective science communicators are in shor...,['Available now'],"['Environmental Studies', 'Social Sciences', '...",['Introductory'],['English'],Course,['Professional Certificate'],"['richard-arnold', 'william-bill-dennison']","['Arabic', 'English', 'Spanish (Latin America)...","['Arabic', 'Portuguese - Brazil', 'Indonesian']",371,https://www.edx.org/learn/sustainability/unive...,5.0,"['Target Audience', 'Science Communication', '..."
