In [0]:
import json
import os
from typing import Callable

import pandas as pd
import requests

In [0]:
# Resolve the project layout once. The notebook's starting directory is cached
# in the kernel namespace on first execution, so re-running this cell does not
# climb one more directory level each time (os.chdir('../') is relative to
# wherever the kernel currently is and is therefore not idempotent).
current_working_directory = globals().setdefault("_NOTEBOOK_START_DIR", os.getcwd())
print("Location of the notebook "+current_working_directory)

# The project root is the parent of the directory the notebook started in.
os.chdir(os.path.dirname(current_working_directory))
project_root=os.getcwd()
print("Project root "+project_root)

# os.path.join avoids a doubled separator if the root already ends with one.
DATA_DIR: str = os.path.join(project_root, "data")

In [0]:
def download_openai_caribbean_dataset():
    """Download the OpenAI Caribbean Challenge train/test GeoJSON files.

    Fetches ``train.geojson`` and ``test.geojson`` from the drivendataorg
    GitHub repository and writes each parsed payload as indented JSON
    (``train.json`` / ``test.json``) under
    ``DATA_DIR/openai_caribbean/submission_github_data``, creating the
    directory if needed.

    Raises:
        requests.HTTPError: if either request returns a 4xx/5xx status.
        requests.RequestException: on connection failures or timeouts.
    """
    print("Downloading OpenAI Caribbean Challenge Dataset")
    base_url: str = "https://raw.githubusercontent.com/drivendataorg/open-ai-caribbean/main/1st%20Place/data"
    # Seconds to wait for the server to connect/send data before giving up;
    # without a timeout a stalled connection would hang the notebook forever.
    request_timeout_s: int = 60

    # Fetch both splits before touching the disk, so a failed download does
    # not leave a half-populated dataset directory behind.
    payloads: dict = {}
    for split in ("train", "test"):
        response = requests.get(f"{base_url}/{split}.geojson", timeout=request_timeout_s)
        # Fail loudly on HTTP errors instead of parsing/saving an error body.
        response.raise_for_status()
        payloads[split] = response.json()

    BASE_DIR: str = f"{DATA_DIR}/openai_caribbean/submission_github_data"
    os.makedirs(BASE_DIR, exist_ok=True)

    for split, payload in payloads.items():
        with open(f"{BASE_DIR}/{split}.json", "w") as f:
            json.dump(payload, f, indent=4)

    print(f"OpenAI Caribbean Challenge Dataset saved to {BASE_DIR}")


In [0]:
def download_colobia_meta_demographics():
    """Fetch the Colombia high-density population CSV and store it locally.

    The zipped CSV is read straight from its humdata.org URL into a DataFrame
    (this relies on pandas inferring zip compression from the ``.zip`` URL
    suffix) and re-saved, without the index, as ``general_2020.csv`` under
    ``DATA_DIR/meta_demographics/colombia``.
    """
    print("Downloading Colombia High Density Population Resolution Map")
    source_url: str = "https://data.humdata.org/dataset/2f865527-b7bf-466c-b620-c12b8d07a053/resource/357c91e0-c5fb-4ae2-ad9d-00805e5a075d/download/col_general_2020_csv.zip"
    population_df = pd.read_csv(source_url)

    # Create the target directory only after the download has succeeded.
    output_dir: str = f"{DATA_DIR}/meta_demographics/colombia"
    os.makedirs(output_dir, exist_ok=True)

    csv_path: str = output_dir + "/general_2020.csv"
    population_df.to_csv(csv_path, index=False)
    print(f"Colombia High Density Population Dataset saved to {output_dir}")

In [0]:
def main():
    """Run every dataset download function in order.

    Prints how many datasets are registered, then calls each downloader with
    no arguments. An exception from any downloader propagates and stops the
    remaining downloads.
    """
    downloaders: list[Callable] = [
        download_openai_caribbean_dataset,
        download_colobia_meta_demographics,
    ]
    dataset_count = len(downloaders)
    print(f"Found {dataset_count} datasets to download")
    for download in downloaders:
        download()


# Entry point: call main() when this code is executed directly (as a script or
# as a notebook cell) rather than imported as a module.
if __name__ == "__main__":
    main()


Found 2 datasets to download
Downloading OpenAI Caribbean Challenge Dataset
OpenAI Caribbean Challenge Dataset saved to /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio//data/openai_caribbean/submission_github_data
Downloading Colombia High Density Population Resolution Map
Colombia High Density Population Dataset saved to /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio//data/meta_demographics/colombia
