# Download and save PyPI package names

Download the current list of package names from [PyPI](https://pypi.org/), the Python package index.

Normalise the package names as described by the [Python Packaging User Guide](https://packaging.python.org/en/latest/specifications/name-normalization/).

In [1]:
# URL of the PyPI "simple" index page; its HTML is downloaded and parsed for project names
PYPI_PACKAGES_URL = "https://pypi.org/simple/"

In [2]:
# download the html from pypi

import requests

# requests has no default timeout: without one a stalled connection would hang
# this cell forever. The value bounds the connect wait and each wait for data
# from the server, not the total download time.
response = requests.get(PYPI_PACKAGES_URL, timeout=60)
response.raise_for_status()  # ensure we stop if something goes wrong

In [3]:
# extract the package names from the html

from bs4 import BeautifulSoup

soup = BeautifulSoup(response.text, "html.parser")

# each <a> tag is a project name; anchors whose text is empty are dropped
link_texts = (link.text for link in soup.find_all("a"))
packages = set(filter(None, link_texts))

In [4]:
# normalise the package names

from src.libraries.format import python_normalise

# run the normaliser over every name; the set collapses names that normalise identically
normalised = set(map(python_normalise, packages))

In [None]:
# save the downloaded package names

from datetime import datetime

from llm_cgr import save_json

# the snapshot stores the time of this run next to the sorted, normalised names
snapshot = {
    "datetime": datetime.now().isoformat(),
    "data": sorted(normalised),
}
save_json(data=snapshot, file_path="../data/libraries/pypi.json")