In [None]:
from lxml import html
import re
import requests

def get_packages(language='python'):
    """Download the package listing page and build one record per link.

    The CRAN "available packages by name" page is fetched, the text of every
    ``<a>`` element is handed to ``generate_packages`` together with
    ``app.config['ARTIFACTORY']`` and ``language``, and every non-``None``
    result is collected.

    NOTE(review): the same CRAN URL is fetched whatever ``language`` is --
    confirm whether other registries are supposed to be selected here.

    Args:
        language: Package ecosystem identifier forwarded to
            ``generate_packages``. When it is ``'conda'`` duplicate records
            are removed from the result.

    Returns:
        list of dict: The records produced by ``generate_packages`` in page
        order. For ``'conda'`` only the last occurrence of each identical
        record is kept.

    Raises:
        requests.HTTPError: If the registry answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    registry = "https://cran.r-project.org/web/packages/available_packages_by_name.html#available-packages-Z"
    # Without a timeout requests waits forever on an unresponsive server.
    response = requests.get(registry, timeout=60)
    response.raise_for_status()
    html_tree = html.fromstring(response.content)

    artifactory = app.config['ARTIFACTORY']
    packages = []
    for link_text in html_tree.xpath('//a/text()'):
        # Parse each link once and reuse the result for both the filter and
        # the collected value.
        record = generate_packages(link_text, artifactory, language)
        if record is not None:
            packages.append(record)

    # Remove redundant packages & versions pair
    if language == 'conda':
        # Walk backwards so that the *last* occurrence of each record is the
        # one kept, then restore the original order. Records are flat
        # str -> str dicts, so their sorted items form a hashable identity.
        seen = set()
        unique = []
        for record in reversed(packages):
            identity = tuple(sorted(record.items()))
            if identity not in seen:
                seen.add(identity)
                unique.append(record)
        unique.reverse()
        packages = unique
    return packages


# pylint: disable=inconsistent-return-statements
def generate_packages(package, artifactory, language):
    """Turn one registry link text into a package record.

    Args:
        package: Link text taken from a registry listing, e.g. a tarball
            file name or a bare package name.
        artifactory: Truthy when R packages are listed as
            ``<name>_<version>.tar.gz`` file names that need parsing.
        language: One of ``'r'``, ``'conda'``, ``'conda-r'``; any other
            value (and ``'r'`` with a falsy ``artifactory``) is treated as a
            plain package name.

    Returns:
        dict or None:
            * R with ``artifactory``: ``{"name", "version"}`` for a
              ``.tar.gz`` entry, ``None`` otherwise.
            * ``'conda'``: ``{"name", "version", "python_version"}`` for a
              ``tar.bz2`` entry, ``None`` otherwise. ``python_version`` is
              the ``pyNN``/``pyNNN`` tag of the build string or ``"NA"``.
            * ``'conda-r'``: ``{"name"}`` for a ``tar.bz2`` entry, ``None``
              otherwise.
            * anything else: ``{"name": package}``.
    """
    if artifactory and language == 'r':
        if not package.endswith('.tar.gz'):
            return None
        stem = package[:-len('.tar.gz')]
        # Name is everything before the first underscore, version everything
        # after it. A tarball without an underscore carries no version and is
        # skipped instead of failing on a missing regex match.
        package_name, separator, package_version = stem.partition('_')
        if not separator:
            return None
        return {"name": package_name, "version": package_version}

    if language in ("conda", "conda-r"):
        if not package.endswith('tar.bz2'):
            return None
        # Expected layout: <name>-<version>-<build>.tar.bz2, where the name
        # itself may contain dashes.
        parts = package.split("-")
        if len(parts) < 3:
            # Not enough fields for name, version and build.
            return None
        build_name = parts[-1]
        package_version = parts[-2]
        package_name = "-".join(parts[:-2])
        if language == "conda-r":
            return {"name": package_name}
        # Two or more digits so that e.g. "py310" is not cut down to "py31".
        tag = re.search(r"(?:py[0-9]{2,})+", build_name)
        py_version = tag.group() if tag else "NA"
        return {"name": package_name, "version": package_version, "python_version": py_version}

    return {"name": package}
    
def create_index(refresh=False, language='python'):
    """Store every package returned by ``get_packages`` and prune the rest.

    Each record from ``get_packages(language)`` that does not contain the
    key ``"href="`` is passed to ``save_package_data`` and its name is
    remembered. If at least one name was collected, the names are handed to
    ``delete_missing_packages``.

    Args:
        refresh: Not used by this function body.
        language: Package ecosystem identifier forwarded to ``get_packages``,
            ``save_package_data`` and ``delete_missing_packages``.
    """
    indexed_names = []

    # pylint: disable=unexpected-keyword-arg
    for entry in get_packages(language):
        if "href=" in entry:
            continue
        save_package_data(entry, language)
        indexed_names.append(entry['name'])

    if indexed_names:
        delete_missing_packages(indexed_names, language)

# Pass the language by keyword: the first positional parameter of
# create_index is ``refresh``, so create_index('r') left language='python'.
create_index(language='r')

> [0;32m<ipython-input-2-cd6440a5e09d>[0m(9)[0;36mget_packages[0;34m()[0m
[0;32m      7 [0;31m    [0;32mimport[0m [0mpdb[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m      8 [0;31m    [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m----> 9 [0;31m    [0;32mif[0m [0mlanguage[0m [0;34m==[0m [0;34m'r'[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     10 [0;31m        [0mregistry[0m [0;34m=[0m [0;34m"https://cran.r-project.org/web/packages/available_packages_by_name.html#available-packages-Z"[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     11 [0;31m    [0mresponse[0m [0;34m=[0m [0mrequests[0m[0;34m.[0m[0mget[0m[0;34m([0m[0mregistry[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  n


> [0;32m<ipython-input-2-cd6440a5e09d>[0m(11)[0;36mget_packages[0;34m()[0m
[0;32m      9 [0;31m    [0;32mif[0m [0mlanguage[0m [0;34m==[0m [0;34m'r'[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     10 [0;31m        [0mregistry[0m [0;34m=[0m [0;34m"https://cran.r-project.org/web/packages/available_packages_by_name.html#available-packages-Z"[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 11 [0;31m    [0mresponse[0m [0;34m=[0m [0mrequests[0m[0;34m.[0m[0mget[0m[0;34m([0m[0mregistry[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     12 [0;31m    [0mresponse[0m[0;34m.[0m[0mraise_for_status[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m    [0mhtml_tree[0m [0;34m=[0m [0mhtml[0m[0;34m.[0m[0mfromstring[0m[0;34m([0m[0mresponse[0m[0;34m.[0m[0mcontent[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  


UnboundLocalError: local variable 'registry' referenced before assignment
> [0;32m<ipython-input-2-cd6440a5e09d>[0m(11)[0;36mget_packages[0;34m()[0m
[0;32m      9 [0;31m    [0;32mif[0m [0mlanguage[0m [0;34m==[0m [0;34m'r'[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     10 [0;31m        [0mregistry[0m [0;34m=[0m [0;34m"https://cran.r-project.org/web/packages/available_packages_by_name.html#available-packages-Z"[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 11 [0;31m    [0mresponse[0m [0;34m=[0m [0mrequests[0m[0;34m.[0m[0mget[0m[0;34m([0m[0mregistry[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     12 [0;31m    [0mresponse[0m[0;34m.[0m[0mraise_for_status[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m    [0mhtml_tree[0m [0;34m=[0m [0mhtml[0m[0;34m.[0m[0mfromstring[0m[0;34m([0m[0mresponse[0m[0;34m.[0m[0mcontent[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  


--Return--
None
> [0;32m<ipython-input-2-cd6440a5e09d>[0m(11)[0;36mget_packages[0;34m()[0m
[0;32m      9 [0;31m    [0;32mif[0m [0mlanguage[0m [0;34m==[0m [0;34m'r'[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     10 [0;31m        [0mregistry[0m [0;34m=[0m [0;34m"https://cran.r-project.org/web/packages/available_packages_by_name.html#available-packages-Z"[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 11 [0;31m    [0mresponse[0m [0;34m=[0m [0mrequests[0m[0;34m.[0m[0mget[0m[0;34m([0m[0mregistry[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     12 [0;31m    [0mresponse[0m[0;34m.[0m[0mraise_for_status[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     13 [0;31m    [0mhtml_tree[0m [0;34m=[0m [0mhtml[0m[0;34m.[0m[0mfromstring[0m[0;34m([0m[0mresponse[0m[0;34m.[0m[0mcontent[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  


UnboundLocalError: local variable 'registry' referenced before assignment
> [0;32m<ipython-input-2-cd6440a5e09d>[0m(49)[0;36mcreate_index[0;34m()[0m
[0;32m     47 [0;31m[0;32mdef[0m [0mcreate_index[0m[0;34m([0m[0mrefresh[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m [0mlanguage[0m[0;34m=[0m[0;34m'python'[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     48 [0;31m    [0;34m""" Create ES index """[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 49 [0;31m    [0mpackages[0m [0;34m=[0m [0mget_packages[0m[0;34m([0m[0mlanguage[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     50 [0;31m    [0mpackage_list[0m [0;34m=[0m [0;34m[[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     51 [0;31m[0;34m[0m[0m
[0m


ipdb>  packages


*** NameError: name 'packages' is not defined


ipdb>  n


--Return--
None
> [0;32m<ipython-input-2-cd6440a5e09d>[0m(49)[0;36mcreate_index[0;34m()[0m
[0;32m     47 [0;31m[0;32mdef[0m [0mcreate_index[0m[0;34m([0m[0mrefresh[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m [0mlanguage[0m[0;34m=[0m[0;34m'python'[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     48 [0;31m    [0;34m""" Create ES index """[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 49 [0;31m    [0mpackages[0m [0;34m=[0m [0mget_packages[0m[0;34m([0m[0mlanguage[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     50 [0;31m    [0mpackage_list[0m [0;34m=[0m [0;34m[[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     51 [0;31m[0;34m[0m[0m
[0m


ipdb>  n


UnboundLocalError: local variable 'registry' referenced before assignment
> [0;32m<ipython-input-2-cd6440a5e09d>[0m(61)[0;36m<module>[0;34m()[0m
[0;32m     57 [0;31m[0;34m[0m[0m
[0m[0;32m     58 [0;31m    [0;32mif[0m [0mlen[0m[0;34m([0m[0mpackage_list[0m[0;34m)[0m [0;34m>[0m [0;36m0[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     59 [0;31m        [0mdelete_missing_packages[0m[0;34m([0m[0mpackage_list[0m[0;34m,[0m [0mlanguage[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     60 [0;31m[0;34m[0m[0m
[0m[0;32m---> 61 [0;31m[0mcreate_index[0m[0;34m([0m[0;34m'r'[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
