In [None]:
import os
from pathlib import Path
import subprocess

from collections import OrderedDict
from operator import or_
from pprint import pprint
from functools import reduce, partial

In [None]:
from itertools import product

In [None]:
import numpy as np
import pandas as pd
import ujson
import yaml
import yamlloader

In [None]:
from IPython.display import display

In [None]:
from conda.cli.python_api import run_command, Commands
from conda.history import History

In [None]:
from conda_env import PY2_PACKAGES

In [None]:
from dautil.util import map_parallel

In [None]:
# Put the Anaconda install under ~/.homebrew first on PATH so that the `conda`
# executable invoked via subprocess resolves to it.
# Guarded so that re-running this cell does not prepend the same directory again.
_anaconda_bin = str(Path("~/.homebrew/anaconda3/bin").expanduser())
if os.environ['PATH'].split(':')[0] != _anaconda_bin:
    os.environ['PATH'] = f'{_anaconda_bin}:{os.environ["PATH"]}'

In [None]:
def get_all_conda_envs():
    '''list the path of every conda environment except base/root

    queries ``conda info --json`` through conda's Python API
    '''
    stdout = run_command(Commands.INFO, '--json')[0]
    prefixes = ujson.loads(stdout)['envs']
    # only paths containing '/envs/' are kept, which filters out the root/base env
    return [prefix for prefix in prefixes if '/envs/' in prefix]

In [None]:
def conda_list(env):
    '''build a DataFrame from ``conda list`` for the environment at prefix `env`

    goes through the conda Python API, which excludes pip installed packages;
    the result is indexed by package ``name``
    '''
    stdout = run_command(Commands.LIST, '--prefix', env, '--json')[0]
    return pd.DataFrame(ujson.loads(stdout)).set_index('name')

In [None]:
def conda_list_subprocess(env):
    '''build a DataFrame from ``conda list`` for the environment at prefix `env`

    shells out to the ``conda`` cli, which includes pip installed packages;
    the result is indexed by package ``name``
    '''
    command = ('conda', 'list', '--prefix', env, '--json')
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    records = ujson.loads(completed.stdout)
    return pd.DataFrame(records).set_index('name')

In [None]:
def conda_check_compat_python_version(version, package, debug=False):
    '''report whether `package` can be installed together with Python `version`

    runs ``conda create --dry-run`` under a fixed environment name and treats a zero
    exit code as compatible; with `debug` set, the object returned by
    ``subprocess.run`` is handed back instead of the boolean
    '''
    command = (
        'conda', 'create', '--dry-run', '--json',
        '-n', 'conda_check_compat_python_version',
        f'python={version}', package,
    )
    result = subprocess.run(command, stdout=subprocess.PIPE)
    return result if debug else result.returncode == 0

In [None]:
def get_user_installed_packages(env):
    '''names of the packages the user explicitly requested in prefix `env`

    relies on conda's undocumented ``History`` API, see
    https://github.com/conda/conda/issues/4545#issuecomment-469984684
    '''
    return History(env).get_requested_specs_map().keys()

In [None]:
def filter_channels(env, channels=('pypi',)):
    '''names of the packages in environment `env` whose channel is one of `channels`

    uses `conda_list_subprocess`, so pip installed packages are included
    '''
    packages = conda_list_subprocess(env)
    from_channels = packages['channel'].isin(channels)
    return packages.loc[from_channels].index

In [None]:
def remove_element(list_, elements):
    '''filter out the items in `list_` that contain any element of `elements`

    containment is tested with ``element in item``, so for strings this is a
    substring match. An empty `elements` removes nothing and returns a copy of
    `list_` as a list.
    '''
    return [
        item for item in list_
        # `any` handles an empty `elements`, unlike `reduce` without an initial value
        if not any(element in item for element in elements)
    ]

In [None]:
def map_union(func, iterables):
    '''apply `func` to each item of `iterables` and return the set union of all results
    '''
    merged = set()
    for item in iterables:
        merged.update(func(item))
    return merged

In [None]:
def get_url(version, os):
    '''return the Anaconda package-list URL for Python `version` on platform `os`

    `os` must be ``'linux'`` or ``'osx'`` (the parameter shadows the ``os`` module
    inside this function only). Version ``'2.7'`` uses the old 2019.10 package
    lists; every other version uses the current package lists.
    '''
    assert os in ('linux', 'osx')
    # 2.7 template from https://docs.anaconda.com/anaconda/packages/oldpkglists/
    # other template from https://docs.anaconda.com/anaconda/packages/pkg-docs/
    template = (
        'https://docs.anaconda.com/anaconda/packages/old-pkg-lists/2019.10/py{version}_{os}-64/'
        if version == '2.7' else
        'https://docs.anaconda.com/anaconda/packages/py{version}_{os}-64/'
    )
    return template.format(version=version, os=os)

In [None]:
def get_df(version, os):
    '''fetch the Anaconda package table for Python `version` on platform `os`

    reads the first HTML table at the URL from `get_url`, using the first row as
    header and the first column as index. The ``In Installer`` column is asserted
    to be entirely NaN and is dropped from the returned DataFrame.
    '''
    tables = pd.read_html(get_url(version, os), header=0, index_col=0)
    packages = tables[0]
    assert packages['In Installer'].isna().all()
    return packages.drop(columns='In Installer')

In [None]:
def diff(left, right):
    '''return the rows that appear in only one of two Anaconda package tables

    `left` and `right` are argument tuples for `get_df`, e.g. ``('3.8', 'linux')``.
    Returns ``(left_only, right_only)``: the rows of the left table whose index
    label is absent from the right table, and vice versa, each in its table's
    original row order.
    '''
    df_left = get_df(*left)
    df_right = get_df(*right)

    # Boolean masks rather than ``.loc[set]``: pandas deprecated set indexers in
    # 1.5 and rejects them from 2.0 on.
    left_only = ~df_left.index.isin(df_right.index)
    right_only = ~df_right.index.isin(df_left.index)
    return df_left.loc[left_only], df_right.loc[right_only]

In [None]:
# keep only the environments whose path contains none of these substrings
envs = remove_element(get_all_conda_envs(), ('jupyterlab', 'ISR', 'sage', 'data100', 'acx'))

List of environments that will be inspected:

In [None]:
# display the environment paths that remain after filtering
envs

# Conda

In [None]:
# union, over all inspected envs, of the package names the user explicitly requested
# (taken from each env's conda history by get_user_installed_packages)
conda_packages = map_union(get_user_installed_packages, envs)

In [None]:
with open('conda-all.txt', 'r') as f:
    # one entry per line: leading '#' characters and surrounding whitespace are
    # stripped (such lines are kept, not skipped); empty results are dropped
    conda_all = {
        name
        for name in (line.lstrip('#').strip() for line in f)
        if name
    }

In [None]:
with open('conda.txt', 'r') as f:
    # one entry per line: leading '#' characters and surrounding whitespace are
    # stripped (such lines are kept, not skipped); empty results are dropped
    conda = {
        name
        for name in (line.lstrip('#').strip() for line in f)
        if name
    }

In [None]:
# fold the entries from conda.txt into conda_all (in-place union)
conda_all |= conda

In [None]:
# everything listed in the two text files plus the PY2_PACKAGES entries
conda_all2 = conda_all.union(PY2_PACKAGES)

User-installed packages not listed in `conda-all.txt`, `conda.txt`, or `PY2_PACKAGES`

In [None]:
# one package name per line, in sorted order
for name in sorted(conda_packages - conda_all2):
    print(name)

Listed in `conda-all.txt`, `conda.txt`, or `PY2_PACKAGES` but not installed

In [None]:
# one package name per line, in sorted order
for name in sorted(conda_all2 - conda_packages):
    print(name)

# pip

In [None]:
# union, over all inspected envs, of the packages that `conda list` reports
# with channel 'pypi' (filter_channels' default)
pip_packages = map_union(filter_channels, envs)

In [None]:
with open('pip-all.txt', 'r') as f:
    # one entry per line: leading '#' characters and surrounding whitespace are
    # stripped (such lines are kept, not skipped); empty results are dropped
    pip_all = {
        name
        for name in (line.lstrip('#').strip() for line in f)
        if name
    }

In [None]:
with open('pip.txt', 'r') as f:
    # one entry per line: leading '#' characters and surrounding whitespace are
    # stripped (such lines are kept, not skipped); empty results are dropped
    pip = {
        name
        for name in (line.lstrip('#').strip() for line in f)
        if name
    }

In [None]:
# fold the entries from pip.txt into pip_all (in-place union)
pip_all |= pip

Installed PyPI packages not listed in `pip-all.txt` or `pip.txt`

In [None]:
# one package name per line, in sorted order
for name in sorted(pip_packages - pip_all):
    print(name)

Listed in `pip-all.txt` or `pip.txt` but not installed

In [None]:
# one package name per line, in sorted order
for name in sorted(pip_all - pip_packages):
    print(name)

# Inspect packages not compatible with Python 3.8

In [None]:
# fix an ordering of the package names; the same tuple is reused below as the
# index of the compatibility results
conda_all_tuple = tuple(conda_all)

In [None]:
# run the `conda create --dry-run` check against Python 3.8 for every package,
# with one worker per CPU; `%time` reports how long the whole run takes
# NOTE(review): assumes map_parallel returns results in input order — confirm against dautil
%time conda_compat = map_parallel(partial(conda_check_compat_python_version, '3.8'), conda_all_tuple, mode='multithreading', processes=os.cpu_count())

In [None]:
# one row per package name, with the check result in the `is_compat` column
df_compat = pd.DataFrame(conda_compat, index=conda_all_tuple, columns=['is_compat'])

In [None]:
# show only the packages whose check did not succeed
df_compat[~df_compat.is_compat]

# Inspect packages not supported by Anaconda

In [None]:
version = '3.8'
# Named `os_name`, not `os`: rebinding `os` would shadow the `os` module that other
# cells rely on (`os.path.expanduser`, `os.cpu_count`) and break them on re-run.
os_name = 'linux'

# Anaconda package table for the chosen Python version and platform
df = get_df(version, os_name)

In [None]:
# entries of conda_all that are not in the index of the Anaconda package table
conda_all.difference(df.index)

# Intersection of Anaconda supported packages

Create an environment named `acx` (short for "Anaconda extended") from the intersection of the packages listed in `conda-all.txt` / `conda.txt` and the packages supported by Anaconda on both Linux and macOS.

In [None]:
# Anaconda package table for Python 3.8 on Linux
df_linux = get_df('3.8', 'linux')

In [None]:
# Anaconda package table for Python 3.8 on macOS (`osx`)
df_mac = get_df('3.8', 'osx')

In [None]:
# package names per platform, taken from each table's index
conda_supported_packages_linux = set(df_linux.index)
conda_supported_packages_mac = set(df_mac.index)
# names supported on at least one of the two platforms
conda_supported_packages = conda_supported_packages_linux | conda_supported_packages_mac

In [None]:
# counts: macOS, Linux, union of both
len(conda_supported_packages_mac), len(conda_supported_packages_linux), len(conda_supported_packages)

In [None]:
# packages in conda-all.txt or conda.txt that are supported by Anaconda, per platform
conda_filtered_linux = conda_all & conda_supported_packages_linux
conda_filtered_mac = conda_all & conda_supported_packages_mac
# keep only the packages supported on both platforms
conda_filtered = conda_filtered_mac & conda_filtered_linux
# counts (both, macOS, Linux) followed by the Linux-only packages
len(conda_filtered), len(conda_filtered_mac), len(conda_filtered_linux), conda_filtered_linux - conda_filtered_mac

In [None]:
# Force-include three extra packages, then freeze the result into a sorted list.
# Written as one expression so the cell can be re-run: it accepts `conda_filtered`
# either as the set built earlier or as the list left behind by a previous run
# (calling `.update()` on that list would raise AttributeError).
conda_filtered = sorted(set(conda_filtered) | {'anaconda', 'panflute', 'cytoolz'})

In [None]:
# number of entries that will be written as dependencies to acx.yml
len(conda_filtered)

In [None]:
with open('acx.yml', 'w') as f:
    # environment spec for `acx`; an OrderedDict is used with yamlloader's dumper,
    # presumably so the keys are written in this order
    acx_spec = OrderedDict([
        ('name', 'acx'),
        ('channels', ['defaults']),
        ('dependencies', conda_filtered),
    ])
    yaml.dump(acx_spec, f, Dumper=yamlloader.ordereddict.CSafeDumper, default_flow_style=False)