In [1]:
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from pprint import pprint
from tqdm import tqdm
from pymongo import MongoClient
from packaging.version import Version
%matplotlib inline
%config InlineBackend.figure_format = "svg"
# Record the library versions this notebook was executed with.
print(pd.__version__)
print(sns.__version__)
print(matplotlib.__version__)

1.3.4
0.11.2
3.5.0


## Load Data

In [2]:
def _load_framework(csv_path, framework):
    """Read one framework's package CSV and tag every row with ``framework``.

    Missing ``dependent_number`` values are replaced by 0 so the column can be
    stored as integers.
    """
    df = pd.read_csv(csv_path)
    df['framework'] = framework
    df['dependent_number'] = df['dependent_number'].fillna(0).astype(int)
    return df


# One frame per framework supply chain (SC), plus their concatenation.
ms_df = _load_framework('data/mindspore.csv', 'mindspore')
pp_df = _load_framework('data/paddlepaddle.csv', 'paddlepaddle')
torch_df = _load_framework('data/torch.csv', 'pytorch')
tf_df = _load_framework('data/tensorflow.csv', 'tensorflow')
all_df = pd.concat([ms_df, pp_df, torch_df, tf_df], ignore_index=True)
print('Total number of packages in each SC:')
print(len(ms_df), len(pp_df), len(torch_df), len(tf_df), len(all_df))

# Distinct package names per SC; a name may appear in more than one SC, so
# all_packages can be smaller than the sum of the individual sets.
ms_packages = set(ms_df['name'].unique())
pp_packages = set(pp_df['name'].unique())
torch_packages = set(torch_df['name'].unique())
tf_packages = set(tf_df['name'].unique())
all_packages = set(all_df['name'].unique())
print('Total number of unique packages in each SC:')
print(len(ms_packages), len(pp_packages), len(torch_packages),
      len(tf_packages), len(all_packages))

Toal number of packages in each SC:
13 121 5926 5622 11682
Total number of unique packages in each SC:
13 82 3494 3602 6251


## Version Evolution

In [3]:
# Handles to the MongoDB instance on 127.0.0.1:27017 and to the two
# collections of its 'pypi' database that the cells below query.
client = MongoClient("127.0.0.1", 27017)
pypi_db = client.get_database("pypi")
versioned_deps = pypi_db.get_collection("versioned_dependencies")
distribution_metadata = pypi_db.get_collection("distribution_metadata")

In [4]:
def get_versions(pkg: str) -> list:
    """Return the distinct versions stored for ``pkg``, sorted ascending.

    Versions are collected from the ``distribution_metadata`` collection and
    ordered with ``packaging.version.Version`` as the sort key. An empty list
    is returned when no document matches ``pkg``.
    """
    grouped = list(distribution_metadata.aggregate(pipeline=[
        {"$match": {"name": pkg}},
        # Single group (_id: None) gathering each version string once.
        {"$group": {"_id": None, "versions": {"$addToSet": "$version"}}},
    ]))
    if not grouped:
        return []
    return sorted(grouped[0]['versions'], key=Version)

In [5]:
def get_upload_time(pkg: str, version: str):
    """Return the smallest non-null ``upload_time`` stored for ``pkg`` at ``version``.

    The ``distribution_metadata`` collection may hold several documents for one
    (name, version) pair; the minimum of their ``upload_time`` values is
    returned. Returns ``None`` when no matching document carries an upload time.
    """
    results = list(distribution_metadata.find(
        {"name": pkg, "version": version, "upload_time": {"$ne": None}},
        projection={"_id": False, "upload_time": True}))
    if not results:
        # An empty result list builds a column-less DataFrame, so indexing
        # ['upload_time'] below would raise an uninformative KeyError.
        return None
    return pd.DataFrame(results)['upload_time'].min()
get_upload_time("torch", "1.10.0")

'2021-10-21 14:45:53.897101 UTC'

In [6]:
# NOTE(review): this function is defined again in the next cell; the later
# definition shadows this one. One of the two cells should be removed.
def get_all_versions(packages):
    """Collect the versioned dependency rows of every package in ``packages``.

    For each package, documents with ``extra == False`` are read from
    ``versioned_deps``, rows whose ``dependency`` is not in ``all_packages``
    are dropped, and an ``upload_time`` column is filled per version via
    ``get_upload_time``. Packages for which this processing fails (for
    example, no documents at all, so there is no ``dependency`` column) have
    their name printed and contribute whatever rows they had at that point.

    Returns a single DataFrame with a fresh 0..n-1 index.
    """
    frames = []
    for pkg in tqdm(packages):
        query = {
            "name": pkg,
            "extra": False
        }
        results = list(versioned_deps.find(query, projection={"_id": False, "extra": False}))
        df = pd.DataFrame(results)
        try:
            df = df[df['dependency'].isin(all_packages)]
            for version in df['version'].unique():
                df.loc[df['version'] == version, ['upload_time']] = get_upload_time(pkg, version)
        except Exception:
            # Best effort: report the package and keep going. Catching
            # Exception (not a bare except) lets KeyboardInterrupt stop the
            # long-running loop.
            print(pkg)
        frames.append(df)
    if not frames:
        # pd.concat rejects an empty list.
        return pd.DataFrame()
    # Concatenate once at the end instead of growing a frame per iteration.
    return pd.concat(frames).reset_index(drop=True)

In [7]:
# NOTE(review): this repeats the definition from the previous cell; being the
# later one, it is the definition in effect. One of the two cells should be
# removed.
def get_all_versions(packages):
    """Collect the versioned dependency rows of every package in ``packages``.

    For each package, documents with ``extra == False`` are read from
    ``versioned_deps``, rows whose ``dependency`` is not in ``all_packages``
    are dropped, and an ``upload_time`` column is filled per version via
    ``get_upload_time``. Packages for which this processing fails (for
    example, no documents at all, so there is no ``dependency`` column) have
    their name printed and contribute whatever rows they had at that point.

    Returns a single DataFrame with a fresh 0..n-1 index.
    """
    frames = []
    for pkg in tqdm(packages):
        query = {
            "name": pkg,
            "extra": False
        }
        results = list(versioned_deps.find(query, projection={"_id": False, "extra": False}))
        df = pd.DataFrame(results)
        try:
            df = df[df['dependency'].isin(all_packages)]
            for version in df['version'].unique():
                df.loc[df['version'] == version, ['upload_time']] = get_upload_time(pkg, version)
        except Exception:
            # Best effort: report the package and keep going. Catching
            # Exception (not a bare except) lets KeyboardInterrupt stop the
            # long-running loop.
            print(pkg)
        frames.append(df)
    if not frames:
        # pd.concat rejects an empty list.
        return pd.DataFrame()
    # Concatenate once at the end instead of growing a frame per iteration.
    return pd.concat(frames).reset_index(drop=True)

In [8]:
# Collect dependency rows for every package name in all_packages.
# Long-running: the recorded run below took about 55 minutes for 6251 packages.
# Package names printed between progress bars are those whose processing
# raised inside get_all_versions.
versioned_df = get_all_versions(all_packages)

  1%|█▋                                                                                                                                                        | 67/6251 [00:03<04:31, 22.78it/s]

bert-serving


  4%|██████▌                                                                                                                                                  | 267/6251 [00:18<06:52, 14.49it/s]

torch-workflow-archiver


  5%|████████▏                                                                                                                                                | 334/6251 [00:24<08:15, 11.94it/s]

tensorflow-gpu-estimator


  7%|██████████▋                                                                                                                                              | 435/6251 [00:33<07:35, 12.76it/s]

tensorflow-cpu-estimator


 14%|█████████████████████                                                                                                                                    | 862/6251 [01:15<09:12,  9.76it/s]

tensorflow-io-gcs-filesystem


 15%|██████████████████████▍                                                                                                                                  | 919/6251 [01:23<11:04,  8.03it/s]

tensorflow-recorder


 16%|███████████████████████▉                                                                                                                                 | 980/6251 [01:30<09:16,  9.48it/s]

pytorchfi


 16%|████████████████████████▋                                                                                                                               | 1013/6251 [01:37<10:27,  8.35it/s]

paddle-ernie


 17%|█████████████████████████▊                                                                                                                              | 1060/6251 [01:46<11:05,  7.80it/s]

tensorflow-onnx


 18%|███████████████████████████▍                                                                                                                            | 1126/6251 [01:55<10:15,  8.32it/s]

paddle-upgrade-tool


 21%|███████████████████████████████▍                                                                                                                        | 1293/6251 [02:24<11:51,  6.97it/s]

tf-1.x-rectified-adam


 22%|████████████████████████████████▋                                                                                                                       | 1344/6251 [02:32<11:12,  7.30it/s]

asteroid-sphinx-theme


 22%|█████████████████████████████████▌                                                                                                                      | 1381/6251 [02:39<19:25,  4.18it/s]

ngraph-tensorflow-bridge


 22%|█████████████████████████████████▊                                                                                                                      | 1391/6251 [02:40<12:11,  6.64it/s]

bert-service


 22%|██████████████████████████████████▏                                                                                                                     | 1404/6251 [02:42<10:41,  7.55it/s]

fairseq-doc
syntaxnet-with-tensorflow


 23%|██████████████████████████████████▏                                                                                                                     | 1407/6251 [02:42<10:27,  7.72it/s]

torch-vision


 25%|█████████████████████████████████████▌                                                                                                                  | 1545/6251 [03:09<13:09,  5.96it/s]

ppcls-notebook


 26%|███████████████████████████████████████▋                                                                                                                | 1632/6251 [03:25<11:55,  6.46it/s]

tensorflownumpy


 29%|███████████████████████████████████████████▉                                                                                                            | 1807/6251 [04:04<12:34,  5.89it/s]

tensorflow-io-plugin-gs-nightly


 38%|██████████████████████████████████████████████████████████▍                                                                                             | 2403/6251 [07:47<23:04,  2.78it/s]

tensorflow-io-gcs-filesystem-nightly


 42%|████████████████████████████████████████████████████████████████▏                                                                                       | 2638/6251 [09:28<24:00,  2.51it/s]

tensorboard-data-server


 43%|████████████████████████████████████████████████████████████████▊                                                                                       | 2663/6251 [09:37<21:54,  2.73it/s]

tensorflowdnspython


 46%|█████████████████████████████████████████████████████████████████████▍                                                                                  | 2857/6251 [10:57<22:00,  2.57it/s]

pydone


 48%|████████████████████████████████████████████████████████████████████████▎                                                                               | 2975/6251 [11:47<23:47,  2.29it/s]

ppocr-test


 50%|████████████████████████████████████████████████████████████████████████████                                                                            | 3126/6251 [13:17<23:56,  2.18it/s]

skflow


 54%|█████████████████████████████████████████████████████████████████████████████████▊                                                                      | 3367/6251 [15:13<22:38,  2.12it/s]

torchviz


 55%|████████████████████████████████████████████████████████████████████████████████████                                                                    | 3457/6251 [15:58<23:06,  2.01it/s]

x2paddle


 56%|█████████████████████████████████████████████████████████████████████████████████████▋                                                                  | 3523/6251 [16:55<37:12,  1.22it/s]

jtyoui-ernie


 65%|██████████████████████████████████████████████████████████████████████████████████████████████████▊                                                     | 4065/6251 [22:29<21:21,  1.71it/s]

torchlightning


 68%|██████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                 | 4230/6251 [24:09<20:18,  1.66it/s]

ppdet-notebook


 70%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                             | 4383/6251 [26:08<22:22,  1.39it/s]

torchcontrib


 73%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                         | 4556/6251 [28:20<20:10,  1.40it/s]

tensorflow-tools


 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                      | 4691/6251 [29:57<18:39,  1.39it/s]

paddle-gpu-serving


 75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                     | 4712/6251 [30:12<19:43,  1.30it/s]

paddle_fl


 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                    | 4738/6251 [30:32<18:23,  1.37it/s]

ct-master-build


 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                            | 5079/6251 [35:24<16:57,  1.15it/s]

tensorflow-tesla


 83%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                          | 5160/6251 [36:46<15:34,  1.17it/s]

inference-gym


 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                        | 5232/6251 [37:49<14:35,  1.16it/s]

recordio


 93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉           | 5794/6251 [46:08<06:47,  1.12it/s]

syntaxnet


 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎          | 5813/6251 [46:27<06:55,  1.05it/s]

lightning-data


 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏        | 5890/6251 [48:26<05:58,  1.01it/s]

pytorch-translate


 95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉       | 5963/6251 [50:04<05:25,  1.13s/it]

PaddleDetection-test


 96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏      | 5970/6251 [50:12<05:02,  1.08s/it]

tensorflow-probability-gpu


 99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏| 6218/6251 [54:47<00:36,  1.11s/it]

unofficial-pt-lightning-sphinx-theme


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6251/6251 [55:23<00:00,  1.88it/s]


In [9]:
# Total number of dependency rows collected.
len(versioned_df)

6444637

In [10]:
# Preview the first rows of the collected frame.
versioned_df.head()

Unnamed: 0,name,version,dependency,dependency_version,upload_time
0,gravityspy,1.0.1,Keras,2.3.1,2020-03-25 17:45:36.751896 UTC
1,gravityspy,1.0.1,Keras,2.4.0,2020-03-25 17:45:36.751896 UTC
2,gravityspy,1.0.1,Keras,2.4.1,2020-03-25 17:45:36.751896 UTC
3,gravityspy,1.0.1,Keras,2.4.2,2020-03-25 17:45:36.751896 UTC
4,gravityspy,1.0.1,Keras,2.4.3,2020-03-25 17:45:36.751896 UTC


In [11]:
# Distinct package names that ended up with at least one row in versioned_df,
# compared with the number of package names that were queried.
len(versioned_df['name'].unique()), len(all_packages)

(6132, 6251)

In [12]:
# Persist the collected rows without writing the row index.
versioned_df.to_csv("data/versions.csv", index=False)

In [13]:
def versioned_sc(framework: list, version: str):
    """Build the layered supply chain rooted at ``framework`` pinned to ``version``.

    Layer 1 is the set of names in ``framework``. Every following layer holds
    the names of the rows in the notebook-level ``versioned_df`` whose
    (dependency, dependency_version) pair equals a (name, version) pair of the
    current frontier. The next frontier is made of those rows' distinct
    (name, version) pairs, minus any package name already expanded, so each
    package name is expanded at most once. A layer's set is recorded before
    that filtering, so it can contain names that already appeared in earlier
    layers.

    Parameters
    ----------
    framework : list
        Package names forming layer 1; all are paired with ``version``.
    version : str
        Version string matched against ``dependency_version``.

    Returns
    -------
    dict
        Maps the layer number (starting at 1) to a set of package names.
    """
    sc = {1: set(framework)}
    # Build the initial frontier directly instead of assigning columns into an
    # empty frame through .loc.
    frontier = pd.DataFrame({'name': list(framework)})
    frontier['version'] = version
    # Names already expanded; a set replaces the frame that used to be grown
    # with DataFrame.append (removed in pandas 2.0).
    seen = set(framework)
    layer = 1
    while not frontier.empty:
        layer += 1
        dependents = versioned_df.merge(
            frontier,
            left_on=['dependency', 'dependency_version'],
            right_on=['name', 'version'])
        if dependents.empty:
            break
        # After the merge, name_x / version_x are the dependent package's columns.
        sc[layer] = set(dependents['name_x'].unique())
        frontier = (dependents[['name_x', 'version_x']]
                    .drop_duplicates()
                    .rename(columns={'name_x': 'name', 'version_x': 'version'}))
        frontier = frontier[~frontier['name'].isin(seen)]
        seen.update(frontier['name'])
    return sc

In [14]:
# Root distributions (layer 1) of each framework's supply chain.
# NOTE(review): every root is paired with the version list of the main
# distribution (get_versions of 'mindspore' / 'paddlepaddle') - confirm the
# variant distributions share those version strings.
ms_roots = ['mindspore', 'mindspore-ascend', 'mindspore-gpu']
pp_roots = ['paddlepaddle', 'paddlepaddle-gpu']

ms_versioned_sc = {}
pp_versioned_sc = {}
for label, roots, target in (
    ('mindspore', ms_roots, ms_versioned_sc),
    ('paddlepaddle', pp_roots, pp_versioned_sc),
):
    versions = get_versions(label)
    print(versions)
    for v in versions:
        # One layered supply chain per released version.
        target[v] = versioned_sc(roots, v)
        print('{} {} finished'.format(label, v))
    print('{} done'.format(label))

['0.2.0', '0.3.1', '1.0.0', '1.0.1', '1.1.0', '1.1.1', '1.2.0rc1', '1.2.0', '1.2.1', '1.3.0', '1.5.0.dev20211104', '1.5.0.dev20211105', '1.5.0rc1', '1.5.0']
mindspore 0.2.0 finished
mindspore 0.3.1 finished
mindspore 1.0.0 finished
mindspore 1.0.1 finished
mindspore 1.1.0 finished
mindspore 1.1.1 finished
mindspore 1.2.0rc1 finished
mindspore 1.2.0 finished
mindspore 1.2.1 finished
mindspore 1.3.0 finished
mindspore 1.5.0.dev20211104 finished
mindspore 1.5.0.dev20211105 finished
mindspore 1.5.0rc1 finished
mindspore 1.5.0 finished
mindspore done
['0.10.0', '0.10.5', '0.10.7', '0.11.0', '0.12.0', '0.13.0', '0.14.0', '0.15.0', '1.0.0', '1.0.1', '1.0.2', '1.1.0', '1.2.0', '1.2.1', '1.3.0', '1.3.1', '1.3.2', '1.4.0', '1.4.1', '1.5.0', '1.5.1', '1.5.2', '1.6.0rc0', '1.6.0', '1.6.1', '1.6.2', '1.6.3', '1.7.0', '1.7.1', '1.7.2', '1.8.0', '1.8.1', '1.8.2', '1.8.3', '1.8.4', '1.8.5', '2.0.0a0', '2.0.0b0', '2.0.0rc0', '2.0.0rc1', '2.0.0', '2.0.1', '2.0.2', '2.1.0', '2.1.1', '2.1.2', '2.1.3', '2.

In [15]:
# Root distributions (layer 1) of each framework's supply chain.
# NOTE(review): every TensorFlow root is paired with the version list of the
# 'tensorflow' distribution - confirm variants such as tf-nightly share those
# version strings.
torch_roots = ['torch']
tf_roots = ['tensorflow', 'intel-tensorflow', 'intel-tensorflow-avx512', 'tensorflow-aarch64', 'tensorflow-ascend',
            'tensorflow-cpu', 'tensorflow-fedora28', 'tensorflow-gpu', 'tensorflow-macos', 'tf-nightly', 'tf-nightly-cpu',
            'tf-nightly-gpu', 'tf-nightly-xla-gpu']

torch_versioned_sc = {}
tf_versioned_sc = {}
for pkg, label, roots, target in (
    ('torch', 'pytorch', torch_roots, torch_versioned_sc),
    ('tensorflow', 'tensorflow', tf_roots, tf_versioned_sc),
):
    versions = get_versions(pkg)
    print(versions)
    for v in versions:
        # One layered supply chain per released version.
        target[v] = versioned_sc(roots, v)
        print('{} {} finished'.format(label, v))
    print('{} done'.format(label))

['0.1.2', '0.1.2.post2', '0.3.0.post4', '0.3.1', '0.4.0', '0.4.1', '0.4.1.post2', '1.0.0', '1.0.1', '1.0.1.post2', '1.1.0', '1.1.0.post2', '1.2.0', '1.3.0', '1.3.0.post2', '1.3.1', '1.4.0', '1.5.0', '1.5.1', '1.6.0', '1.7.0', '1.7.1', '1.8.0', '1.8.1', '1.9.0', '1.9.1', '1.10.0']
pytorch 0.1.2 finished
pytorch 0.1.2.post2 finished
pytorch 0.3.0.post4 finished
pytorch 0.3.1 finished
pytorch 0.4.0 finished
pytorch 0.4.1 finished
pytorch 0.4.1.post2 finished
pytorch 1.0.0 finished
pytorch 1.0.1 finished
pytorch 1.0.1.post2 finished
pytorch 1.1.0 finished
pytorch 1.1.0.post2 finished
pytorch 1.2.0 finished
pytorch 1.3.0 finished
pytorch 1.3.0.post2 finished
pytorch 1.3.1 finished
pytorch 1.4.0 finished
pytorch 1.5.0 finished
pytorch 1.5.1 finished
pytorch 1.6.0 finished
pytorch 1.7.0 finished
pytorch 1.7.1 finished
pytorch 1.8.0 finished
pytorch 1.8.1 finished
pytorch 1.9.0 finished
pytorch 1.9.1 finished
pytorch 1.10.0 finished
pytorch done
['0.12.0rc0', '0.12.0rc1', '0.12.0', '0.12.1', '

In [16]:
def version_structure(ver_sc: dict):
    """Print one comma-separated line per version describing its supply chain.

    ``ver_sc`` maps a version string to a ``{layer: packages}`` mapping. Each
    printed line is: the version, the size of every layer in iteration order,
    and finally the number of distinct packages across all layers.
    """
    for ver, layers in ver_sc.items():
        layer_sizes = [str(len(pkgs)) for pkgs in layers.values()]
        distinct = set().union(*layers.values())
        print(','.join([ver] + layer_sizes + [str(len(distinct))]))

In [17]:
# Columns: version, size of each layer (layer 1 first), distinct packages overall.
version_structure(ms_versioned_sc)

0.2.0,3,3
0.3.1,3,3
1.0.0,3,3
1.0.1,3,3
1.1.0,3,3
1.1.1,3,1,4
1.2.0rc1,3,3
1.2.0,3,2,5
1.2.1,3,1,4
1.3.0,3,2,5
1.5.0.dev20211104,3,1,4
1.5.0.dev20211105,3,1,4
1.5.0rc1,3,1,4
1.5.0,3,1,4


In [18]:
# Columns: version, size of each layer (layer 1 first), distinct packages overall.
version_structure(pp_versioned_sc)

0.10.0,2,8,2,10
0.10.5,2,8,2,10
0.10.7,2,9,2,11
0.11.0,2,9,2,11
0.12.0,2,9,2,11
0.13.0,2,9,2,11
0.14.0,2,9,2,11
0.15.0,2,8,2,10
1.0.0,2,8,2,10
1.0.1,2,8,2,10
1.0.2,2,8,2,10
1.1.0,2,8,2,10
1.2.0,2,8,2,10
1.2.1,2,8,2,10
1.3.0,2,9,2,11
1.3.1,2,9,2,11
1.3.2,2,9,2,11
1.4.0,2,9,2,11
1.4.1,2,9,2,11
1.5.0,2,8,2,10
1.5.1,2,8,2,10
1.5.2,2,8,2,10
1.6.0rc0,2,8,2,10
1.6.0,2,8,2,10
1.6.1,2,8,2,10
1.6.2,2,8,2,10
1.6.3,2,9,2,11
1.7.0,2,9,2,11
1.7.1,2,9,2,11
1.7.2,2,11,2,13
1.8.0,2,13,2,15
1.8.1,2,13,2,15
1.8.2,2,13,2,15
1.8.3,2,14,2,16
1.8.4,2,14,2,16
1.8.5,2,16,2,18
2.0.0a0,2,15,2,17
2.0.0b0,2,15,2,17
2.0.0rc0,2,18,2,20
2.0.0rc1,2,17,2,19
2.0.0,2,21,6,27
2.0.1,2,23,2,25
2.0.2,2,19,2,21
2.1.0,2,20,2,22
2.1.1,2,19,2,21
2.1.2,2,19,2,21
2.1.3,2,19,2,21
2.2.0rc0,2,19,2,21
2.2.0,2,19,2,21


In [19]:
# Columns: version, size of each layer (layer 1 first), distinct packages overall.
version_structure(torch_versioned_sc)

0.1.2,1,1204,1114,243,16,1,1985
0.1.2.post2,1,1204,1114,243,16,1,1985
0.3.0.post4,1,1204,1114,243,16,1,1985
0.3.1,1,1206,1123,217,13,1,1971
0.4.0,1,1244,1125,212,13,1,1993
0.4.1,1,1282,1177,219,13,1,2057
0.4.1.post2,1,1278,1177,219,13,1,2056
1.0.0,1,1411,1269,170,9,1,2187
1.0.1,1,1423,1273,170,9,1,2198
1.0.1.post2,1,1428,1272,170,9,1,2199
1.1.0,1,1527,1350,165,13,3,2316
1.1.0.post2,1,1518,1351,165,13,3,2314
1.2.0,1,1619,1422,151,14,2,2422
1.3.0,1,1650,1497,184,14,1,2515
1.3.0.post2,1,1645,1495,183,14,1,2511
1.3.1,1,1693,1535,168,14,1,2563
1.4.0,1,1887,1619,152,14,1,2722
1.5.0,1,1916,1615,150,14,1,2745
1.5.1,1,1921,1613,147,14,1,2745
1.6.0,1,2205,1729,141,13,1,2999
1.7.0,1,2248,1718,123,13,1,3011
1.7.1,1,2360,1737,116,13,1,3072
1.8.0,1,2365,1717,119,13,1,3075
1.8.1,1,2446,1719,117,13,1,3115
1.9.0,1,2484,1710,111,13,1,3137
1.9.1,1,2460,1712,113,13,1,3129
1.10.0,1,2449,1705,114,13,1,3118


In [20]:
# Columns: version, size of each layer (layer 1 first), distinct packages overall.
version_structure(tf_versioned_sc)

0.12.0rc0,13,877,277,129,37,5,2,1,1213
0.12.0rc1,13,845,276,129,37,5,2,1,1186
0.12.0,13,878,277,129,37,5,2,1,1214
0.12.1,13,881,278,129,37,5,2,1,1215
1.0.0,13,898,278,129,37,5,2,1,1230
1.0.1,13,907,281,129,37,5,2,1,1242
1.1.0rc0,13,871,280,129,37,5,2,1,1211
1.1.0rc1,13,871,280,129,37,5,2,1,1211
1.1.0rc2,13,871,280,129,37,5,2,1,1211
1.1.0,13,918,282,127,36,5,2,1,1252
1.2.0rc0,13,880,280,127,37,5,2,1,1219
1.2.0rc1,13,881,280,127,37,5,2,1,1220
1.2.0rc2,13,881,280,127,37,5,2,1,1220
1.2.0,13,930,299,118,35,5,2,1,1264
1.2.1,13,933,301,118,35,5,2,1,1268
1.3.0rc0,13,894,294,128,37,5,2,1,1240
1.3.0rc1,13,894,294,128,37,5,2,1,1240
1.3.0rc2,13,894,294,128,37,5,2,1,1240
1.3.0,13,953,311,123,35,4,1,1294
1.4.0rc0,13,909,304,133,37,4,1,1261
1.4.0rc1,13,909,304,133,37,4,1,1261
1.4.0,13,962,315,123,35,4,1,1302
1.4.1,13,964,315,123,35,4,1,1304
1.5.0rc0,13,922,308,133,37,4,1,1272
1.5.0rc1,13,922,308,133,37,4,1,1272
1.5.0,13,974,320,123,35,4,1,1316
1.5.1,13,970,319,123,35,4,1,1313
1.6.0rc0,13,928,312,133,