In [13]:
import pickle
import pandas as pd
import os
from docstring_refs import count_mentions_from_url

def read_to_df(pkl_path):
    """Load a pickled dict into a DataFrame and export it to an Excel file.

    The pickle is expected to contain a dict accepted by
    ``pd.DataFrame.from_dict(..., orient='index')``: each key becomes one row
    and is exposed in a column named ``function``. The resulting frame is also
    written to an ``.xlsx`` file that sits next to the pickle and shares its
    base name (``foo/bar.pkl`` -> ``foo/bar.xlsx``).

    Parameters
    ----------
    pkl_path : str or os.PathLike
        Path of the pickle file to read.

    Returns
    -------
    pandas.DataFrame
        One row per dict key, with the key stored in the ``function`` column.
    """
    pkl_path = os.fspath(pkl_path)

    # NOTE: pickle.load can execute arbitrary code; only use it on trusted files.
    with open(pkl_path, 'rb') as file:
        records = pickle.load(file)

    df = (
        pd.DataFrame.from_dict(records, orient='index')
        .rename_axis('function')
        .reset_index()
    )

    # Swap only the final extension. A plain str.replace('.pkl', '.xlsx') would
    # also rewrite '.pkl' inside directory names and would leave the name
    # untouched for any other extension.
    filename = os.path.splitext(pkl_path)[0] + '.xlsx'
    df.to_excel(filename, index=False)
    return df

In [56]:
# Load the per-function metadata (read_to_df also writes functions_info.xlsx).
functions_df = read_to_df('functions_info.pkl')

# Normalise Windows-style backslashes to forward slashes, both in the source
# path of each function and in every entry of its list of example files.
functions_df['path'] = functions_df['path'].map(
    lambda source_path: source_path.replace('\\', '/')
)
functions_df['example_files'] = functions_df['example_files'].map(
    lambda example_paths: [p.replace('\\', '/') for p in example_paths]
)

# Overwrite the spreadsheet so it contains the normalised paths.
functions_df.to_excel('functions_info.xlsx', index=False)

functions_df

Unnamed: 0,function,docstring,path,example_files
0,clone,Construct a new unfitted estimator with the sa...,../scikit-learn/sklearn/base.py,[../scikit-learn/examples/cluster/plot_inducti...
1,make_pipeline,Construct a :class:`Pipeline` from the given e...,../scikit-learn/sklearn/pipeline.py,[../scikit-learn/examples/applications/plot_cy...
2,johnson_lindenstrauss_min_dim,Find a 'safe' number of components to randomly...,../scikit-learn/sklearn/random_projection.py,[../scikit-learn/examples/miscellaneous/plot_j...
3,kmeans_plusplus,Init n_clusters seeds according to k-means++.\...,../scikit-learn/sklearn/cluster/_kmeans.py,[../scikit-learn/examples/cluster/plot_kmeans_...
4,estimate_bandwidth,Estimate the bandwidth to use with the mean-sh...,../scikit-learn/sklearn/cluster/_mean_shift.py,[../scikit-learn/examples/cluster/plot_mean_sh...
...,...,...,...,...
97,gen_even_slices,Generator to create `n_packs` evenly spaced sl...,../scikit-learn/sklearn/utils/_chunking.py,[../scikit-learn/examples/linear_model/plot_po...
98,shuffle,Shuffle arrays or sparse matrices in a consist...,../scikit-learn/sklearn/utils/_indexing.py,[../scikit-learn/examples/applications/plot_pr...
99,get_routing_for_object,"Get a ``Metadata{Router, Request}`` instance f...",../scikit-learn/sklearn/utils/_metadata_reques...,[../scikit-learn/examples/miscellaneous/plot_m...
100,process_routing,Validate and route input parameters.\n\nThis f...,../scikit-learn/sklearn/utils/_metadata_reques...,[../scikit-learn/examples/miscellaneous/plot_m...


In [55]:
# For each example file, collect the functions (and their source paths)
# associated with it. Explode once and reuse the result rather than exploding
# functions_df a second time.
functions_df_exploded = functions_df.explode('example_files')
examples_df = (
    functions_df_exploded
    .groupby('example_files')[['function', 'path']]
    .agg(list)
    .reset_index()
)

# Derive the example's file name and its bare name (extension stripped).
examples_df['example_filename'] = examples_df['example_files'].apply(os.path.basename)
examples_df['example'] = examples_df['example_filename'].apply(lambda s: os.path.splitext(s)[0])

# Number of mentions per example, as reported by count_mentions_from_url.
# If the number of mentions == 1, then nobody has started working on it.
examples_df['mentions'] = examples_df['example'].apply(count_mentions_from_url)

# Keep only the examples with exactly one mention and export them.
examples_not_wip = examples_df[examples_df['mentions'] == 1]
examples_not_wip.to_excel('examples_not_wip.xlsx', index=False)
examples_not_wip

Unnamed: 0,example_files,function,path,example_filename,example,mentions
2,../scikit-learn/examples/applications/plot_fac...,"[classification_report, train_test_split]",[../scikit-learn/sklearn/metrics/_classificati...,plot_face_recognition.py,plot_face_recognition,1
3,../scikit-learn/examples/applications/plot_mod...,[train_test_split],[../scikit-learn/sklearn/model_selection/_spli...,plot_model_complexity_influence.py,plot_model_complexity_influence,1
4,../scikit-learn/examples/applications/plot_out...,[get_data_home],[../scikit-learn/sklearn/datasets/_base.py],plot_out_of_core_classification.py,plot_out_of_core_classification,1
5,../scikit-learn/examples/applications/plot_out...,[load_wine],[../scikit-learn/sklearn/datasets/_base.py],plot_outlier_detection_wine.py,plot_outlier_detection_wine,1
10,../scikit-learn/examples/applications/wikipedi...,[randomized_svd],[../scikit-learn/sklearn/utils/extmath.py],wikipedia_principal_eigenvector.py,wikipedia_principal_eigenvector,1
...,...,...,...,...,...,...
191,../scikit-learn/examples/svm/plot_linearsvc_su...,[make_blobs],[../scikit-learn/sklearn/datasets/_samples_gen...,plot_linearsvc_support_vectors.py,plot_linearsvc_support_vectors,1
193,../scikit-learn/examples/svm/plot_separating_h...,[make_blobs],[../scikit-learn/sklearn/datasets/_samples_gen...,plot_separating_hyperplane_unbalanced.py,plot_separating_hyperplane_unbalanced,1
194,../scikit-learn/examples/svm/plot_svm_anova.py,[cross_val_score],[../scikit-learn/sklearn/model_selection/_vali...,plot_svm_anova.py,plot_svm_anova,1
201,../scikit-learn/examples/tree/plot_iris_dtc.py,[plot_tree],[../scikit-learn/sklearn/tree/_export.py],plot_iris_dtc.py,plot_iris_dtc,1


In [30]:
# Load referenced_info.pkl into a DataFrame; read_to_df also exports it to an .xlsx file.
referenced_df = read_to_df('referenced_info.pkl')