In [1]:
import tokenize
import json
import os

In [2]:
# Path of the raw Python source file that the following cell opens and tokenizes.
FILEPATH='./raw/scikit-learn/sklearn/decomposition/_factor_analysis.py'

In [3]:
# Gather (start_line, end_line, text) for every string-literal token in
# FILEPATH; docstrings are among these tokens.
comment_list = []
with open(FILEPATH, 'rb') as source:
    comment_list.extend(
        (token.start[0], token.end[0], token.string)
        for token in tokenize.tokenize(source.readline)
        if token.type == tokenize.STRING
    )

In [4]:
# Load the pre-built index for the module under inspection.
with open('./index/scikit-learn/sklearn/decomposition/_factor_analysis.json', 'r') as index_file:
    file_content = json.load(index_file)

# ln_fdef: line number -> names of the functions recorded at that line.
# function_params: function name -> its recorded parameter list.
ln_fdef = {}
function_params = {}
for fd, fd_info in file_content['FunctionDef'].items():
    for ln in fd_info['lineno']:
        ln_fdef.setdefault(ln, []).append(fd)
    function_params[fd] = fd_info['params']

In [5]:
# Pair each string token with every function recorded on the line directly
# before its first line or directly after its last line.
cfunc_pairs = []
for first_line, last_line, text in comment_list:
    for adjacent_line in (first_line - 1, last_line + 1):
        cfunc_pairs.extend(
            (func_name, text) for func_name in ln_fdef.get(adjacent_line, ())
        )

In [6]:
# Show each paired function's parameter list followed by the string token
# that was matched to it.
for paired_func, paired_text in cfunc_pairs:
    print(function_params[paired_func], paired_text, sep='\n')

['self', 'X', 'y']
"""Fit the FactorAnalysis model to X using SVD based approach.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : Ignored
            Ignored parameter.

        Returns
        -------
        self : object
            FactorAnalysis class instance.
        """
['self', 'X']
"""Apply dimensionality reduction to X using the model.

        Compute the expected mean of the latent variables.
        See Barber, 21.2.33 (or Bishop, 12.66).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            The latent variables of X.
        """
['self']
"""Compute data covariance with the FactorAnalysis model.

        ``cov = components_.T * components_ + diag(noise_variance)``

        Returns
        -------
        cov : ndarray

In [7]:
# with open(os.path.join('./index/', "file_key.txt")) as f:
#         j = json.loads(f.read())

In [39]:
def extract_parameters_from_func_description(func_description):
    """Extract per-parameter entries from a docstring-like text.

    Everything from the first occurrence of ``'Returns'`` onward is dropped
    and all backslashes are removed. Every remaining line that contains a
    ``:`` is treated as the start of an entry; the entry extends up to (not
    including) the next blank line, or to the end of the text when no blank
    line follows.

    Parameters
    ----------
    func_description : str
        The raw text to scan.

    Returns
    -------
    dict
        Maps the stripped text before the first ``:`` of the entry's first
        line to the entry's lines joined with single spaces and stripped.
        A later entry with the same key overwrites an earlier one.
    """
    returns_loc = func_description.find('Returns')
    if returns_loc != -1:
        func_description = func_description[:returns_loc]

    func_description = func_description.replace('\\', '')

    parameters = {}
    lines = func_description.split('\n')
    n_lines = len(lines)
    for line_no, line in enumerate(lines):
        # Debug trace of every scanned line, kept so stdout is unchanged.
        print(line_no, line)
        if ':' not in line:
            continue

        end = line_no
        # The bound must be checked BEFORE indexing: an entry that runs to
        # the last line (no trailing blank line) would otherwise raise
        # IndexError.
        while end < n_lines and lines[end].strip() != '':
            end += 1

        key = line.split(':')[0].strip()
        parameters[key] = ' '.join(lines[line_no:end]).strip()

    return parameters
        

def get_parameter_definition_locations(json_filepath, function_name):
    """Map each documented parameter of a function to its definition locations.

    The raw source (``./raw/<json_filepath with '.json' replaced by '.py'>``)
    is tokenized to collect its string-literal tokens, and the index file
    (``./index/<json_filepath>``) supplies the line numbers and parameter
    lists of the recorded functions. A string token is associated with a
    function when the function is recorded on the line directly before the
    token's first line or directly after its last line. The string associated
    with ``function_name`` is then parsed by
    ``extract_parameters_from_func_description``.

    Parameters
    ----------
    json_filepath : str
        Index file path relative to ``./index``.
    function_name : str
        Name of the function whose parameters are wanted.

    Returns
    -------
    dict
        Parameter name -> set of locations; currently always ``{'builtin'}``.

    Raises
    ------
    KeyError
        If no string token was associated with ``function_name``.
    """
    DEBUG = True

    source_path = os.path.join('./raw', json_filepath.replace('.json', '.py'))
    with open(source_path, 'rb') as source:
        string_tokens = [
            (token.start[0], token.end[0], token.string)
            for token in tokenize.tokenize(source.readline)
            if token.type == tokenize.STRING
        ]

    with open(os.path.join('./index', json_filepath), 'r') as index_file:
        index_data = json.load(index_file)

    # Line number -> function names recorded there; name -> parameter list.
    funcs_by_line = {}
    function_params = {}
    for name, info in index_data['FunctionDef'].items():
        for lineno in info['lineno']:
            funcs_by_line.setdefault(lineno, []).append(name)
        function_params[name] = info['params']

    # Later matches overwrite earlier ones for the same function name.
    docstring_of = {}
    for first_line, last_line, text in string_tokens:
        for adjacent_line in (first_line - 1, last_line + 1):
            for name in funcs_by_line.get(adjacent_line, ()):
                docstring_of[name] = text

    if DEBUG:
        for name, text in docstring_of.items():
            print(name)
            print(function_params[name])
            print(text)
        print('\n')

    descriptions = extract_parameters_from_func_description(docstring_of[function_name])

    if DEBUG:
        print('Parameters for function: {}'.format(function_name))
        for param_name, description in descriptions.items():
            print(param_name, ':', description)

    # TODO: Search for the type of the parameter across multiple files
    return {param_name: {'builtin'} for param_name in descriptions}
    
        

In [40]:
get_parameter_definition_locations('scikit-learn/sklearn/model_selection/_search.json', 'score')

__iter__
['self']
"""Iterate over the points in the grid.

        Returns
        -------
        params : iterator over dict of str to any
            Yields dictionaries mapping each estimator parameter to one of its
            allowed values.
        """
__len__
['self']
"""Number of points that will be sampled."""
__getitem__
['self', 'ind']
"""Get the parameters that would be ``ind``th in iteration

        Parameters
        ----------
        ind : int
            The iteration index

        Returns
        -------
        params : dict of str to any
            Equal to list(self)[ind]
        """
_estimator_has
['attr']
"""Check if we can delegate a method to the underlying estimator.

    Calling a prediction method will only be available if `refit=True`. In
    such case, we check first the fitted best estimator. If it is not
    fitted, we check the unfitted estimator.

    Checking the unfitted estimator allows to use `hasattr` on the `SearchCV`
    instance even before

{'X': {'builtin'}, 'y': {'builtin'}}