Testing package scan across a VisionEval repository

Idea:

- Pick the repository
- Search across all files in the sources directory
- Within each of framework, model, modules, and VEGUI, scan for all library and require statements in .R files
- Compile a list of all the locations where each package is used, storing the full path.
- Aggregate to a simple count of occurrences for each package, with framework, model, VE* module, and VEGUI as columns.


In [1]:
import os
from pathlib import Path
import pandas as pd
import numpy as np # for repeat
import re 
import collections # for counting number of times a packages is used in a script

# Pick repository version
home_dir = str(Path.home())


def repo_root_from_git_dir(git_dir):
    """Return the directory that contains the given ``.git`` directory.

    ``str.rstrip('.git')`` is not suitable here: it strips any trailing run of
    the characters ``.``, ``g``, ``i`` and ``t`` rather than the literal
    suffix, so a repository folder such as ``VisionEval_Edit`` would be
    truncated. Taking the parent directory is exact and works with either
    path separator.

    Parameters
    ----------
    git_dir : str
        Path of a ``.git`` directory, as yielded by ``os.walk``.

    Returns
    -------
    str
        Path of the parent directory, without a trailing separator.
    """
    return os.path.dirname(git_dir)


# Find location of VisionEval repository (or repositories). Assumes repo was cloned (i.e., includes .git)
ve_dirs = []
# os.walk yields (dirpath, dirnames, filenames); only the directory path is needed.
for dirpath, _dirnames, _filenames in os.walk(home_dir):
    # Keep directories whose path mentions VisionEval and ends in ".git".
    if re.search(r'VisionEval.+\.git$', dirpath):
        ve_dirs.append(repo_root_from_git_dir(dirpath))

In [2]:
# Report every repository that was discovered, one per line.
print("Found repositories:")
for found_repo in ve_dirs:
    print(found_repo)

# Only the first hit is scanned. NOTE: indexing raises IndexError when
# ve_dirs is empty, i.e. when no repository was found.
ve_dir = ve_dirs[0]
print(f"Scanning first repository, {ve_dir}")

# Export output to home for now
export_dir = home_dir

Found repositories:
C:\Users\Daniel.Flynn\Documents\git\VisionEval
C:\Users\Daniel.Flynn\Documents\git\VisionEval-Dev
Scanning first repository, C:\Users\Daniel.Flynn\Documents\git\VisionEval


In [3]:
# Split the repository path into its components and use the last component as
# the repository name. The path is normalised first (dropping any trailing
# separator) and split on the platform's separator rather than a hard-coded
# backslash, so the name is also correct outside Windows. Otherwise repo_name
# would be the whole absolute path and the export file names built from it
# would no longer land in export_dir.
repo_path = os.path.normpath(ve_dir).split(os.sep)
repo_name = repo_path[-1]

In [4]:
def count_imported_packages(r_source):
    """Count the packages named in ``@import`` / ``@importFrom`` tags.

    Parameters
    ----------
    r_source : str
        Full text of one .R file.

    Returns
    -------
    collections.Counter
        Maps package name to the number of times it is named in the text.
        Empty when the text contains neither tag.

    Notes
    -----
    ``@import a b`` names one or more packages, so every token after the tag
    is counted. ``@importFrom pkg f1 f2`` names a single package followed by
    the imported functions, so only the first token after the tag is counted
    and the function names are ignored.
    """
    packages = []

    # The trailing space in the pattern keeps '@import ' from matching '@importFrom'.
    for tag_line in re.findall(r'@import .+', r_source):
        packages.extend(token for token in tag_line.split() if token != '@import')

    for tag_line in re.findall(r'@importFrom .+', r_source):
        tokens = tag_line.split()
        # tokens[0] is the tag itself; guard against a tag followed only by whitespace.
        if len(tokens) > 1:
            packages.append(tokens[1])

    return collections.Counter(packages)


# One record per (file, package) pair; files without any import tag get a
# single record with NaN package/count so they still appear in the export.
scan_records = []

for dirpath, _dirnames, filenames in os.walk(ve_dir):
    # Find .R files
    for filename in filenames:
        if not filename.endswith(".R"):
            continue

        # Read the file once and close it promptly. The explicit encoding is
        # needed because some files (e.g. CalculatePolicyVmt.R) fail to read
        # otherwise; errors="replace" keeps a single undecodable byte from
        # aborting the whole scan.
        with open(os.path.join(dirpath, filename), encoding="utf-8", errors="replace") as r_file:
            package_counts = count_imported_packages(r_file.read())

        if not package_counts:
            scan_records.append(
                {'dir': dirpath, 'file': filename, 'package': np.nan, 'count': np.nan})
        else:
            scan_records.extend(
                {'dir': dirpath, 'file': filename, 'package': package, 'count': n_uses}
                for package, n_uses in package_counts.items())

# Build the table in one step with a fixed column order; this also yields an
# empty, correctly labelled frame when no .R files were found.
R_packs = pd.DataFrame(scan_records, columns=['dir', 'file', 'package', 'count'])


In [5]:
# Name the detailed scan export after the repository that was scanned.
outfile = f'{repo_name}_Dependency_Scan.csv'
print(f'Exporting {outfile} to {export_dir}')

# Write the full scan table to the export directory as CSV.
scan_csv_path = os.path.join(export_dir, outfile)
R_packs.to_csv(scan_csv_path)

Exporting VisionEval_Dependency_Scan.csv to C:\Users\Daniel.Flynn


In [6]:
# Total the per-file counts for each package, then order the packages from the
# smallest total to the largest.
package_totals = R_packs.groupby('package').agg({'count': 'sum'})
results = package_totals.sort_values('count')

In [7]:
# Name the per-package summary export after the repository that was scanned.
outfile = f'{repo_name}_Dependency_Summary.csv'
print(f'Exporting {outfile} to {export_dir}')

# Write the aggregated totals to the export directory as CSV.
summary_csv_path = os.path.join(export_dir, outfile)
results.to_csv(summary_csv_path)

Exporting VisionEval_Dependency_Summary.csv to C:\Users\Daniel.Flynn
