In [61]:
import ast
from pathlib import Path
from typing import List, Dict, Set
from typeguard import typechecked

# Root folder of all source code analyzed
source_folder: Path = Path('C:/Users/Carsten/source/repos/Zeeguu-Core')

# The packages we analyze: every directory directly under the root,
# except hidden ones (leading '.') and the 'doc' folder
packages: List[Path] = [
    entry
    for entry in source_folder.iterdir()
    if entry.is_dir() and entry.name != 'doc' and not entry.name.startswith('.')
]

# Per-package info, keyed by package name. Each inner dict starts with the
# package 'path' and the list of all '*.py' 'files' found below it;
# later cells add more keys to the inner dicts.
package_infos: Dict[str, Dict] = {}
for package in packages:
    package_infos[package.name] = {
        'path': package,
        'files': list(package.rglob('*.py')),
    }

In [62]:
@typechecked
def imports_from_file(file: Path) -> List[str]:
    ''' Use ast to extract all imports from file

        Returns a sorted list without duplicates containing:
          - 'a.b'   for every 'import a.b' (aliases are ignored)
          - 'a.b.c' for every 'from a.b import c'

        Imports nested in functions, classes or conditionals are included.
        'from . import x' statements (no module name) are skipped.

        Raises SyntaxError if the file cannot be parsed as Python source.
    '''
    class ImportVisitor(ast.NodeVisitor):
        ''' Collects the names of all imports found in the visited tree '''

        def __init__(self):
            self.imports = set()

        def visit_Import(self, import_node):
            for alias in import_node.names:
                self.imports.add(alias.name)
            self.generic_visit(import_node)

        def visit_ImportFrom(self, import_from_node):
            m1 = import_from_node.module
            # module is None for 'from . import x' - nothing to qualify with
            if m1 is not None:
                for m2 in import_from_node.names:
                    self.imports.add(m1 + '.' + m2.name)
            self.generic_visit(import_from_node)

    # Read raw bytes so the parser itself detects the source encoding
    # (BOM / coding cookie, UTF-8 by default) instead of relying on the
    # locale's default text encoding; 'with' guarantees the file is closed.
    with open(file, 'rb') as source:
        file_ast = ast.parse(source.read(), filename=str(file))

    visitor = ImportVisitor()
    visitor.visit(file_ast)
    return sorted(visitor.imports)

In [63]:
@typechecked
def is_module_in_package(module: str, package: str) -> bool:
    ''' Check if module (e.g. 'x.y.z') is part of a package (e.g. 'x.y')

        The comparison is done on whole dotted components, so 'x.yy' is not
        part of 'x.y'. A package is considered part of itself. A module whose
        path is shorter than the package (e.g. 'x' vs 'x.y') is not part of it.
    '''
    module_parts = module.split('.')
    package_parts = package.split('.')
    # Compare against the full package path; zip() would silently stop at the
    # shorter of the two and report 'x' as being inside 'x.y'.
    return module_parts[:len(package_parts)] == package_parts

In [64]:
@typechecked
def import_info_from_file(file: Path, info: Dict[str, int]) -> None:
    ''' Count the imports of file per package, updating info in place

        info maps package name to import count. Only packages that are
        already keys of info are considered; each import found in file
        increments the count of every package it belongs to.
    '''
    for imported in imports_from_file(file):
        matching = [package for package in info if is_module_in_package(imported, package)]
        for package in matching:
            info[package] += 1

# Try it out on a single file: start every package at zero imports,
# count the imports of user.py and show the result
info = dict.fromkeys((p.name for p in packages), 0)
import_info_from_file(source_folder / "zeeguu_core/model/user.py", info)
print(info)

{'analyses': 0, 'playground': 0, 'tools': 0, 'zeeguu_core': 20, 'zeeguu_core_test': 0}


In [65]:
# Add import info to inner dict in package_infos:
# for each package, how often its files import each of the analyzed packages
package_names = [p.name for p in packages]
for package_info in package_infos.values():
    counts = dict.fromkeys(package_names, 0)
    for source_file in package_info['files']:
        import_info_from_file(source_file, counts)
    package_info['import_info'] = counts


In [66]:
@typechecked
def LOC(file: Path) -> int:
    ''' Return the number of lines in a file

        Every physical line counts, including blank lines and comments.
        An empty file has 0 lines; a last line without a trailing newline
        still counts as a line.

        TODO: better definition here
    '''
    # 'with' closes the file deterministically. Decoding as UTF-8 with
    # errors='replace' makes the count independent of the locale's default
    # encoding and keeps stray bytes from aborting a pure line count.
    with open(file, encoding='utf-8', errors='replace') as source:
        return sum(1 for _ in source)

In [80]:
# Add total LOC to inner dict in package_infos:
# the sum of the line counts of all files of the package
for package_info in package_infos.values():
    package_info['LOC'] = sum(LOC(source_file) for source_file in package_info['files'])


In [79]:
# Visualize the packages and their import dependencies as a directed graph
from pyvis.network import Network
from numpy import log as ln

net = Network(directed=True, notebook=True)

# Nodes: one per package, sized logarithmically by its lines of code.
# ln(1 + LOC) keeps the size finite for a package without any lines
# (ln(0) is -inf).
for name, info in package_infos.items():
    loc = info['LOC']
    net.add_node(name, size = ln(1 + loc), title = name + " LOC: " + str(loc), label=name)

# Edges: package -> imported package, width grows logarithmically with the
# number of imports. ln(1 + count) avoids requesting a zero-width edge when
# there is exactly one import (ln(1) is 0). Self-imports and unused
# packages produce no edge.
for name, info in package_infos.items():
    for dependency, count in info['import_info'].items():
        if name != dependency and count > 0:
            net.add_edge(name, dependency, title = str(count), width = ln(1 + count), arrowStrikethrough=False)
net.show('arch_recon.html')