# Class level relationships

## Generalization

Here, we are going to parse a whole project instead of a single python file.

In [2]:
# Clone a target project. Today, we try `httpie/cli`, a commandline HTTP client.
!git clone git@github.com:httpie/cli.git

fatal: destination path 'cli' already exists and is not an empty directory.


In [3]:
# First, we need to read all python files in the project.
import os

path = "./cli/httpie/**/*.py"

from glob import glob

for filepath in glob(path, recursive=True):
    print(filepath)

./cli/httpie/cookies.py
./cli/httpie/sessions.py
./cli/httpie/config.py
./cli/httpie/compat.py
./cli/httpie/encoding.py
./cli/httpie/models.py
./cli/httpie/client.py
./cli/httpie/__init__.py
./cli/httpie/core.py
./cli/httpie/ssl_.py
./cli/httpie/downloads.py
./cli/httpie/context.py
./cli/httpie/utils.py
./cli/httpie/uploads.py
./cli/httpie/__main__.py
./cli/httpie/status.py
./cli/httpie/adapters.py
./cli/httpie/plugins/registry.py
./cli/httpie/plugins/__init__.py
./cli/httpie/plugins/builtin.py
./cli/httpie/plugins/manager.py
./cli/httpie/plugins/base.py
./cli/httpie/internal/daemons.py
./cli/httpie/internal/daemon_runner.py
./cli/httpie/internal/__init__.py
./cli/httpie/internal/__build_channel__.py
./cli/httpie/output/streams.py
./cli/httpie/output/models.py
./cli/httpie/output/__init__.py
./cli/httpie/output/utils.py
./cli/httpie/output/writer.py
./cli/httpie/output/processing.py
./cli/httpie/output/ui/__init__.py
./cli/httpie/output/ui/palette.py
./cli/httpie/output/ui/rich_utils.p

In [4]:
pyfiles = list(glob(path, recursive=True))
len(pyfiles)

78

## Get AST one-by-one

In [5]:
import ast

def getAST(path: str):
    with open(path, "r") as f:
        source = f.read()
        
    return ast.parse(source)

In [6]:
myast = getAST(pyfiles[0])
myast

<ast.Module at 0x1049a9c00>

## Detect an inheritance syntax.

In [7]:
from ast import NodeVisitor

# class SuperClassNameVisitor(NodeVisitor):
#     def __init__(self):
#         super().__init__()
#         self.names = []
    
#     def visit_Name(self, node):
#         self.names.append(node.id)
#         return super().generic_visit(node)

class ClassVisitor(NodeVisitor):
    def __init__(self):
        super().__init__()
        self.gen_dict = {}
        
    def visit_ClassDef(self, node):
#         namevisitor = SuperClassNameVisitor()
        parents = []
        for base in node.bases:
            if isinstance(base, ast.Attribute):
                parents.append(base.attr)
            elif isinstance(base, ast.Subscript):
                parents.append(base.value.id)
            elif isinstance(base, ast.Name):
                parents.append(base.id)
            else:
                raise Exception(f"Unknown base type: {type(base)}")
        self.gen_dict[node.name] = { "parents": parents }
#         print(node.name, list([base.id for base in node.bases]))

#         if node.name == "LazyChoices" or node.name == "HTTPHeadersDict":
#         for base in node.bases:
#             print(ast.dump(base, indent=2))
        return super().generic_visit(node)
        
myvisitor = ClassVisitor()
myvisitor.visit(myast)

## Store generalization relationships in a data structure

In [27]:
myvisitor = ClassVisitor()

for pyfile in pyfiles:
    myast = getAST(pyfile)
    myvisitor.visit(myast)
    
# Generic[T[T]]
myvisitor.gen_dict

{'HTTPieCookiePolicy': {'parents': ['DefaultCookiePolicy']},
 'Session': {'parents': ['BaseConfigDict']},
 'ConfigFileError': {'parents': ['Exception']},
 'BaseConfigDict': {'parents': ['dict']},
 'Config': {'parents': ['BaseConfigDict']},
 'cached_property': {'parents': []},
 'HTTPMessage': {'parents': []},
 'HTTPResponse': {'parents': ['HTTPMessage']},
 'HTTPRequest': {'parents': ['HTTPMessage']},
 'RequestsMessageKind': {'parents': ['Enum']},
 'OutputOptions': {'parents': ['NamedTuple']},
 'HTTPieCertificate': {'parents': ['NamedTuple']},
 'HTTPieHTTPSAdapter': {'parents': ['HTTPAdapter']},
 'ContentRangeError': {'parents': ['ValueError']},
 'Downloader': {'parents': []},
 'DownloadStatus': {'parents': []},
 'LogLevel': {'parents': ['str', 'Enum']},
 'Environment': {'parents': []},
 'JsonDictPreservingDuplicateKeys': {'parents': ['OrderedDict']},
 'ExplicitNullAuth': {'parents': ['AuthBase']},
 'LockFileError': {'parents': ['ValueError']},
 'ChunkedStream': {'parents': []},
 'Chunke

In [9]:
anydict = {"key1": "v1", "k2": "v2"}

for k, v in anydict.items():
    print(k, v)

key1 v1
k2 v2


In [10]:
def addChildrenInfo(pdict):
    childreninfo = {}
    for k, v in pdict.items():
        for p in v["parents"]:
            if p in pdict:
                if p in childreninfo:
                    childreninfo[p].append(k)
                else:
                    childreninfo[p] = [k]
    return childreninfo

childreninfo = addChildrenInfo(myvisitor.gen_dict)
completegeninfo = {}

for k, v in myvisitor.gen_dict.items():
    completegeninfo[k] = v
    if k in childreninfo:
        completegeninfo[k]["children"] = childreninfo[k]
    else:
        completegeninfo[k]["children"] = []

completegeninfo

{'HTTPieCookiePolicy': {'parents': ['DefaultCookiePolicy'], 'children': []},
 'Session': {'parents': ['BaseConfigDict'], 'children': []},
 'ConfigFileError': {'parents': ['Exception'], 'children': []},
 'BaseConfigDict': {'parents': ['dict'], 'children': ['Session', 'Config']},
 'Config': {'parents': ['BaseConfigDict'], 'children': []},
 'cached_property': {'parents': [], 'children': []},
 'HTTPMessage': {'parents': [], 'children': ['HTTPResponse', 'HTTPRequest']},
 'HTTPResponse': {'parents': ['HTTPMessage'], 'children': []},
 'HTTPRequest': {'parents': ['HTTPMessage'], 'children': []},
 'RequestsMessageKind': {'parents': ['Enum'], 'children': []},
 'OutputOptions': {'parents': ['NamedTuple'], 'children': []},
 'HTTPieCertificate': {'parents': ['NamedTuple'], 'children': []},
 'HTTPieHTTPSAdapter': {'parents': ['HTTPAdapter'], 'children': []},
 'ContentRangeError': {'parents': ['ValueError'], 'children': []},
 'Downloader': {'parents': [], 'children': []},
 'DownloadStatus': {'parents

# Association

## Composition

We need to assume that there is no composition between classes because it is challenging to identify the lifecycle of objects in Python.

## Aggregation

In [11]:
class TypeVisitor(NodeVisitor):
    def __init__(self):
        super().__init__()
        self.class_var_map = {}

    def visit_ClassDef(self, node):
        cname = node.name
        membervars = {}
        for stmt in node.body:
            if isinstance(stmt, ast.AnnAssign) and cname == "OutputOptions":
                varname = stmt.target.id
                vartype = stmt.annotation.id
                membervars[varname] = vartype
        self.class_var_map[cname] = membervars
        return super().generic_visit(node)        
            
myvisitor = TypeVisitor()

for pyfile in pyfiles:
    myast = getAST(pyfile)
    myvisitor.visit(myast)
    
myvisitor.class_var_map

{'HTTPieCookiePolicy': {},
 'Session': {},
 'ConfigFileError': {},
 'BaseConfigDict': {},
 'Config': {},
 'cached_property': {},
 'HTTPMessage': {},
 'HTTPResponse': {},
 'HTTPRequest': {},
 'RequestsMessageKind': {},
 'OutputOptions': {'kind': 'RequestsMessageKind',
  'headers': 'bool',
  'body': 'bool',
  'meta': 'bool'},
 'HTTPieCertificate': {},
 'HTTPieHTTPSAdapter': {},
 'ContentRangeError': {},
 'Downloader': {},
 'DownloadStatus': {},
 'LogLevel': {},
 'Environment': {},
 'JsonDictPreservingDuplicateKeys': {},
 'ExplicitNullAuth': {},
 'LockFileError': {},
 'ChunkedStream': {},
 'ChunkedUploadStream': {},
 'ChunkedMultipartUploadStream': {},
 'ExitStatus': {},
 'HTTPieHTTPAdapter': {},
 'BuiltinAuthPlugin': {},
 'HTTPBasicAuth': {},
 'HTTPBearerAuth': {},
 'BasicAuthPlugin': {},
 'DigestAuthPlugin': {},
 'BearerAuthPlugin': {},
 'PluginManager': {},
 'BasePlugin': {},
 'AuthPlugin': {},
 'TransportPlugin': {},
 'ConverterPlugin': {},
 'FormatterPlugin': {},
 'DataSuppressedErro

In [53]:
class AssignmentVisitor(NodeVisitor):
    def __init__(self, classmap):
        super().__init__()
        self.member_var_map = {}
        self.classmap = classmap


# Assign(expr* targets, expr value, string? type_comment)
    def visit_Assign(self, node):
        lvalue = node.targets
        rvalue = node.value
        
        if len(lvalue) == 1:
            lvalue = lvalue[0] # you need to have a better expression.
#             print(lvalue)
            if isinstance(lvalue, ast.Attribute):
                if isinstance(lvalue.value, ast.Name) and lvalue.value.id == "self" and isinstance(lvalue.attr, str):
                    memvarname = lvalue.attr
                    if isinstance(rvalue, ast.Call) and isinstance(rvalue.func, ast.Name):
                        if rvalue.func.id in self.classmap:
                            print(ast.dump(rvalue, indent=2)) ### TODO: You need to fix here!
                            self.member_var_map[memvarname] = rvalue.func.id
                    else:
                        self.member_var_map[memvarname] = None
        return super().generic_visit(node) 
                        
    def get_member_var_info(self):
        return self.member_var_map
                    
        
# class ClassVisitorForMemberVar(NodeVisitor):
#     def __init__(self):
#         super().__init__()
    
#     def visit_ClassDef(self, node):
#         if node.name == "Session":
#             for stmt in node.body:
#                 if isinstance(stmt, ast.FunctionDef):
#                     for bodystmt in stmt.body:
#                         if isinstance(bodystmt, ast.Assign):
#                             print(ast.dump(bodystmt, indent=2))

class ClassVisitor2(NodeVisitor):
    def __init__(self, classmap):
        super().__init__()
        self.class_memvar_map = {}
        self.classmap = classmap
        
    def visit_ClassDef(self, node):
        assignvisitor = AssignmentVisitor(self.classmap)
        for stmt in node.body:
            if isinstance(stmt, ast.FunctionDef) or isinstance(stmt, ast.AsyncFunctionDef):
                assignvisitor.visit(stmt)
                amap = assignvisitor.get_member_var_info()
                self.class_memvar_map[node.name] = amap
        return super().generic_visit(node)
    
    def get_class_memvar_map(self):
        return self.class_memvar_map

In [54]:
avisitor = ClassVisitor2(myvisitor.gen_dict)

for pyfile in pyfiles:
    myast = getAST(pyfile)
    avisitor.visit(myast)
    
for aclass, vars in avisitor.get_class_memvar_map().items():
    print(f"{aclass}")
    print(f"\t{vars}")

Call(
  func=Name(id='HTTPHeadersDict', ctx=Load()),
  args=[],
  keywords=[])
Call(
  func=Name(id='Path', ctx=Load()),
  args=[
    Name(id='directory', ctx=Load())],
  keywords=[])
Call(
  func=Name(id='DownloadStatus', ctx=Load()),
  args=[],
  keywords=[
    keyword(
      arg='env',
      value=Name(id='env', ctx=Load()))])
Call(
  func=Name(id='StatusDisplay', ctx=Load()),
  args=[
    Attribute(
      value=Name(id='self', ctx=Load()),
      attr='env',
      ctx=Load())],
  keywords=[])
Call(
  func=Name(id='ProgressDisplay', ctx=Load()),
  args=[
    Attribute(
      value=Name(id='self', ctx=Load()),
      attr='env',
      ctx=Load())],
  keywords=[])
Call(
  func=Name(id='DummyDisplay', ctx=Load()),
  args=[
    Attribute(
      value=Name(id='self', ctx=Load()),
      attr='env',
      ctx=Load())],
  keywords=[])
Call(
  func=Name(id='MetadataLexer', ctx=Load()),
  args=[],
  keywords=[
    keyword(
      arg='precise',
      value=Name(id='precise', ctx=Load()))])
Call(