In [54]:
from typing import *
import libcst as cst
import networkx as nx
import pandas as pd
import numpy as np

In [55]:
from alkh import cst_utils

In [56]:
file_path = 'play.py'

In [84]:
# Read the file once; the context manager closes the handle deterministically.
with open(file_path, 'r') as source_file:
    file_lines = source_file.readlines()
# Joining the lines gives the full text without reading the file a second time.
file_content = ''.join(file_lines)
# Parse the module and wrap it so that metadata providers can be resolved on it.
wrapper = cst.metadata.MetadataWrapper(cst.parse_module(file_content))
# Distinct values of the mapping resolved from ScopeProvider.
scopes = set(wrapper.resolve(cst.metadata.ScopeProvider).values())
# Mapping resolved from PositionProvider; indexed by CST node further down.
ranges = wrapper.resolve(cst.metadata.PositionProvider)
file_number_of_lines = len(file_lines)

In [85]:
class AssignCollector(cst.CSTVisitor):
    """Record the targets, value contents and line of every ``Assign`` node.

    For each assignment, and for each of its targets that yields at least one
    name, one tuple ``(target_names, value_dict, line)`` is stored:

    * ``target_names`` -- the ``names`` collected from the target by
      ``ValueCollector``.
    * ``value_dict``   -- ``{'names': ..., 'ints': ..., 'floats': ...}`` collected
      from the right-hand side by ``ValueCollector``.
    * ``line``         -- ``ranges[node].start.line`` for the assignment node.

    Only ``visit_Assign`` is defined, so other statement kinds are not recorded.
    """

    def __init__(self, ranges):
        """Store the node -> position mapping used to look up line numbers.

        Args:
            ranges: mapping indexed by CST node whose values expose
                ``.start.line``.
        """
        super().__init__()
        self._ranges = ranges
        self._assign_info: List[Tuple] = []

    def get_info(self) -> List[Tuple]:
        """Return the list of collected ``(target_names, value_dict, line)`` tuples."""
        return self._assign_info

    def visit_Assign(self, node: cst.Assign) -> None:
        """Collect the right-hand side once, then one entry per named target."""
        line = self._ranges[node].start.line
        value_collector = ValueCollector()
        node.value.visit(value_collector)
        # Built once per statement: every target of this assignment gets the
        # same dict object.
        value_dict = {'names': value_collector.names, 'ints': value_collector.ints, 'floats': value_collector.floats}
        for target in node.targets:
            target_collector = ValueCollector()
            target.visit(target_collector)
            names_list = target_collector.names
            # A target from which no name was collected is skipped.
            if names_list:
                self._assign_info.append((names_list, value_dict, line))

class ValueCollector(cst.CSTVisitor):
    """Gather the names and numeric literals found inside a CST subtree.

    After the collector has visited a node, its public attributes hold:

    * ``names``  -- one list of strings per reference: ``['a']`` for a bare
      name, ``['self', 'm', 'c']`` for the dotted chain ``self.m.c``.
    * ``ints``   -- the ``value`` of every ``Integer`` node.
    * ``floats`` -- the ``value`` of every ``Float`` node.
    """

    def __init__(self):
        super().__init__()
        self.names: List[List[str]] = []
        self.ints: List[str] = []
        self.floats: List[str] = []
        # Number of Attribute nodes currently being traversed. Names met while
        # this is non-zero are not recorded as standalone names.
        self._attribute_level = 0
        # The Attribute node whose dotted chain is the last entry of
        # ``self.names``; None while there is no chain that may be extended.
        self._chain_tail: Optional[cst.Attribute] = None

    def visit_Name(self, node: cst.Name) -> None:
        """Record a bare name; names inside an attribute access are skipped here."""
        if self._attribute_level == 0:
            self.names.append([node.value])

    def visit_Integer(self, node: cst.Integer) -> None:
        """Record the integer literal's ``value``."""
        self.ints.append(node.value)

    def visit_Float(self, node: cst.Float) -> None:
        """Record the float literal's ``value``."""
        self.floats.append(node.value)

    def visit_Attribute(self, node: cst.Attribute) -> None:
        """Enter an attribute access (suppresses standalone name recording)."""
        self._attribute_level += 1

    def leave_Attribute(self, node: cst.Attribute) -> None:
        """Record ``name.attr`` or extend the chain of the attribute just left."""
        base = node.value
        if isinstance(base, cst.Name):
            # Innermost link such as ``self.m``: start a new chain.
            self.names.append([base.value, node.attr.value])
            self._chain_tail = node
        elif base is self._chain_tail:
            # The base is the attribute whose chain was recorded last, so that
            # chain is still the final entry of ``self.names``: grow it.
            self.names[-1].append(node.attr.value)
            self._chain_tail = node
        # Otherwise the base is another kind of expression (for example a call)
        # and no recorded chain belongs to it. Appending to ``self.names[-1]``
        # here would fail on an empty list or corrupt an unrelated entry, so
        # nothing is recorded for this attribute.
        self._attribute_level -= 1

In [87]:
# Walk the wrapped module with a fresh collector. The final expression is the
# cell's displayed value: the list returned by get_info().
assign_collector = AssignCollector(ranges)
wrapper.visit(assign_collector)
assign_collector.get_info()

[([['jj']], {'names': [], 'ints': ['20'], 'floats': []}, 5),
 ([['k']], {'names': [], 'ints': ['8'], 'floats': []}, 9),
 ([['m']], {'names': [['pd', 'Series']], 'ints': ['20'], 'floats': []}, 10),
 ([['self', 'k'], ['ll']],
  {'names': [['jj']], 'ints': ['9', '10'], 'floats': []},
  13),
 ([['b'], ['mm']],
  {'names': [['self', 'k'], ['self', 'm', 'c'], ['self', 'k']],
   'ints': ['8'],
   'floats': []},
  14),
 ([['a']], {'names': [], 'ints': ['5'], 'floats': []}, 21),
 ([['b']], {'names': [['a']], 'ints': ['7'], 'floats': ['5.0']}, 22),
 ([['ll']], {'names': [['a']], 'ints': [], 'floats': ['6.4']}, 23),
 ([['c']], {'names': [['a'], ['b']], 'ints': ['3'], 'floats': []}, 24),
 ([['d']], {'names': [['b'], ['c']], 'ints': [], 'floats': []}, 25),
 ([['k']], {'names': [['int'], ['d']], 'ints': ['2'], 'floats': []}, 26),
 ([['b']], {'names': [], 'ints': ['8'], 'floats': []}, 32),
 ([['a']], {'names': [], 'ints': ['5'], 'floats': []}, 36),
 ([['b']], {'names': [['a']], 'ints': ['7'], 'floats

# Convert the collected assignments to a directed graph

In [88]:
# One row per tuple returned by the collector, in the order they were recorded.
call_df = pd.DataFrame(
    data=assign_collector.get_info(),
    columns=['assigned', 'data', 'line'],
)

In [89]:
# Show the freshly built frame (columns: assigned, data, line).
call_df

Unnamed: 0,assigned,data,line
0,[[jj]],"{'names': [], 'ints': ['20'], 'floats': []}",5
1,[[k]],"{'names': [], 'ints': ['8'], 'floats': []}",9
2,[[m]],"{'names': [['pd', 'Series']], 'ints': ['20'], ...",10
3,"[[self, k], [ll]]","{'names': [['jj']], 'ints': ['9', '10'], 'floa...",13
4,"[[b], [mm]]","{'names': [['self', 'k'], ['self', 'm', 'c'], ...",14
5,[[a]],"{'names': [], 'ints': ['5'], 'floats': []}",21
6,[[b]],"{'names': [['a']], 'ints': ['7'], 'floats': ['...",22
7,[[ll]],"{'names': [['a']], 'ints': [], 'floats': ['6.4']}",23
8,[[c]],"{'names': [['a'], ['b']], 'ints': ['3'], 'floa...",24
9,[[d]],"{'names': [['b'], ['c']], 'ints': [], 'floats'...",25


In [90]:
# call_df.explode('assigned')

In [91]:
def _get_names_from_data(a_dict):
    return a_dict['names']

In [92]:
# Add an 'assigner' column holding, per row, the 'names' entry of that row's 'data' dict.
# Note: this mutates call_df in place, so earlier displays of the frame are now stale.
call_df['assigner'] = call_df['data'].apply(_get_names_from_data)

In [93]:
# Show the frame again, now including the 'assigner' column.
call_df

Unnamed: 0,assigned,data,line,assigner
0,[[jj]],"{'names': [], 'ints': ['20'], 'floats': []}",5,[]
1,[[k]],"{'names': [], 'ints': ['8'], 'floats': []}",9,[]
2,[[m]],"{'names': [['pd', 'Series']], 'ints': ['20'], ...",10,"[[pd, Series]]"
3,"[[self, k], [ll]]","{'names': [['jj']], 'ints': ['9', '10'], 'floa...",13,[[jj]]
4,"[[b], [mm]]","{'names': [['self', 'k'], ['self', 'm', 'c'], ...",14,"[[self, k], [self, m, c], [self, k]]"
5,[[a]],"{'names': [], 'ints': ['5'], 'floats': []}",21,[]
6,[[b]],"{'names': [['a']], 'ints': ['7'], 'floats': ['...",22,[[a]]
7,[[ll]],"{'names': [['a']], 'ints': [], 'floats': ['6.4']}",23,[[a]]
8,[[c]],"{'names': [['a'], ['b']], 'ints': ['3'], 'floa...",24,"[[a], [b]]"
9,[[d]],"{'names': [['b'], ['c']], 'ints': [], 'floats'...",25,"[[b], [c]]"


In [94]:
def _get_scope_index(line_number, scopes_df):
    query_string = f"start_line_number <= {line_number} and end_line_number >= {line_number}"
    relevant_scoped_df = scopes_df.query(query_string).sort_values("length")
    scope_index = relevant_scoped_df.iloc[0]['scope_index']
    return scope_index

In [95]:
# NOTE(review): unpickling can execute arbitrary code; only load this file if its origin is trusted.
stack_df = pd.read_pickle('dd9d12a0f74047eb89df0ac6de4dd098.pkl')
# Not the last expression of the cell, so this column selection is evaluated but not displayed.
stack_df[["file_path", "function", "lineno", "locals_names"]]
# Take the 'scopes_df' entry from the 'locals' value of the row labelled 0.
# presumably a DataFrame with start_line_number / end_line_number / length / scope_index
# (the fields _get_scope_index reads) — TODO confirm against the code that wrote the pickle.
scopes_df = stack_df.loc[0, 'locals']['scopes_df']

In [96]:
# Look up, for every assignment line, the scope index returned by _get_scope_index.
call_df["scope_index"] = call_df["line"].apply(
    lambda line_number: _get_scope_index(line_number, scopes_df)
)

In [97]:
# Show the frame again, now including the 'scope_index' column.
call_df

Unnamed: 0,assigned,data,line,assigner,scope_index
0,[[jj]],"{'names': [], 'ints': ['20'], 'floats': []}",5,[],0
1,[[k]],"{'names': [], 'ints': ['8'], 'floats': []}",9,[],1
2,[[m]],"{'names': [['pd', 'Series']], 'ints': ['20'], ...",10,"[[pd, Series]]",1
3,"[[self, k], [ll]]","{'names': [['jj']], 'ints': ['9', '10'], 'floa...",13,[[jj]],2
4,"[[b], [mm]]","{'names': [['self', 'k'], ['self', 'm', 'c'], ...",14,"[[self, k], [self, m, c], [self, k]]",2
5,[[a]],"{'names': [], 'ints': ['5'], 'floats': []}",21,[],3
6,[[b]],"{'names': [['a']], 'ints': ['7'], 'floats': ['...",22,[[a]],3
7,[[ll]],"{'names': [['a']], 'ints': [], 'floats': ['6.4']}",23,[[a]],3
8,[[c]],"{'names': [['a'], ['b']], 'ints': ['3'], 'floa...",24,"[[a], [b]]",3
9,[[d]],"{'names': [['b'], ['c']], 'ints': [], 'floats'...",25,"[[b], [c]]",3
