In [None]:
using SemanticModels
using Random
using DataFrames
using IterableTables # need a way to bundle all of the cookbook programs' dependencies and ensure these are all installed
using MetaGraphs
using RandomNumbers
using LightGraphs
using Cassette
using DataFrames


include("../src/parse.jl")
include("../src/cassette.jl")


We want to ingest the main method of a program, and recursively enumerate: (1) the set of subroutines, S = {s | s is called by main} that main calls; (2) the set of variables that main interacts with (as args, or in the form of locally scoped assignment statements), V = {v | v is passed to main, or is a local variable inside of main}; (3) the set of sub-subroutines (e.g., subroutines called by each sub-routine called by main), $S_{sr}$ = {s.sr | s.sr called by s; $s \in S$}; and (4) the set of variables that the set of methods, $S$, interacts with, which may be global (w/respect to main) or local (w/respect to S) in scope, $V_{sr}$ = {s.sr.v | v is passed to an s.sr $\in S_{sr}$, or is a local variable inside of an s.sr $\in S_{sr}$}

We can extract the set $S$ by indexing into the lowered code representation of main, and grepping for all method calls that are prefaced by main:
```
code_info_obj = @code_lowered main(args...)
V = {x | x $\in$ code_info_obj.slotnames}
```

We can extract the set $V$ by accessing the slotnames of the lowered representation of main: 
```
code_info_obj = @code_lowered main(args...)
V = {x | x $\in$ code_info_obj.slotnames}
```

We can index into the resulting dataframes to find $S_{sr}$ and $V_{sr}$.

In [None]:
# These are some toy problems that contain sub-routine calls, (trivial) recursive calls, and/or empty args.
function f(a, b)
    c = a + b
    d = g(c)
    
    return d
end

function g(x)
    res = abs(x)
    res -= 1
    
    while res > 0
        res = g(res)
    end

    return res
end

function h()
    rng = MersenneTwister(1234);
    x = pi*Base.randn(rng,1)
    return x
end


function i(a::Int64, b::Int64)
    
    var_a = a
    var_b = b
    
    c = f(var_a, var_b)
    d = g(c)
    d *= h()
    return d
end

function j(x)
    z = x
    y = 2
    result = i(x,y)
    return result
end

# right now, we can't catch numeric literals. we need variable declarations (otherwise, we have to catch values, and differentiate between literals and non-literals)

In [105]:
# This is an example of the types of output we can get when we call @code_lowered on a function:
code_info_j = @code_lowered i(2,3)
println("Code info: \n", code_info_j, "\n")
println("Code info.code \n", code_info_j.code, "\n")
println("Code info.slotnames: \n", code_info_j.slotnames, "\n")
println("Code info.slotflags: \n", code_info_j.slotflags, "\n")

Code info: 
CodeInfo(
[90m[77G│[1G[39m[90m28 [39m1 ─     var_a = a
[90m[77G│[1G[39m[90m29 [39m│       var_b = b
[90m[77G│[1G[39m[90m31 [39m│       c = (Main.f)(var_a, var_b)
[90m[77G│[1G[39m[90m32 [39m│       d = (Main.g)(c)
[90m[77G│[1G[39m[90m35 [39m└──     return d
)

Code info.code 
Any[:(_4 = _2), :(_5 = _3), :(_6 = (Main.f)(_4, _5)), :(_7 = (Main.g)(_6)), :(return _7)]

Code info.slotnames: 
Any[Symbol("#self#"), :a, :b, :var_a, :var_b, :c, :d]

Code info.slotflags: 
UInt8[0x00, 0x00, 0x00, 0x10, 0x10, 0x10, 0x10]



In [184]:
mutable struct MethodCallsSubroutineFrame{src, dst, dst_args, edge_type}
    src::src
    dst::dst
    dst_args::dst_args
    edge_type::edge_type
end

df = DataFrame(src = Function[], dst = Any[], dst_args = Any[], edge_type=String[])

function get_slotnames(dst_args_match, slotnames)
    
    varnames_to_use = []
                
    for (i,args) in enumerate([dst_args_match])

        args_list = split(string(args), ",")
        
        # Remove all non-numeric characters, and grab the slotname associated with each arg        
        for (i,a) in enumerate(args_list)
            
            a = replace(a, r"[^0-9]+" =>"")
            push!(varnames_to_use, getindex(slotnames, parse(Int64, a)))
        end
    end
                
    return varnames_to_use
end

function get_literal_numeric_vals(numeric_dst_args_match)
    
    varnames_to_use = []
                
    for (i,args) in enumerate([dst_args_match])

        args_list = split(string(args), ",")
        
        # Remove all non-numeric characters, and grab the slotname associated with each arg        
        for (i,a) in enumerate(args_list)
            
            a = replace(a, r"[^0-9]+" =>"")
            push!(varnames_to_use, parse(Float64, a))
        end
    end
                
    return varnames_to_use
end
    

function get_set_S(input_df, f, args...)
    
    println("f: ", f)
    println("args: ", args)
    println("df dims: ", size(input_df))
    
    
    code_info_obj = @code_lowered f(args...)
    println("type of code_info_obj: ", typeof(code_info_obj))


    for (i, line) in enumerate(code_info_obj.code)
        
        # TODO: do we need to handle module names?
        # Any functions called by f (the function passed in) will be prefaced by Main
        m = match(r"Main.*?(?=\))", string(line))
        
        if typeof(m) != Nothing
            
            # Top-level function f calls the callee
            callee = eval(Symbol(split(string(m.match), ".")[2])) 

            slot_dst_args = match(r"\(_\d+.*",string(line))
   
            # If there's no match, the method we've detected doesn't have any non-literal args
            # If the number of matches >= 1, we need to get the slotnames associated with each function's args
            if typeof(slot_dst_args) != Nothing 

                slot_varnames_to_use = get_slotnames(slot_dst_args.match, code_info_obj.slotnames)
                frame = MethodCallsSubroutineFrame(f, callee, slot_varnames_to_use, "method_calls_subroutine")
                
            else
                frame = MethodCallsSubroutineFrame(f, callee, (), "method_calls_subroutine")
            end

            push!(input_df, (frame.src, frame.dst, frame.dst_args, frame.edge_type))
            
        end
            
    end

    
    return input_df  
    
end

function get_set_S(high_level_output_df)
    
    tempdf = DataFrame(src = Any[], dst = Any[], dst_args = Any[], edge_type=String[])
    
    for row in eachrow(high_level_output_df)        
        tempdf = get_set_S(tempdf, row.dst, row.dst_args...)
    end
    
    outdf = append!(high_level_output_df, tempdf)

    return outdf

end

function run_get_S(input_func, args...)
    init_df = DataFrame(src = Function[], dst = Any[], dst_args = Any[], edge_type=String[])
    high_level_call_graph = get_set_S(init_df, input_func, args...)
    println("High-level function calls subroutine graph created \n")
    println(high_level_call_graph, "\n")
    println("Creating second-order subroutines call subroutines graph \n")
    second_order_call_graph = get_set_S(high_level_call_graph)
    return second_order_call_graph
end
    
run_get_S(f, 2, 4)



f: f
args: (2, 4)
df dims: (0, 4)
type of code_info_obj: Core.CodeInfo
High-level function calls subroutine graph created 

1×4 DataFrame
│ Row │ src      │ dst │ dst_args │ edge_type               │
│     │ [90mFunction[39m │ [90mAny[39m │ [90mAny[39m      │ [90mString[39m                  │
├─────┼──────────┼─────┼──────────┼─────────────────────────┤
│ 1   │ f        │ g   │ [:c]     │ method_calls_subroutine │

Creating second-order subroutines call subroutines graph 

f: g
args: (:c,)
df dims: (0, 4)
type of code_info_obj: Core.CodeInfo


Unnamed: 0_level_0,src,dst,dst_args,edge_type
Unnamed: 0_level_1,Function,Any,Any,String
1,f,g,[:c],method_calls_subroutine
2,g,abs,[:x],method_calls_subroutine
3,g,g,[:res],method_calls_subroutine


In [186]:
run_get_S(g, 2)

f: g
args: (2,)
df dims: (0, 4)
type of code_info_obj: Core.CodeInfo
High-level function calls subroutine graph created 

2×4 DataFrame
│ Row │ src      │ dst │ dst_args │ edge_type               │
│     │ [90mFunction[39m │ [90mAny[39m │ [90mAny[39m      │ [90mString[39m                  │
├─────┼──────────┼─────┼──────────┼─────────────────────────┤
│ 1   │ g        │ abs │ [:x]     │ method_calls_subroutine │
│ 2   │ g        │ g   │ [:res]   │ method_calls_subroutine │

Creating second-order subroutines call subroutines graph 

f: abs
args: (:x,)
df dims: (0, 4)
type of code_info_obj: Array{Union{Nothing, CodeInfo},1}


ErrorException: type Array has no field code

In [187]:
run_get_S(h)

f: h
args: ()
df dims: (0, 4)
type of code_info_obj: Core.CodeInfo
High-level function calls subroutine graph created 

1×4 DataFrame
│ Row │ src      │ dst             │ dst_args │ edge_type               │
│     │ [90mFunction[39m │ [90mAny[39m             │ [90mAny[39m      │ [90mString[39m                  │
├─────┼──────────┼─────────────────┼──────────┼─────────────────────────┤
│ 1   │ h        │ MersenneTwister │ ()       │ method_calls_subroutine │

Creating second-order subroutines call subroutines graph 

f: MersenneTwister
args: ()
df dims: (0, 4)
type of code_info_obj: Core.CodeInfo


Unnamed: 0_level_0,src,dst,dst_args,edge_type
Unnamed: 0_level_1,Function,Any,Any,String
1,h,MersenneTwister,(),method_calls_subroutine


In [188]:
run_get_S(i, 5, 4)

f: i
args: (5, 4)
df dims: (0, 4)
type of code_info_obj: Core.CodeInfo
High-level function calls subroutine graph created 

2×4 DataFrame
│ Row │ src      │ dst │ dst_args         │ edge_type               │
│     │ [90mFunction[39m │ [90mAny[39m │ [90mAny[39m              │ [90mString[39m                  │
├─────┼──────────┼─────┼──────────────────┼─────────────────────────┤
│ 1   │ i        │ f   │ [:var_a, :var_b] │ method_calls_subroutine │
│ 2   │ i        │ g   │ [:c]             │ method_calls_subroutine │

Creating second-order subroutines call subroutines graph 

f: f
args: (:var_a, :var_b)
df dims: (0, 4)
type of code_info_obj: Core.CodeInfo
f: g
args: (:c,)
df dims: (1, 4)
type of code_info_obj: Core.CodeInfo


Unnamed: 0_level_0,src,dst,dst_args,edge_type
Unnamed: 0_level_1,Function,Any,Any,String
1,i,f,"[:var_a, :var_b]",method_calls_subroutine
2,i,g,[:c],method_calls_subroutine
3,f,g,[:c],method_calls_subroutine
4,g,abs,[:x],method_calls_subroutine
5,g,g,[:res],method_calls_subroutine


In [189]:
run_get_S(j,2)

f: j
args: (2,)
df dims: (0, 4)
type of code_info_obj: Core.CodeInfo
High-level function calls subroutine graph created 

1×4 DataFrame
│ Row │ src      │ dst │ dst_args │ edge_type               │
│     │ [90mFunction[39m │ [90mAny[39m │ [90mAny[39m      │ [90mString[39m                  │
├─────┼──────────┼─────┼──────────┼─────────────────────────┤
│ 1   │ j        │ i   │ [:x, :y] │ method_calls_subroutine │

Creating second-order subroutines call subroutines graph 

f: i
args: (:x, :y)
df dims: (0, 4)
type of code_info_obj: Array{Union{Nothing, CodeInfo},1}


ErrorException: type Array has no field code

In [153]:
# In progress
mutable struct MethodInteractWithVarFrame{src, dst, dst_args, edge_type}
    src::src
    dst::dst
    dst_args::dst_args
    edge_type::edge_type
end

df = DataFrame(src = Function[], dst = Symbol[], dst_args = Expr[], edge_type=String[])


function get_set_V(f, args...)
    
    code_info_obj = @code_lowered f(args...)
    
    # the first element of the slotnames array is Symbol("#self#")
    f_vars = code_info_obj.slotnames[2:length(code_info_obj.slotnames)]
    
    for v in f_vars
        frame = MethodInteractWithVarFrame(f, v, Expr(:nothing), "method_interacts_w_variable" )            
        push!(df, (frame.src, frame.dst, frame.dst_args, frame.edge_type))
        
    end
   
    return df
end



out = get_set_V(i, 2,3)

Unnamed: 0_level_0,src,dst,dst_args,edge_type
Unnamed: 0_level_1,Function,Symbol,Expr,String
1,i,a,:($(Expr(:nothing))),method_interacts_w_variable
2,i,b,:($(Expr(:nothing))),method_interacts_w_variable
3,i,var_a,:($(Expr(:nothing))),method_interacts_w_variable
4,i,var_b,:($(Expr(:nothing))),method_interacts_w_variable
5,i,c,:($(Expr(:nothing))),method_interacts_w_variable
6,i,d,:($(Expr(:nothing))),method_interacts_w_variable
