This notebook contains scratch work on searching the ASTs of some Julia code for reasoning about types. I'll probably also have some plots in here.

# Setup

In [None]:
addprocs(6)

In [None]:
push!(LOAD_PATH, pwd());

In [None]:
using C
using ASTp
using DataStructures

In [None]:
reload("C")
reload("ASTp")

In [None]:
# Directories to scan. The absolute paths are specific to the author's machine.
test_dir = "test_modules/"
eco_dir = "/Users/isaac/Documents/julia_repos/"
base_dir = "/Users/isaac/github/julia/Base/"
# File listings for each directory, as returned by `C.search_dirs`.
test_files = C.search_dirs(test_dir)
eco_files = C.search_dirs(eco_dir)
base_files = C.search_dirs(base_dir);

# Examples

### methods using methods that use Types

In [None]:
@time begin
    # Defined on every process: the methods in Base found by `methodswith(Type, Base)` ...
    @everywhere type_mt = methodswith(Type, Base);
    # ... and the unique names (`m.func.name`) of the functions they belong to.
    @everywhere type_funcs = unique(map(x->x.func.name::Symbol, type_mt));
end

In [None]:
# Getting signatures for methods
# To interact with the signatures, use `sig.parameters`
# Built on every process as an ordered map: function name => Dict with
#   "sigs"       => the signatures (`m.sig`) of that function's methods in `type_mt`
#   "siglengths" => the distinct values of `length(sig.parameters)` over those signatures
@everywhere tmethod_info = let d = DataStructures.OrderedDict{Symbol,Dict}([t=>Dict{Any,Any}("sigs" => []) for t in type_funcs]...)
    # First pass: file every method's signature under its function name.
    for m in type_mt
        push!(d[m.func.name]["sigs"], m.sig)
    end
    # Second pass: summarise the signature lengths seen for each function.
    for v in values(d)
        v["siglengths"] = unique(Int[length(s.parameters) for s in v["sigs"]])
    end
    d
end
    

In [None]:
# parse_ast(test_files, 
# )
# Parses the file at `path` and runs the selector `s` over its AST.
# Returns the result of `C.parse_ast`. If `C.parse_ast` throws, the path and the
# error are printed and a one-element array holding the error is returned instead.
@everywhere function process_file(path::AbstractString, s::C.Selector)
    tree = C.parse_file(path)
    try
        return C.parse_ast(tree, s)
    catch err
        # Say which file failed, then hand the error back as the sole result.
        println(path)
        println(err)
        return [err]
    end
end

In [None]:
# # @everywhere function gettypefunctions(files, degree::Int)
# @everywhere function gettypefunctions(degree::Int, comparison=(==))
#         C.Selector(Any[
#             ASTp.isfunction, # Only look at functions
#             [
#                 ASTp.functionbody, # Inside their guts
#                 y->C.parse_ast(y,C.Selector([ # Count the number of functions they use which can be dispatched on Types
#                     x->isa(x,Expr),
#                     x->ASTp.iscalling(x,type_funcs)
#                     ])),
# #                 x->length(x)>degree # Filter by count of functions
#             x->comparison(length(x),degree)
#             ]
#         ]
#     )#)
# end

In [None]:
# @everywhere function gettypefunctions(files, degree::Int)
# Builds a `C.Selector` for declared (non-anonymous) functions. Within each
# function body it gathers the calls to functions named in `type_funcs`, then
# applies `comparison(n, degree)` to the number gathered (`comparison` defaults
# to `==`).
@everywhere function gettypefunctions(degree::Int, comparison=(==))
    C.Selector(Any[
        ASTp.isfunction,
        f -> !ASTp.isanon(f),
        [
            ASTp.functionbody,
            body -> C.parse_ast(body, C.Selector([
                node -> isa(node, Expr),
                node -> ASTp.iscalling(node, type_funcs),
                # Signatures should at least be of the right length.
                node -> length(node.args) in tmethod_info[node.args[1]]["siglengths"]
            ])),
            calls -> comparison(length(calls), degree)
        ]
    ])
end

In [None]:
# For one file, runs `process_file` with `gettypefunctions(k)` for k = 1..10 and
# returns the ten results in that order.
@everywhere function get_counts(file)
    return map(k -> process_file(file, gettypefunctions(k)), 1:10)
end




### I want to figure out input -> output types for some functions

* So I can use built in stuff.
* However, I'll need to know how to call things, so I'll want a mapping of functions to the module they were defined in.
* So I'll also need to be able to work with nested modules.

`module_tree` has been created


#### Issues

* `<|` is exported, but I can't find any definition being assigned to it

### return type inference on a method

In [None]:
# Pairs each field name of `f`'s type with that field's declared type.
# Both lookups go through `typeof(f)` so they always describe the same type;
# mixing `fieldnames(f)` with `fieldtype(typeof(f), ...)` disagrees when `f` is
# itself a type.
fieldsntypes(f) = map(x->(x, fieldtype(typeof(f), x)), fieldnames(typeof(f)))

In [None]:
fieldsntypes(temp2)

In [None]:
"""Trys to infer which types a method could return."""
function return_types(m::Method)
    # Runs `Core.Inference.typeinf` on the method's `func`, with its signature
    # converted by `Base.to_tuple_type`, and returns the second element of the
    # result. The first element is printed.
    atypes = Base.to_tuple_type(m.sig)
    # The static parameters are passed as a SimpleVector; wrap `m.tvars` when it
    # is not one already.
    sparams = isa(m.tvars, SimpleVector) ? m.tvars : Base.svec(m.tvars)
    @assert isa(sparams, SimpleVector)
    (_li, ty) = Core.Inference.typeinf(m.func, atypes, sparams)
    println(_li)
    return ty
end
return_types(type_mt[end-1])

In [None]:
"""
Counts how many time a method calls it's function.
"""
function same_function(x)
    # Returns the result of `parse_ast` over the function body, using a selector
    # built from `isexpr` and `iscalling(name)`; no count is computed here.
    # NOTE(review): these helpers are unqualified here but written as `C.`/`ASTp.`
    # elsewhere in the notebook, and `iscalling` is called with two arguments
    # there — confirm they are exported and that a one-argument form exists.
    name = functionname(x)
    body = functionbody(x)
    parse_ast(body, Selector([isexpr, iscalling(name)]))
end

# Filtering raw data

I pick up some weird stuff in the raw data. Here I'll be establishing what I'm clearing out.

In [None]:
"""Function to be applied to each element of list of function expressions to see if they could be filtered."""
function filter_expr(expr::Expr)
    # Returns `true` when `expr` should be kept. An expression whose first
    # argument is a `Bool` is rejected (some weird stuff in Markdown with the
    # `@breaking` macro). An expression with no arguments has nothing to inspect
    # and is kept rather than raising a `BoundsError`.
    if !isempty(expr.args) && isa(expr.args[1], Bool)
        return false
    end
    return true
end
# Anything that is not an `Expr` (e.g. an exception captured during parsing) is rejected.
filter_expr(not_expr::Any) = false

# Take 1

Getting data out and playing around with it. Here I tried to go by how much type algebra each function had

In [None]:
# Run `get_counts` over every Base file in parallel; one result per file.
raw = pmap(get_counts, base_files);


In [None]:
# Same scan with `gettypefunctions(10, >)`, i.e. the comparison is `>` against 10.
tenplus = pmap(x->process_file(x, gettypefunctions(10, >)),base_files);

In [None]:
# Append each file's `tenplus` result to that file's entry in `raw`.
for i in 1:length(tenplus)

    push!(raw[i], tenplus[i])
end 

In [None]:
# Persist the raw results. The do-block form closes the file handle once the
# write has finished, instead of leaving it open.
open("data/type_func_calls.jld", "w") do io
    serialize(io, raw)
end

In [None]:
# Reload the raw results; `open(f, path)` closes the handle after `f` returns.
raw = open(deserialize, "data/type_func_calls.jld");

In [None]:
# Turns a list of per-file result lists into a matrix by `hcat`-ing them, one
# column per element of `raw`. Any cell holding exactly one element that is an
# `Exception` is replaced by an empty `Any[]` first. Works on a deep copy, so
# `raw` itself is not modified.
function to2d(raw)
    cleaned = deepcopy(raw)
    for column in cleaned, j in 1:length(column)
        cell = column[j]
        if length(cell) == 1 && isa(cell[1], Exception)
            column[j] = Any[]
        end
    end
    return reduce(hcat, cleaned)
end
# Matrix form of the raw results ...
raw2d = to2d(raw)
# ... and the length of every cell in it.
t_counts = map(length, raw2d)
# us
# @as _ raw2d[11,:] filter(x->length(x)>0, _) reduce(append!, Expr[],_) join(_,"\n") clipboard(_)

## Collection and playtime with data

In [None]:
using Lazy
# collecting data to look at
# NOTE(review): `counts` is not assigned anywhere in the visible notebook; it looks
# like the matrix now called `t_counts` — confirm before re-running.
huge = counts[11,:].>0
# Join every non-empty entry of row 11 of `raw2d` and copy the text to the clipboard.
@as _ raw2d[11,:] filter(x->length(x)>0, _) reduce(append!, Expr[],_) join(_,"\n") clipboard(_)
# Finding files with functions which do a lot of reasoning
base_files[counts[11,:] .> 0]
# Finding files that do a lot with types: weight each row by its index and sum per file.
# `[1:11;]` concatenates the range into a vector of weights. A bare `[1:11]` relies
# on deprecated auto-concatenation and becomes a one-element array holding the range.
let temp = mapslices(x->sum(x .* [1:11;]), counts, 1)
    temp = vcat(reshape(base_files, 1, length(base_files)), temp)
    sortcols(temp, by=y->y[2], rev=true)
end

In [None]:
# Re-run the `gettypefunctions(10, >)` scan, replace each file's result by its
# length in place, and append those lengths to `t_counts` as a new bottom row.
t_counts = let temp = pmap(x->process_file(x, gettypefunctions(10, >)),base_files)
    map!(length, temp)
    vcat(t_counts, reshape(temp, 1, length(temp)))
end

In [None]:
# Per-file score: every row of `t_counts` weighted by its row index (1..11) and
# summed down each column. The scores are stacked under the file names, the
# columns are sorted by score, and the result is rotated with `rotl90`.
per_file = let temp = mapslices(x->sum(x .* [1:11;]), t_counts, 1)
    temp = vcat(reshape(base_files, 1, length(base_files)), temp)
    temp= sortcols(temp, by=y->y[2])
    rotl90(temp)
end

In [None]:
# NOTE(review): `counts` is not assigned anywhere in the visible notebook — confirm
# which matrix is meant (possibly `t_counts`).
open("data/tycounts.jld","w") do f
    serialize(f, counts)
end
# Round-trip check. `open(deserialize, path)` closes the handle after reading,
# instead of leaving it open.
counts == open(deserialize, "data/tycounts.jld")

In [None]:
rmprocs(workers())

In [None]:
# Wide table: one row per Base file, plus columns count_1 ... count_11 filled
# from rows 1..11 of `t_counts`.
df = DataFrame()
df[:file] = base_files
for count_num in 1:11
    df[symbol(string("count_",count_num))] = t_counts[count_num,:] #temp3[count_num, :]
end

In [None]:
# Reshape to long format by stacking the count columns, keyed by :file.
# NOTE(review): only count_1..count_10 are stacked although `df` is given
# count_1..count_11 — confirm that leaving out count_11 is intended.
countcols = [symbol(string("count_",i)) for i in 1:10]
df2 = stack(df, countcols, :file)
# Recover the numeric suffix from each "count_N" name (characters 7 onward).
df2[:count] = map(x->parse(Int, string(x)[7:end]), df2[:variable])
df2 = df2[[:file,:count,:value]]


In [None]:
# serialize(open("data/typerdf.jld", "w"), df2)
writetable("data/typerdf.dat", df2)

# Take 2: Finding a better way to organize my data

Here I am going to try and keep the data around. Basically I want to have a format where it's easy for me to interrogate the data.

Right now a dataframe in long format is looking appealing, as I can just groupby

In [None]:
# Scan every Base file in parallel with `gettypefunctions(1, >)`.
raw = pmap(base_files) do file
    process_file(file, gettypefunctions(1, >))
end;
# Filter on the local process: `filter_expr` is defined without `@everywhere`,
# so worker processes cannot resolve it, and the filtering itself is cheap.
raw = map(x->filter(filter_expr, x), raw)

In [None]:
rmprocs(workers())

In [None]:
using DataFrames
import DataFrames.head
using JLD

In [None]:
# One row per Base file, with the list of expressions collected from it.
df = DataFrame()
df[:file] = convert(Array{ASCIIString}, base_files)
df[:exprs] = raw;
# Order rows by the length of their expression list, largest first.
sort!(df, cols = order(:exprs, by=length), rev=true);
# Drop the leading `base_dir` portion of every path. The offset is derived from
# `base_dir` rather than hard-coded as 32, which only fits one machine's path.
# Assumes every entry of `base_files` starts with `base_dir` — TODO confirm.
df[:file] = map(x->x[length(base_dir)+1:end], df[:file].data);


In [None]:
# Maps `field([:args, 1])` over every node of `x` selected by `isexpr` + `iscall`.
function collect_calls(x)
    calls = parse_ast(x, Selector([isexpr, iscall]))
    return map(field([:args, 1]), calls)
end

"""
Entries of an accumulator ordered by descending count (the second element of each
entry). With `k`, `select!` is asked for positions `1:k` only; without it, for all.
Literally copy pasted from stack overflow; `select!` works on a collected copy.
"""
function most_common(c::Accumulator, k)
    entries = collect(c)
    return select!(entries, 1:k, by = kv -> kv[2], rev = true)
end
most_common(c::Accumulator) = most_common(c, length(c))

## Expanding DataFrame by expression

* Each expression gets its own row
* **Names** of functions defined by expression are also extracted

In [None]:
"""Extract function names from expressions"""
function exprnames(exprs)
    # Definitions with strange syntax make `ASTp.functionname` throw; those
    # entries come back as `nothing`.
    return map(exprs) do x
        try
            ASTp.functionname(x)
        catch
            # On its own line so `nothing` is the value of the catch block, not
            # the variable name the caught exception gets bound to.
            nothing
        end
    end
end

In [None]:
# Expanding dataframe by expression
exdf = by(df, :file) do subdf
    # Only the first row of each group is used; its expression list becomes the
    # rows of the per-file frame.
    DataFrame(expr=subdf[1, :exprs])
end
# Filtering out bounds errors from earlier
exdf = exdf[![isa(x,BoundsError) for x in exdf[:expr]],:]
exdf[:expr] = convert(Array{Expr,1}, exdf[:expr].data) # Now we have a single type.;
exdf[:name] = exprnames(exdf[:expr]); # Has errors, probably shouldn't index on this.;

In [None]:
# Getting most common functions for the files in the expressions.
# NOTE(review): the per-file tally `temp` (and `f`) is computed but never used;
# each group returns `DataFrame(subdf[:])` instead — looks unfinished.
let tempdf = deepcopy(exdf)
    tempdf[:functions] = map(collect_calls, tempdf[:expr])
    by(tempdf, :file) do subdf
        f = subdf[1, :file]
        # Flatten the per-expression call lists for this file.
        fs = vcat(subdf[:functions]...)
        # Keep only names in `type_funcs`, count them, and order by count.
        temp = filter(x->x in type_funcs, fs) |> counter |> most_common#x->x.map |> collect
#         DataFrame(counts = counter() |> most_common)
        DataFrame(subdf[:])
    end
end

In [None]:
# Most common over all
# Flattens the calls collected from every expression, keeps the names found in
# `type_funcs`, tallies them with `counter`, orders them with `most_common`, and
# passes the result to `Base.showarray`.
let temp = map(collect_calls, exdf[:expr])
    temp = vcat(temp...)
    filter(x->x in type_funcs, temp) |> counter |> most_common |> Base.showarray
end

In [None]:
# Per-file tally of every call made by the collected expressions.
# The cell was left unterminated and grouped `exdf`, which does not have the
# `:method_calls` column; it now groups the copy that the column is added to.
let tempdf = deepcopy(exdf)
    tempdf[:method_calls] = map(collect_calls, tempdf[:expr])
    by(tempdf, :file) do subdf
        counter(vcat(subdf[:method_calls]...))
    end
end

In [None]:
# Grouped by expression: one output row per `type_funcs` function the expression
# calls, with the number of times it is called. Only the first row of each group
# is read.
callsdf = by(exdf, :expr) do subdf
    fname = subdf[1, :name]
    fpath = subdf[1, :file]
    fexpr = subdf[1, :expr]
    head = ASTp.functionhead(fexpr)
    tallies = collect_calls(fexpr) |> counter |> most_common
    called = map(kv -> kv[1], tallies)
    tally = map(kv -> kv[2], tallies)
    # Restrict both columns to the calls whose name is in `type_funcs`.
    keep = findin(called, type_funcs)
    called = called[keep]
    tally = tally[keep]
    n = length(called)
    DataFrame(file=fill(fpath, n), name=fill(fname, n), sig=fill(head, n), called=called, count=tally)
end;

In [None]:
# Collapse `callsdf` to one row per expression, with `count` the sum of that
# expression's per-call counts, then sort by that total, largest first.
temp = by(callsdf, :expr) do subdf
    # TODO some values give multiple expressions
    name = subdf[1,:name]
    file = subdf[1,:file]
    sig = subdf[1,:sig]
    DataFrame(file=file, name=name, sig=sig, count=sum(subdf[:count]))
end |> x->sort(x, cols=:count, rev=true)

In [None]:
using RCall

In [None]:
# Replace the :expr, :name and :sig columns by their string representations.
for col in (:expr, :name, :sig)
    temp[col] = map(string, temp[col])
end

In [None]:
g = globalEnv
reval(rparse("dfls <- NULL"))

In [None]:
# Parses the string `x` with `rparse` and evaluates the result with `reval`.
function rrun(x::AbstractString)
    return reval(rparse(x))
end

In [None]:
rrun("dfls <- NULL")

In [None]:
# Copy `temp` into R one column at a time: store the column under `:colcnm` in
# `g`, then run R code assigning `colcnm` to `dfls$<column name>`.
for cnm in DataFrames._names(temp)
    g[:colcnm] = RCall.sexp(convert(Array, temp[cnm]))
    reval(rparse("dfls\$$cnm <- colcnm")) # Make a R list
end

In [None]:
rrun("df <- data.frame(dfls)");

In [None]:
rrun("library(WriteXLS)")

In [None]:
rrun("""WriteXLS(df, "test.xls")""")

In [None]:
rrun("save(file='dfjulia.RData', df)")

# Writing out data

In [None]:
# Write the selected columns of `temp` to a JLD file under the key "df".
# NOTE(review): the cell that builds `temp` creates file/name/sig/count columns
# but no `:called` column — confirm `temp` still has `:called` at this point.
jldopen("data/plotting/df.jld", "w") do f
    write(f, "df", temp[[:file,:name,:sig,:called,:count]])
end

# TODO

* Make a taxonomy of julia asts that will reason about types
    * With identifying methods
        * These could then be nested to find more precise things
* Read that Julia paper.


In [None]:
# Walks the method table of `f` and appends to `meths` every method whose
# `func.rettype` matches `t`; returns `meths`.
function methodsreturning(t::Type, f::Function, showparents::Bool=false, meths = Method[])
    # A return type `x` matches when it is "close enough" to `t`, or — with `Any`
    # and `ANY` excluded — when it is a supertype of `t` that is not an unbounded
    # TypeVar (showparents), or a bounded TypeVar whose upper bound equals `t`.
    returns_t = x -> (Base.type_close_enough(x, t) ||
                      (showparents ? (t <: x && (!isa(x,TypeVar) || x.ub != Any)) :
                       (isa(x,TypeVar) && x.ub != Any && t == x.ub)) &&
                      x != Any && x != ANY)
    d = typeof(f).name.mt.defs
    while d !== nothing
        returns_t(d.func.rettype) && push!(meths, d)
        d = d.next
    end
    return meths
end

# Gathers matching methods from every function bound to a name in `names(m)`;
# returns them with duplicates removed.
function methodsreturning(t::Type, m::Module, showparents::Bool=false)
    found = Method[]
    for sym in names(m)
        isdefined(m, sym) || continue
        val = getfield(m, sym)
        isa(val, Function) || continue
        methodsreturning(t, val, showparents, found)
    end
    return unique(found)
end

# Gathers matching methods from every module bound to a name in the current
# module; returns them with duplicates removed.
function methodsreturning(t::Type, showparents::Bool=false)
    found = Method[]
    here = current_module()
    # find modules in Main
    for sym in names(here)
        isdefined(here, sym) || continue
        val = getfield(here, sym)
        if isa(val, Module)
            append!(found, methodsreturning(t, val, showparents))
        end
    end
    return unique(found)
end

In [None]:
methodsreturning(Int, foo)

In [None]:
foo(x::Int) = x::Int
foo(1)

In [None]:

# Debugging aid: walk the method list of `foo` and print the `func.rettype` of
# every entry. `t` and `showparents` are only referenced by the commented-out
# filter below.
let d = typeof(foo).name.mt.defs
    t = Int
    meths = []
    showparents = true
    while d !== nothing
        push!(meths, d.func.rettype) # I could probably just dump this and check it with a regular expression.
        
#         if (x -> (Base.type_close_enough(x, t) ||
#                     (showparents ? (t <: x && (!isa(x,TypeVar) || x.ub != Any)) :
#                       (isa(x,TypeVar) && x.ub != Any && t == x.ub)) &&
#                      x != Any && x != ANY))(d.func.rettype)
#             push!(meths, d)
#         end
        d = d.next
    end
    println(meths)
end

In [None]:
methodsreturning(Type, rem) 

In [None]:
@time methodswith(Function);

In [None]:
@time methodswith2(Type)