In [1]:
 import ast
 import inspect
 import pandas as pd
    
kwords = {'ast', 'inspect','np'}

In [None]:
# BNF Notation
# program ::= statement
#            |statement program 
# statement =Return(expr)
#      | Assign(expr* targets, expr value)
#      | Expr(expr value)
        
# expr = BinOp(expr left, operator op, expr right)
#          | Call(expr Func, expr* args, keyword* keywords)
#          | Str(string s)
#          | Attribute(expr value, identifier Attr)
#          | Name(identifier id)
#          | List(expr* elts)
#          | Dict(expr* keys, expr* values)
         
# operator ::= Add | Sub 
# Attr ::= DataFrame  
# arguments = (arg* args)
# arg = (identifier arg, expr? annotation)

In [2]:
def test():
#     x = np.zeros(1,1)
#     x = np.array([2,3,1,0])
    d = {'col1': [1, 2], 'col2': [3, 4]}
    df = pd.DataFrame(data=d)
    return df
    
test()
example = ast.parse(inspect.getsource(test))
ast.dump(example)

"Module(body=[FunctionDef(name='test', args=arguments(args=[], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=[Assign(targets=[Name(id='d', ctx=Store())], value=Dict(keys=[Str(s='col1'), Str(s='col2')], values=[List(elts=[Num(n=1), Num(n=2)], ctx=Load()), List(elts=[Num(n=3), Num(n=4)], ctx=Load())])), Assign(targets=[Name(id='df', ctx=Store())], value=Call(func=Attribute(value=Name(id='pd', ctx=Load()), attr='DataFrame', ctx=Load()), args=[], keywords=[keyword(arg='data', value=Name(id='d', ctx=Load()))])), Return(value=Name(id='df', ctx=Load()))], decorator_list=[], returns=None)])"

In [5]:
class conforms(ast.NodeVisitor):
    
    def __init__(self,verbose=False,kwords={'pd','df'}):
        # Any attributes
        self.keywords = {'print'}.union(kwords)
        self.env = {}
        self.verbose = verbose
        
    # Helper
    def save_to_env(self,key,value):
        if key not in self.env:
            self.env[key] = value
        else:
            pass
        
    def visit_Module(self,node): 
        if self.verbose:
            print(type(node))           
        results = [self.visit(stmt) for stmt in node.body]
        if self.verbose:
            print()
            print('Results: ' + str(results))
            print('Environment: ' + str(self.env))
        return all(results)
    
    def visit_FunctionDef(self,node):
        # self.save_to_env(node.name)
        # save the arguments as well
        if self.verbose:
            print(str(type(node)) + ' : ' + node.name + str([arg.arg for arg in node.args.args])) 
        self.save_to_env(node.name,True)
        results = [self.visit(stmt) for stmt in node.body]
        if self.verbose:
            print(results)
        return all(results)
    
    def visit_Return(self,node):
        if self.verbose:
            print(type(node))
        return self.visit(node.value)
    
    def visit_Name(self,node):
        name = node.id
        if self.verbose:
            print(str(type(node)) + ' : ' + name)
        if name in self.keywords or name in self.env:            
            return True
        else:
            return False
    
    def visit_Assign(self,node):
        if self.verbose:
            print(type(node))        
        # Assume there is only 1 target
        self.save_to_env(node.targets[0].id,True)
        self.visit(node.targets[0])
        return self.visit(node.value)
    
    def visit_Call(self,node):
        if self.verbose:
            print(type(node))
        return self.visit(node.func) and all([self.visit(arg) for arg in node.args])

    def visit_Str(self,node):
        if self.verbose:
            print(type(node))
        return True
        
    def visit_Import(self,node):
        if self.verbose:
            print(str(type(node)) + ' : ' + str([alias.asname for alias in node.names]))
        # Assume only one import per line
        self.save_to_env(node.names[0].asname,node.names[0].name)
        return True
    
    def visit_Attribute(self,node):
        # node.value -> 
        # node.attr -> identifier
        if self.verbose:
            print(str(type(node)) + ' : ' + node.attr) 
        if node.attr in ['DataFrame','concat']:
            return True  
        else:
            return False
#         return self.visit(node.value)        
        
        
    def visit_Expr(self,node):
        if self.verbose:
            print(type(node))
        return self.visit(node.value)
    
    def visit_BinOp(self,node):
        if self.verbose:
            print(type(node))        
        # we can also look at the operator if necessary
        return self.visit(node.left) and self.visit(node.right)
    
    def visit_List(self,node):
        if self.verbose:
            print(type(node))        
        return [self.visit(elt) for elt in node.elts]
    

        # print(node)
    
    def visit_Num(self,node):
        if self.verbose:
            print(str(type(node)) + ' : ' + str(node.n))  
        return True
    
    def visit_Dict(self,node):
        if self.verbose:
            print(str(type(node)))
        return all([self.visit(key) for key in node.keys] and [self.visit(value) for value in node.values])
    
#     def visit_Slice(self,node):
#         if self.verbose:
#             print(type(node))
#         return self.visit(node.value)
    
def is_valid(kwords = {}, verbose=False):
    def wrapper(obj):
        node = ast.parse(inspect.getsource(obj))
        print(ast.dump(node))
        conform = conforms(verbose,kwords).visit(node)
        print("The Object " + str(obj) + " has: " + ("Valid Syntax" if conform else "Invalid Syntax"))
    return wrapper

@is_valid(kwords,verbose=True)
def test():
#     x = np.array([2,3,1,0])
    d = {'col1': [1, 2], 'col2': [3, 4]}
    df = pd.DataFrame(data=d)
    return df

Module(body=[FunctionDef(name='test', args=arguments(args=[], vararg=None, kwonlyargs=[], kw_defaults=[], kwarg=None, defaults=[]), body=[Assign(targets=[Name(id='d', ctx=Store())], value=Dict(keys=[Str(s='col1'), Str(s='col2')], values=[List(elts=[Num(n=1), Num(n=2)], ctx=Load()), List(elts=[Num(n=3), Num(n=4)], ctx=Load())])), Assign(targets=[Name(id='df', ctx=Store())], value=Call(func=Attribute(value=Name(id='pd', ctx=Load()), attr='DataFrame', ctx=Load()), args=[], keywords=[keyword(arg='data', value=Name(id='d', ctx=Load()))])), Return(value=Name(id='df', ctx=Load()))], decorator_list=[Call(func=Name(id='is_valid', ctx=Load()), args=[Name(id='kwords', ctx=Load())], keywords=[keyword(arg='verbose', value=NameConstant(value=True))])], returns=None)])
<class '_ast.Module'>
<class '_ast.FunctionDef'> : test[]
<class '_ast.Assign'>
<class '_ast.Name'> : d
<class '_ast.Dict'>
<class '_ast.Str'>
<class '_ast.Str'>
<class '_ast.List'>
<class '_ast.Num'> : 1
<class '_ast.Num'> : 2
<class 

In [50]:
class infer_type(ast.NodeVisitor):
    
    def visit_Module(self,node): 
        global count
        count={}
        total=0
        for s in node.body:
            total=total+self.visit(s) 
        return total
    
    def visit_Expr(self, node):
        return self.visit(node.value)
    
    def visit_FunctionDef(self,node):
        # self.save_to_env(node.name)
        # save the arguments as well
        if self.verbose:
            print(str(type(node)) + ' : ' + node.name + str([arg.arg for arg in node.args.args])) 
        self.save_to_env(node.name,True)
        results = [self.visit(stmt) for stmt in node.body]
        if self.verbose:
            print(results)
        return all(results)
    
    def visit_Return(self,node):
        
        return self.visit(node.value)
    
    def visit_Name(self,node):
        if node.id in count:
            return count[node.id]
#         name = node.id
#         if self.verbose:
#             print(str(type(node)) + ' : ' + name)
#         if name in self.keywords or name in self.env:            
#             return True
#         else:
#             return False
    
    def visit_Assign(self,node):
        if self.verbose:
            print(type(node))        
        # Assume there is only 1 target
        self.save_to_env(node.targets[0].id,True)
        self.visit(node.targets[0])
        return self.visit(node.value)
    
#     def visit_Call(self,node):
#         if self.verbose:
#             print(type(node))
#         return self.visit(node.func) and all([self.visit(arg) for arg in node.args])
#         return (expr func, expr* args, keyword* keywords)

    def visit_Str(self,node):
        if self.verbose:
            print(type(node))
        return True
        
    def visit_Import(self,node):
        if self.verbose:
            print(str(type(node)) + ' : ' + str([alias.asname for alias in node.names]))
        # Assume only one import per line
        self.save_to_env(node.names[0].asname,node.names[0].name)
        return True
    
    def visit_Attribute(self,node):
        # node.value -> 
        # node.attr -> identifier
        if self.verbose:
            print(str(type(node)) + ' : ' + node.attr) 
        if node.attr in ['DataFrame']:
            return True  
        else:
            return False
#         return self.visit(node.value)        
        
        
    def visit_Expr(self,node):
        return self.visit(node.value)
    
    def visit_BinOp(self, node):
        if type(node.op) in [ast.Add, ast.Sub, ast.Mult]:
            if self.visit(node.left) is int and\
               self.visit(node.right) is int:
                return 4
    
    def visit_List(self,node):
        l=0
        for x in node.elts:
            l=l+self.visit(x) 
        return l

        # print(node)
    
    def visit_Num(self,node):
        if type(node.n) is int:
            return 2  
    
    def visit_Dict(self,node):
        if self.verbose:
            print(str(type(node)))
        return all([self.visit(key) for key in node.keys] and [self.visit(value) for value in node.values])
    
#     def visit_Slice(self,node):
#         if self.verbose:
#             print(type(node))
#         return self.visit(node.value)
    

def test():
#     x = np.array([2,3,1,0])
    d = {'col1': [1, 2], 'col2': [3, 4]}
    df = pd.DataFrame(data=d)
    return df

a = ast.parse('1001023'+'102')
infer_type().visit(a)

2