We need to create a function that accepts Python 3 source code and returns a dictionary containing all of the statements, functions, expressions, and modules used by that code.

Documentation: https://greentreesnakes.readthedocs.io/en/latest/nodes.html

### List all statements used
`    ['for', 'while', 'if','else','elif']`
### List all functions used
`    ['.reverse', 'reverse', '.sort', 'sorted', 'len']`

### List all modules imported
`    ['Math']`

### List all expressions used
`    ['+','-','*','=','==']`



In [None]:
# Example parser
import ast
def code_features(code):
    """Return an empty feature report for ``code``.

    Placeholder that only shows the shape of the result: one empty
    dictionary per feature category. ``code`` is accepted but not inspected.
    """
    categories = ("statements", "functions", "imports", "expressions")
    # A fresh dict per category so callers can fill them independently.
    return {category: {} for category in categories}


In [None]:
# Collect all statements
def getAllStat(tree):
    """Report which tracked statement kinds occur anywhere in ``tree``.

    Every node reachable from ``tree`` is inspected. ``While``, ``For``,
    ``Return`` and ``If`` statements are tracked; other node types are ignored.

    Returns:
        dict mapping the label of each kind that was found to ``True``.
        Kinds that never occur are left out.
    """
    tracked = (
        (ast.While, "While"),
        (ast.For, "For"),
        (ast.Return, "Return"),
        (ast.If, "If"),
    )
    found = {}
    for node in ast.walk(tree):
        # A node is an instance of at most one tracked class.
        label = next(
            (name for kind, name in tracked if isinstance(node, kind)),
            None,
        )
        if label is not None:
            found[label] = True
    return found

In [None]:
# Alternate approach for functions.

'''
Get all function calls from a python file
The MIT License (MIT)
Copyright (c) 2016 Suhas S G <jargnar@gmail.com>
'''
import ast
from collections import deque


class FuncCallVisitor(ast.NodeVisitor):
    """Collect the name of a call target.

    Visit the ``func`` expression of an ``ast.Call`` node, then read
    ``name``. A plain call such as ``len(x)`` yields ``"len"``. An attribute
    call such as ``l.reverse()`` yields ``".reverse"``: the receiver is not
    recorded, and an empty part is stored in its place so the joined name
    starts with a dot.
    """

    def __init__(self):
        # Name parts; new parts are pushed on the left.
        self._name = deque()

    @property
    def name(self):
        """The collected parts joined with dots."""
        return '.'.join(self._name)

    @name.deleter
    def name(self):
        # ``del visitor.name`` empties the collected parts.
        self._name.clear()

    def visit_Name(self, node):
        """Record the identifier of a bare name."""
        self._name.appendleft(node.id)

    def visit_Attribute(self, node):
        """Record ``attr`` preceded by an empty part; the receiver is skipped."""
        try:
            method_name = node.attr
        except AttributeError:
            # No ``attr`` on this node: fall back to visiting its children.
            self.generic_visit(node)
        else:
            # extendleft pushes items one by one, so "" ends up leftmost.
            self._name.extendleft((method_name, ""))


def get_func_calls(tree):
    """Return ``{call_name: True}`` for every call found in ``tree``.

    Each ``ast.Call`` node reachable from ``tree`` gets its own
    ``FuncCallVisitor``, which turns the call target into a name. Keys
    appear in the order the calls are first encountered by ``ast.walk``.
    """
    seen = {}
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        visitor = FuncCallVisitor()
        visitor.visit(node.func)
        seen[visitor.name] = True
    return seen


In [None]:
# Collect all imports
def getAllImports(a):
    """Gather the names of all modules imported anywhere in ``a``.

    Args:
        a: An ``ast.AST`` node, normally the module returned by
            ``ast.parse``. Imports nested inside functions are found too.

    Returns:
        dict mapping each imported module name to ``True``, in order of
        first appearance. The real module name is reported, not the local
        alias or the imported member: ``import numpy as np`` gives
        ``"numpy"`` and ``from os import path`` gives ``"os"``. Relative
        imports keep their leading dots (``from ..pkg import x`` gives
        ``"..pkg"``). An empty dict is returned when ``a`` is not an AST
        node.
    """
    if not isinstance(a, ast.AST):
        # Same (dict) type as the normal return value.
        return {}

    modules = {}
    for child in ast.walk(a):
        if isinstance(child, ast.Import):
            for alias in child.names:
                modules[alias.name] = True
        elif isinstance(child, ast.ImportFrom):
            # ``module`` is None for ``from . import x``; ``level`` counts
            # the leading dots of a relative import.
            dots = "." * (child.level or 0)
            modules[dots + (child.module or "")] = True
    return modules

In [None]:
# Collect all expressions
def getAllExpr(tree):
    """Report which tracked operators occur anywhere in ``tree``.

    Tracked operators are ``+``, ``==``, ``-``, ``*``, ``/``, ``<`` and
    ``>``. Other operator nodes are ignored.

    Returns:
        dict mapping the symbol of each operator that was found to ``True``,
        in the order the operators are first encountered by ``ast.walk``.
    """
    symbols = (
        (ast.Add, "+"),
        (ast.Eq, "=="),
        (ast.Sub, "-"),
        (ast.Mult, "*"),
        (ast.Div, "/"),
        (ast.Lt, "<"),
        (ast.Gt, ">"),
    )
    found = {}
    for node in ast.walk(tree):
        for kind, symbol in symbols:
            if isinstance(node, kind):
                found[symbol] = True
    return found

In [None]:
def code_features(src):
    """Parse ``src`` and report the features it uses.

    ``src`` is handed to ``ast.parse``; any exception that call raises
    propagates to the caller.

    Returns:
        dict with the keys ``"statements"``, ``"functions"``, ``"imports"``
        and ``"expressions"``, each holding the dict produced by the
        matching collector for the parsed tree.
    """
    tree = ast.parse(src)
    return {
        "statements": getAllStat(tree),
        "functions": get_func_calls(tree),
        "imports": getAllImports(tree),
        "expressions": getAllExpr(tree),
    }

In [None]:
# Here are some code examples to practice parsing with.
import ast

# Sample "a": a method call (l.reverse), an if statement and two returns.
# The sample is only ever parsed here, never executed.
src1 = '''
def f1(l):
    l.reverse()
    if(true):
      return l
    return l
'''
# Sample "b": method calls, plain function calls, a for loop and a return.
src2 = '''
def f1(l):
    l.reverse()
    reverse(l)
    l.sort()
    sorted(l)
    len(l)
    for x in l:
        pass
    return(l)
'''
# Sample "c": an import, a while loop using "<", and an augmented "+=".
src3 = '''
import Math
def f1(l):
    x = 0
    while x < 5:
        x += 1
    return(l)

'''

# Label -> source text for each practice sample.
example_solutions = {
    "a": src1,
    "b": src2,
    "c": src3
}


In [None]:
# Analyse every practice sample and print its label with the feature report.
for key, src in example_solutions.items():
  results = code_features(src)
  print(key,results)

a {'statements': {'If': True, 'Return': True}, 'functions': {'.reverse': True}, 'imports': {}, 'expressions': {}}
b {'statements': {'For': True, 'Return': True}, 'functions': {'.reverse': True, 'reverse': True, '.sort': True, 'sorted': True, 'len': True}, 'imports': {}, 'expressions': {}}
c {'statements': {'While': True, 'Return': True}, 'functions': {}, 'imports': {'Math': True}, 'expressions': {'<': True, '+': True}}


In [None]:
# We have provided some example solution data hosted on Amazon S3
"""
{ problemKey1: {
     solutionKey1: {},
     solutionKey2: {}
  },
  problemKey2: {
     userKey1: {},
     userKey2: {}
  }
}

"""

url = "https://s3-ap-southeast-1.amazonaws.com/alset-public/example_solutions.json"
import urllib.request
import json
req = urllib.request.Request(url)

# Download the JSON document. The ``with`` block closes the HTTP response
# (and its underlying connection) as soon as the body has been read.
with urllib.request.urlopen(req) as response:
  r = response.read()

##parsing response
student_solutions = json.loads(r.decode('utf-8'))

# Walk the nested structure: problem key -> user key -> entry.
# Uncomment the prints to inspect the downloaded data.
for problemKey in student_solutions.keys():
  #print(problemKey)
  for userKey in student_solutions[problemKey].keys():
    #print("   ",userKey)
    #print(student_solutions[problemKey][userKey])
    pass


In [None]:
# Try listing functions (and the other features) from the student_solutions
for problemKey in student_solutions.keys():
  print("problem",problemKey)
  for userKey in student_solutions[problemKey].keys():
    #print(student_solutions[problemKey][userKey])
    # Each entry is passed to code_features() as source text.
    src = student_solutions[problemKey][userKey]
    #function_calls = get_func_calls(ast.parse(src))
    print("---------------------")
    print(src)
    # One line per feature category: its name followed by the features found.
    for key, value in code_features(src).items():
      print(key, value)



problem -LDkAuy-IFXOJoBILgL6
---------------------
# def multiply
def multiply(x, y):
    return x*y


statements {'Return': True}
functions {}
imports {}
expressions {'*': True}
---------------------
# def multiply
def multiply(x,y):
    return x*y
    


statements {'Return': True}
functions {}
imports {}
expressions {'*': True}
---------------------
def multiply(x,y):
    return x*y



statements {'Return': True}
functions {}
imports {}
expressions {'*': True}
---------------------
# def multiply
def multiply(x,y):
    return x*y
    


statements {'Return': True}
functions {}
imports {}
expressions {'*': True}
---------------------
# def multiply
# def multiply
def multiply(x,y):
  return x*y


statements {'Return': True}
functions {}
imports {}
expressions {'*': True}
---------------------
def multiply(a,b):
    return a*b


statements {'Return': True}
functions {}
imports {}
expressions {'*': True}
---------------------
# def multiply
def multiply(x,y): return x*y


statements {'

In [None]:
# Unit testing for code_features() function.

import unittest

# Test sample 1: an import nested in a function, a method call, an if and
# two returns. Rebinds the src1 defined with the practice examples.
src1 = '''
def f1(l):
    import datetime
    l.reverse()
    if(true):
      return l
    return l
'''



# Test sample 2: a len() call around a list comprehension that uses
# "%" and "==". Rebinds the src2 defined with the practice examples.
src2 = '''
def count_evens(i):
    return len([x for x in i if x % 2 == 0])

'''

class TestCodeFeatures(unittest.TestCase):
    """Checks code_features() against the two sample sources.

    Each test receives its sample through a default argument, so the test
    loader can call it without arguments.
    """

    def setUp(self):
        pass

    def test_default(self, src=""):
        # Empty source must still produce all four (empty) categories.
        result = code_features(src)
        self.assertEqual(
            result,
            {"statements": {}, "functions": {}, "imports": {}, "expressions": {}},
        )

    def test_statements(self, src=src1):
        result = code_features(src)
        self.assertEqual(result["statements"], {'If':True,'Return':True})

    def test_functions(self, src=src1):
        result = code_features(src)
        self.assertEqual(result["functions"], {'.reverse':True})

    def test_imports(self, src=src1):
        result = code_features(src)
        self.assertEqual(result["imports"], {'datetime':True})

    def test_len(self, src=src2):
        result = code_features(src)
        self.assertEqual(result["functions"], {'len':True})

    def test_more_expressions(self, src=src2):
        result = code_features(src)
        self.assertEqual(result["expressions"], {'==':True})

    def test_more_statements(self, src=src2):
        result = code_features(src)
        self.assertEqual(result["statements"], {'Return':True})
        #Todo - This is what should be returned. Missing the list expression content.
        #self.assertEqual(result["statements"], {'Return':True,'For':True, 'In':True, 'If':True})


# Simple runner: collect the tests defined on TestCodeFeatures and run them
# with a text runner. run() returns the result object.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(TestCodeFeatures)
unittest.TextTestRunner().run(suite)

.......
----------------------------------------------------------------------
Ran 7 tests in 0.018s

OK


<unittest.runner.TextTestResult run=7 errors=0 failures=0>

In [None]:
# And finally, we want to send back a dictionary of aggregate results rather than just the analysis of each solution.

def solution_features(solutions):
    """Aggregate per-solution feature reports by problem and by user.

    ``solutions`` maps problemKey -> userKey -> source text. Every source is
    analysed with code_features() and each feature found is indexed twice:

      problemSkills -> problemKey -> featureType -> feature -> userKey -> True
      userSkills    -> userKey -> featureType -> feature -> problemKey -> True

    A problem or user only appears once at least one feature was found for it.

    Returns:
        ``{"problemSkills": ..., "userSkills": ...}``
    """
    by_problem = {}
    by_user = {}

    for problem_key, submissions in solutions.items():
        for user_key in submissions:
            analysis = code_features(submissions[user_key])
            for feature_type, features in analysis.items():
                for feature in features:
                    # setdefault creates each missing level on the way down.
                    users = (
                        by_problem.setdefault(problem_key, {})
                        .setdefault(feature_type, {})
                        .setdefault(feature, {})
                    )
                    users[user_key] = True

                    problems = (
                        by_user.setdefault(user_key, {})
                        .setdefault(feature_type, {})
                        .setdefault(feature, {})
                    )
                    problems[problem_key] = True

    return {"problemSkills": by_problem, "userSkills": by_user}


# Aggregate every downloaded student solution.
final_results = solution_features(student_solutions)

# Per problem: each feature with the number of users whose solution used it.
for problemKey in final_results["problemSkills"]:
  print("----- All features used by all users to solve problem", problemKey, "-----")
  for featureType,features in final_results["problemSkills"][problemKey].items():
    for feature, userKeys in features.items():
      print(featureType, feature, len(userKeys))


# Per user: each feature with the number of problems in which they used it.
for userKey in final_results["userSkills"]:
  print("----- All features used by user ",userKey,"to solve all problems. -----")
  for featureType,features in final_results["userSkills"][userKey].items():
    for feature, problemKeys in features.items():
      print(featureType, feature, len(problemKeys))





----- All features used by all users to solve problem -LDkAuy-IFXOJoBILgL6 -----
statements Return 24
expressions * 24
----- All features used by all users to solve problem -LDkAxmbeJ9r3DcfU0ZW -----
statements If 18
statements Return 18
expressions > 16
expressions == 10
expressions < 9
expressions * 1
----- All features used by all users to solve problem -LDkB1-McVTufbnKUw-8 -----
statements For 11
statements Return 16
statements If 11
expressions == 15
expressions + 11
expressions > 1
expressions < 1
functions len 5
functions sum 2
functions list 1
functions filter 1
----- All features used by all users to solve problem -LDkB3z2Pp9wRf-zCxBk -----
statements Return 15
statements For 9
statements If 1
functions sum 6
functions len 1
functions int 1
expressions + 9
expressions == 1
----- All features used by all users to solve problem -LDkB7FprNZsNyK67zFC -----
statements For 12
statements Return 16
statements If 12
functions type 8
functions len 1
functions isinstance 8
functions sum 

In [None]:
# Create an AWS Lambda function that returns the results of solution_features()
# when a dictionary of user solutions is POSTed to it.




# Create a chart of the features by percentage used across all available examples.
# Find the difference between two aggregated lists.
