In [30]:
import numpy as np

In [31]:
def generateCodeWithComplexExpressions(num_expr_terms, max_num_statements):
    """Generate the source text of a random, syntactically valid function.

    The generated function body is a flat sequence of randomly chosen
    statements: ``for``, ``while``, ``if``/``else``, ``try``/``except`` or a
    plain assignment. Every compound statement gets exactly one assignment as
    its body; ``if`` and ``try`` are always closed by a matching ``else`` /
    ``except`` clause that also holds one assignment. All randomness comes
    from the global ``np.random`` state.

    Args:
        num_expr_terms: Number of terms chained by operators in every
            condition and every assignment right-hand side. At least one
            term is always produced, even for values below 1.
        max_num_statements: Exclusive upper bound on the number of top-level
            statements; the count is drawn with
            ``np.random.randint(max_num_statements)`` and may therefore be 0.

    Returns:
        A tuple ``(snippet, num_expr_terms, num_statements)`` where
        ``snippet`` is the generated source text and ``num_statements`` is
        the number of top-level statements that were drawn. When no
        statement was drawn the body is a single ``pass`` so that the
        snippet still parses.

    Raises:
        ValueError: Propagated from ``np.random.randint`` when
            ``max_num_statements`` is not positive.
    """
    CONDITION_EXPRESSIONS = [' > 0', ' < 0', ' != 0', ' is None', '.attr == 0']
    CONDITION_OPERATIONS = ['and', 'or']
    EXCEPTIONS = [' ValueError', ' IndexError', ' AttributeError']
    ASSIGNMENT_OPERATIONS = ['+', '-', '*', '/', '**']
    # NOTE(review): the letter 'q' is absent from this alphabet -- confirm
    # whether that is intentional before changing it (doing so would alter
    # the distribution of generated symbols).
    SYMBOLS = 'abcdefghijklmnoprstuvwxyz'
    INDENT = ' ' * 4

    def generateSymbol():
        # A variable name: one letter followed by a number in [0, 100).
        return np.random.choice(list(SYMBOLS)) + str(np.random.randint(100))

    def generateForStatement():
        return 'for '

    def generateIfStatement():
        return 'if '

    def generateElseStatement():
        return 'else:\n'

    def generateWhileStatement():
        return 'while '

    def generateTryStatement():
        return 'try:\n'

    def generateExceptStatement():
        # Entries of EXCEPTIONS carry their own leading space.
        return 'except' + np.random.choice(EXCEPTIONS) + ':\n'

    def generateAssignmentStatement():
        return generateSymbol() + ' = '

    def generateOperation(expression_type):
        if expression_type == 'condition':
            return np.random.choice(CONDITION_OPERATIONS)
        elif expression_type == 'assignment':
            return np.random.choice(ASSIGNMENT_OPERATIONS)

    def generateCondition():
        return np.random.choice(CONDITION_EXPRESSIONS)

    def generateExpression(statement_type, num_expr_terms):
        """Return the text that follows the opening keyword of a statement."""

        def generateTerm(expression_type):
            if expression_type == 'condition':
                return '(' + generateSymbol() + generateCondition() + ')'
            elif expression_type == 'assignment':
                return generateSymbol()

        # 'try:' is already complete; 'for' has a fixed iteration clause.
        if statement_type == 'try':
            return ''
        if statement_type == 'for':
            return generateSymbol() + ' in range(5):\n'

        if statement_type == 'assignment':
            expression_type = 'assignment'
        else:
            expression_type = 'condition'

        # First term, then (operator, term) pairs -- drawn in that order.
        parts = [generateTerm(expression_type)]
        for _ in range(1, num_expr_terms):
            parts.append(generateOperation(expression_type))
            parts.append(generateTerm(expression_type))
        expression = ' '.join(parts)

        # Conditions open a block and need a colon; assignments do not.
        if expression_type == 'condition':
            return expression + ':\n'
        return expression + '\n'

    OPENING_STATEMENT_GENERATING_FUNCS = {
        'for': generateForStatement,
        'while': generateWhileStatement,
        'if': generateIfStatement,
        'try': generateTryStatement,
        'assignment': generateAssignmentStatement,
    }
    CLOSING_STATEMENT_GENERATING_FUNCS = {
        'else': generateElseStatement,
        'except': generateExceptStatement,
    }
    # Which closing clause, if any, must follow each opening statement.
    OPEN_N_CLOSE_STATEMENT_MATCHES = {
        'if': 'else',
        'try': 'except',
        'for': None,
        'while': None,
        'assignment': None,
    }

    pieces = ['def function_name' + str(np.random.randint(100)) + '(a):\n']
    num_statements = np.random.randint(max_num_statements)

    for _ in range(num_statements):
        statement_type = np.random.choice(list(OPENING_STATEMENT_GENERATING_FUNCS))
        pieces.append(INDENT +
                      OPENING_STATEMENT_GENERATING_FUNCS[statement_type]() +
                      generateExpression(statement_type, num_expr_terms))

        if statement_type == 'assignment':
            continue

        # Body of the compound statement: a single nested assignment.
        pieces.append(INDENT * 2 + generateAssignmentStatement() +
                      generateExpression('assignment', num_expr_terms))

        matching_closing_statement = OPEN_N_CLOSE_STATEMENT_MATCHES[statement_type]
        if matching_closing_statement:
            pieces.append(
                INDENT +
                CLOSING_STATEMENT_GENERATING_FUNCS[matching_closing_statement]() +
                INDENT * 2 + generateAssignmentStatement() +
                generateExpression('assignment', num_expr_terms))

    if num_statements == 0:
        # A bare 'def ...:' header is not valid Python; give it a body.
        pieces.append(INDENT + 'pass\n')

    return ''.join(pieces), num_expr_terms, num_statements

In [32]:
# Root directory for the generated dataset. The string is a Windows-style
# path that ends with a separator, so relative file paths can be appended
# to it by plain string concatenation.
path_to_data = 'D:\\ubuntu\\zephyr_data\\'

In [33]:
# Generation settings: terms per expression and the exclusive upper bound
# on the number of statements per snippet.
num_expr_terms, max_num_statements = 3, 15

# Collect 10000 generator results, one tuple per generated snippet.
snippets = [
    generateCodeWithComplexExpressions(num_expr_terms, max_num_statements)
    for _ in range(10000)
]

In [34]:
import pandas as pd

# One row per generated snippet; the columns mirror the tuple layout of the
# entries in `snippets`.
column_names = ['snippet source code', 'num_expr_terms', 'num_statements']
output_file = path_to_data + 'complex_exp_code\\complex_exp_code.pickle'

data = pd.DataFrame(snippets, columns=column_names)
# Persist the table as a pickle under the data directory.
data.to_pickle(output_file)