In [36]:
from pycparser import c_ast, c_parser, c_generator

parser = c_parser.CParser()
generator = c_generator.CGenerator()


## Dead Code

In [111]:
import random

In [123]:
dead_codes = [
    '''
    int main() {
        int alpha;
    }
    ''',
    '''
    int main() {
        int alpha = 0;
        int beta = 5;
        int gamma = alpha + beta;
    }
    ''',
    '''
    int main() {
        const int ALPHA = 10;
        const int BETA = 5;
    }
    ''',
    '''
    int main() {
        int alpha = 0;
        if(false) {
            alpha = 1;
        }
    }
    '''
    ,
    '''
    int main() {
        int alpha = 0;
        if(false) {
            alpha = 1;
        } else {
            alpha = 2;
        }
    }
    '''
]

In [117]:
compounds = []
for code in dead_codes:
    ast = parser.parse(code)
    compounds.append(ast.ext[0].body)

In [120]:

class deadCodeAdder(c_ast.NodeVisitor):
    def visit_FuncDef(self, node):
        if node.decl.name == 'main':
            for compound in compounds:
                index = random.randrange(len(node.body.block_items))
                node.body.block_items = node.body.block_items[:index] + compound.block_items + node.body.block_items[index:]

def add_dead_code(ast):
    v = deadCodeAdder()
    v.visit(ast)
    

In [122]:
src = """
int main()
{
    int n,i,shuzu[111],count1=0,count3=0,count2=0,count4=0;
    scanf("%d",&n);
    while(n>=100){
                  n=n-100;
                  count1++;
                  }
                      while(n>=50){
                  n=n-50;
                  count2++;
                  }
                      while(n>=20){
                  n=n-20;
                  count3++;
                  }
                      while(n>=10){
                  n=n-10;
                  count4++;
               return 0;
               }
}
"""
ast = parser.parse(src)

add_dead_code(ast)
print(generator.visit(ast))

int main()
{
  int n;
  int i;
  int shuzu[111];
  int count1 = 0;
  int count3 = 0;
  int count2 = 0;
  int alpha;
  int count4 = 0;
  scanf("%d", &n);
  while (n >= 100)
  {
    n = n - 100;
    count1++;
  }

  int alpha = 0;
  int beta = 5;
  int gamma = alpha + beta;
  while (n >= 50)
  {
    n = n - 50;
    count2++;
  }

  while (n >= 20)
  {
    n = n - 20;
    count3++;
  }

  while (n >= 10)
  {
    n = n - 10;
    count4++;
    return 0;
  }

}




## Variable Renaimg

In [89]:
import pickle
used_vars = pickle.load( open( "/home/david/projects/university/astnn/var_names.pkl", "rb" ) )

In [37]:
src = """
int main() {
    int alpha;
    alpha = 0;
    scanf("%d",&n);
}
"""

ast = parser.parse(src)
print(ast)
print(generator.visit(ast))

FileAST(ext=[FuncDef(decl=Decl(name='main',
                               quals=[
                                     ],
                               storage=[
                                       ],
                               funcspec=[
                                        ],
                               type=FuncDecl(args=None,
                                             type=TypeDecl(declname='main',
                                                           quals=[
                                                                 ],
                                                           type=IdentifierType(names=['int'
                                                                                     ]
                                                                               )
                                                           )
                                             ),
                               init=None,
                               

In [17]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [101]:
from gensim.models.word2vec import Word2Vec

word2vec = Word2Vec.load("/home/david/projects/university/astnn/data/train/embedding/node_w2v_128").wv

In [102]:
word2vec.most_similar(positive=[], negative=['alpha'], topn=10, restrict_vocab=None)

[('state', 0.13344937562942505),
 ('map', 0.09349325299263),
 ('room', 0.09310971200466156),
 ('location', 0.07769270241260529),
 ("'@'", 0.057959336787462234),
 ("'.'", 0.04344698041677475),
 ("'*'", 0.041571829468011856),
 ('loci', 0.03517623245716095),
 ('mrow', 0.02887730486690998),
 ('iterin', 0.02784087136387825)]

## Restrict to sensible variable names

In [103]:
import numpy as np

def restrict_w2v(w2v, restricted_word_set):
    new_vectors = []
    new_vocab = {}
    new_index2entity = []
    new_vectors_norm = []

    for i in range(len(w2v.vocab)):
        word = w2v.index2entity[i]
        vec = w2v.vectors[i]
        vocab = w2v.vocab[word]
        vec_norm = w2v.vectors_norm[i]
        if word in restricted_word_set:
            vocab.index = len(new_index2entity)
            new_index2entity.append(word)
            new_vocab[word] = vocab
            new_vectors.append(vec)
            new_vectors_norm.append(vec_norm)

    w2v.vocab = new_vocab
    w2v.vectors = np.array(new_vectors)
    w2v.index2entity = np.array(new_index2entity)
    w2v.index2word = np.array(new_index2entity)
    w2v.vectors_norm = np.array(new_vectors_norm)

In [104]:
restrict_w2v(word2vec, used_vars)
word2vec.most_similar(positive=[], negative=['alpha'], topn=10, restrict_vocab=None)

[('state', 0.13344937562942505),
 ('map', 0.09349325299263),
 ('room', 0.09310971200466156),
 ('location', 0.07769270241260529),
 ('loci', 0.03517623245716095),
 ('mrow', 0.02887730486690998),
 ('iterin', 0.02784087136387825),
 ('use', 0.024702150374650955),
 ('iterout', 0.021128982305526733),
 ('minj', 0.016142524778842926)]

In [126]:
from pycparser.c_ast import TypeDecl, ID

def get_antonym(word):
    return word2vec.most_similar(positive=[], negative=[word], topn=1, restrict_vocab=None)[0][0]

class declarationRenamer(c_ast.NodeVisitor):
    def visit_Decl(self, node):
        var_name = node.name
        antonymn = get_antonym(var_name)
        node.name = antonymn
        if type(node.type) is TypeDecl:
            node.type.declname = antonymn
        
class assignmentRenamer(c_ast.NodeVisitor):
    def visit_Assignment(self, node):
        if type(node.lvalue) is ID:
            var_name = node.lvalue.name
            antonymn = get_antonym(var_name)
            node.lvalue.name = antonymn
        if type(node.rvalue) is ID:
            var_name = node.rvalue.name
            antonymn = get_antonym(var_name)
            node.rvalue.name = antonymn
            
class unaryOpRenamer(c_ast.NodeVisitor):
    def visit_UnaryOp(self, node):
        if type(node.expr) is ID:
            var_name = node.expr.name
            antonymn = get_antonym(var_name)
            node.expr.name = antonymn
            
class binaryOpRenamer(c_ast.NodeVisitor):
    def visit_BinaryOp(self, node):
        if type(node.left) is ID:
            var_name = node.left.name
            antonymn = get_antonym(var_name)
            node.left.name = antonymn
        if type(node.right) is ID:
            var_name = node.right.name
            antonymn = get_antonym(var_name)
            node.right.name = antonymn

In [127]:
def rename_vars(ast):
    declaration_renamer = declarationRenamer()
    assignment_renamer = assignmentRenamer()
    unary_op_renamer = unaryOpRenamer()
    binary_op_renamer = binaryOpRenamer()
        
    declaration_renamer.visit(ast)
    assignment_renamer.visit(ast)
    unary_op_renamer.visit(ast)
    binary_op_renamer.visit(ast)

In [129]:
src = """
int main()
{
    int n,i,count1=0,count3=0,count2=0,count4=0;
    scanf("%d",&n);
    while(n>=100){
                  n=n-100;
                  count1++;
                  }
                      while(n>=50){
                  n=n-50;
                  count2++;
                  }
                      while(n>=20){
                  n=n-20;
                  count3++;
                  }
                      while(n>=10){
                  n=n-10;
                  count4++;
               return 0;
               }
}
"""
ast = parser.parse(src)

rename_vars(ast)
print(generator.visit(ast))

int main()
{
  int tempi;
  int win;
  int ws = 0;
  int ml = 0;
  int cos = 0;
  int sen = 0;
  scanf("%d", &tempi);
  while (tempi >= 100)
  {
    tempi = tempi - 100;
    ws++;
  }

  while (tempi >= 50)
  {
    tempi = tempi - 50;
    cos++;
  }

  while (tempi >= 20)
  {
    tempi = tempi - 20;
    ml++;
  }

  while (tempi >= 10)
  {
    tempi = tempi - 10;
    sen++;
    return 0;
  }

}


