# Evaluating Coccinelle Semantic Patches in linux-mainline

### Setting up the notebook

In [89]:
import json
import calendar
import random
from datetime import date, timedelta

import faker
import numpy as np
from pandas import DataFrame
from delorean import parse
import pandas as pd

### Preparing the data

In [90]:
# Dictionary listing all Semantic Patches in linux-4.14-rc6
# TODO: Should refactor into file 'cocci_linux_4_14_rc6.json'

cocci_linux_4_14_rc6 = {
    'info': {
        'repository': 'linux-4.14-rc6'
    },
    'scripts': [
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'alloc_cast.cocci',
            'confidence': 'High',
            'description': "Remove casting the values returned by memory allocation functions like "
                "kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. "
                "This makes an effort to find cases of casting of values returned "
                "by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, "
                "kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting "
                "as it is not required. The result in the patch case may need some reformatting."
        },
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'kzalloc_simple.cocci',
            'confidence': 'High',
            'description': """use kzalloc rather than..."""
        },
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'pool_zalloc_simple.cocci',
            'confidence': 'N.A.',
            'description': """Use *_pool_zalloc rather than..."""
        },
        # TODO
    ]
}

# cocci_linux_4_14_rc6

In [91]:
print(cocci_linux_4_14_rc6)
len(cocci_linux_4_14_rc6['scripts'])

{'info': {'repository': 'linux-4.14-rc6'}, 'scripts': [{'path': 'scripts/coccinelle/api/alloc', 'filename': 'alloc_cast.cocci', 'confidence': 'High', 'description': 'Remove casting the values returned by memory allocation functions like kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. This makes an effort to find cases of casting of values returned by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting as it is not required. The result in the patch case may need some reformatting.'}, {'path': 'scripts/coccinelle/api/alloc', 'filename': 'kzalloc_simple.cocci', 'confidence': 'High', 'description': 'use kzalloc rather than...'}, {'path': 'scripts/coccinelle/api/alloc', 'filename': 'pool_zalloc_simple.cocci', 'confidence': 'N.A.', 'description': 'Use *_pool_zalloc rather than...'}]}


3

In [92]:
# Report against linux-4.14-rc6 full-tree

report_linux_4_14_rc6_fulltree = {
    'info': {
        'note': """Report against linux-4.14-rc6 full-tree""",
        'script_start': 'Thu Oct  5 13:11:29 CEST 2017',
        'src_sha': 'd81fa669e3de7eb8a631d7d95dac5fbcb2bf9d4e',
    },
    'statistics': [
        {'filename': 'alloc_cast.cocci',         'L':  13, 'W':  0, 'E': 0},
        {'filename': 'kzalloc-simple.cocci',     'L':   4, 'W':  4, 'E': 0},
        {'filename': 'pool_zalloc-simple.cocci', 'L':  22, 'W': 22, 'E': 0}, 
        # TODO
    ]
}

In [93]:
# Report against linux-4.4.50 minimzed-tree

report_linux_4_4_50_minimizedtree = {
    'info': {
        'note': """Report against linux-4.4.50 minimzed-tree""",
        'script_start': 'Mon Oct 23 16:29:52 IST 2017',
        'cocci_sha': '9e66317d3c92ddaab330c125dfe9d06eee268aff',
        'src_sha': '90dcab23bbc81fbfa25dfdb91d4ce974a69bd210',
    },
    'statistics': [
        {'filename': 'alloc_cast.cocci',         'L':  2, 'W': 0, 'E': 0},
        {'filename': 'kzalloc-simple.cocci',     'L':  0, 'W': 0, 'E': 0},
        {'filename': 'pool_zalloc-simple.cocci', 'L':  5, 'W': 0, 'E': 0}, 
        # TODO
    ]
}

### Creating the DataFrame

In [94]:
data = cocci_linux_4_14_rc6['scripts']
df = DataFrame(data) # , index=cocci_linux_4_14_rc6['scripts']['filename']

print(df.index)
print()
print(df.filename)
print()
# df.head()

RangeIndex(start=0, stop=3, step=1)

0            alloc_cast.cocci
1        kzalloc_simple.cocci
2    pool_zalloc_simple.cocci
Name: filename, dtype: object



In [95]:
df.count()
#df.describe()

confidence     3
description    3
filename       3
path           3
dtype: int64

In [96]:
json.dumps(data)

'[{"path": "scripts/coccinelle/api/alloc", "filename": "alloc_cast.cocci", "confidence": "High", "description": "Remove casting the values returned by memory allocation functions like kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. This makes an effort to find cases of casting of values returned by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting as it is not required. The result in the patch case may need some reformatting."}, {"path": "scripts/coccinelle/api/alloc", "filename": "kzalloc_simple.cocci", "confidence": "High", "description": "use kzalloc rather than..."}, {"path": "scripts/coccinelle/api/alloc", "filename": "pool_zalloc_simple.cocci", "confidence": "N.A.", "description": "Use *_pool_zalloc rather than..."}]'