# Evaluating Coccinelle Semantic Patches in linux-mainline

### Setting up the notebook

In [1]:
# Import all needed libraries
import json
import calendar
import random
import os
import sys
from datetime import date, timedelta

import faker
import numpy as np
import pandas as pd
from pandas import DataFrame, read_csv
from delorean import parse
import matplotlib

# Enable inline plotting
%matplotlib inline

In [2]:
print('Python version ' + sys.version)
print('Pandas version ' + pd.__version__)
print('Matplotlib version ' + matplotlib.__version__)

Python version 3.6.3 (v3.6.3:2c5fed8, Oct  3 2017, 18:11:49) [MSC v.1900 64 bit (AMD64)]
Pandas version 0.20.3
Matplotlib version 2.1.0


### Preparing the data

In [3]:
# Dictionary listing all Semantic Patches in linux-4.14-rc6
# TODO: Should refactor into file 'cocci_linux_4_14_rc6.json'

cocci_linux_4_14_rc6 = {
    'info': {
        'repository': 'linux-4.14-rc6'
    },
    'scripts': [
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'alloc_cast.cocci',
            'confidence': 'High',
            'description': "Remove casting the values returned by memory allocation functions like "
                "kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. "
                "This makes an effort to find cases of casting of values returned "
                "by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, "
                "kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting "
                "as it is not required. The result in the patch case may need some reformatting."
        },
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'kzalloc-simple.cocci',
            'confidence': 'High',
            'description': """use kzalloc rather than..."""
        },
        {
            'path': 'scripts/coccinelle/api/alloc',
            'filename': 'pool_zalloc-simple.cocci',
            'confidence': 'N.A.',
            'description': """Use *_pool_zalloc rather than..."""
        },
        # TODO: Complete fields for all the 59 entries
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'd_find_alias.cocci',
            'confidence': 'Moderate',
            'description': "?"
        },
        {
            'path': 'scripts/coccinelle/debugfs',
            'filename': 'debugfs_simple_attr.cocci',
            'confidence': 'N.A.',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'drm-get-put.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'err_cast.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'kstrdup.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'memdup.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'memdup_user.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'platform_no_drv_owner.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'pm_runtime.cocci',
            'confidence': 'Medium',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'ptr_ret.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'resource_size.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'setup_timer.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'simple_open.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/api',
            'filename': 'vma_pages.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'clk_put.cocci',
            'confidence': 'Medium',
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'devm_free.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'ifnullfree.cocci',
            'confidence': 'N.A.',
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'iounmap.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'kfree.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'kfreeaddr.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'pci_free_consistent.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/free',
            'filename': 'device_node_continue.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'fen.cocci',
            'confidence': 'High'
            ,},
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'ifnull.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'list_entry_update.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/iterators',
            'filename': 'use_after_iter.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'call_kern.cocci',
            'confidence': 'Moderate',},
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'double_lock.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'flags.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/locks',
            'filename': 'mini_lock.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'array_size.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'badty.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'boolconv.cocci',
            'confidence': 'N.A.',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'boolinit.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'boolreturn.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'bugon.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'cond_no_effect.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'cstptr.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'doubleinit.cocci',
            'confidence': 'Low',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'ifaddr.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'ifcol.cocci',
            'confidence': 'Low',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'irqf_oneshot.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'noderef.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'of_table.cocci',
            'confidence': 'Medium',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'orplus.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'returnvar.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'semicolon.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/misc',
            'filename': 'warn.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'badzero.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'deref_null.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'eno.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/null',
            'filename': 'kmerr.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'doublebitand.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'doubletest.cocci',
            'confidence': 'Moderate',
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'odd_ptr_err.cocci',
            'confidence': 'High',
        },
        {
            'path': 'scripts/coccinelle/tests',
            'filename': 'unsigned_lesser_than_zero.cocci',
            'confidence': 'Average',
        },
    ]
}

# cocci_linux_4_14_rc6

In [4]:
print(cocci_linux_4_14_rc6)
len(cocci_linux_4_14_rc6['scripts'])

{'info': {'repository': 'linux-4.14-rc6'}, 'scripts': [{'path': 'scripts/coccinelle/api/alloc', 'filename': 'alloc_cast.cocci', 'confidence': 'High', 'description': 'Remove casting the values returned by memory allocation functions like kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. This makes an effort to find cases of casting of values returned by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting as it is not required. The result in the patch case may need some reformatting.'}, {'path': 'scripts/coccinelle/api/alloc', 'filename': 'kzalloc-simple.cocci', 'confidence': 'High', 'description': 'use kzalloc rather than...'}, {'path': 'scripts/coccinelle/api/alloc', 'filename': 'pool_zalloc-simple.cocci', 'confidence': 'N.A.', 'description': 'Use *_pool_zalloc rather than...'}, {'path': 'scripts/coccinelle/api', 'filename': 'd_find_alias.cocci', 'confidence': 'Moderate', 'description': '?

59

### Creating the DataFrame for cocci_linux_4_14_rc6

In [5]:
data = cocci_linux_4_14_rc6['scripts']
df = DataFrame(data) # , index=cocci_linux_4_14_rc6['scripts']['filename']

#df.count()
#df.describe()
#json.dumps(data)
#print(df.index)
#print()
#print(df.filename)
#print()

#df.head()

In [6]:
# Rearrange columns
cols = ['path', 'filename', 'confidence', 'description']
df = df[cols]

df

Unnamed: 0,path,filename,confidence,description
0,scripts/coccinelle/api/alloc,alloc_cast.cocci,High,Remove casting the values returned by memory a...
1,scripts/coccinelle/api/alloc,kzalloc-simple.cocci,High,use kzalloc rather than...
2,scripts/coccinelle/api/alloc,pool_zalloc-simple.cocci,N.A.,Use *_pool_zalloc rather than...
3,scripts/coccinelle/api,d_find_alias.cocci,Moderate,?
4,scripts/coccinelle/debugfs,debugfs_simple_attr.cocci,N.A.,
5,scripts/coccinelle/api,drm-get-put.cocci,High,
6,scripts/coccinelle/api,err_cast.cocci,High,
7,scripts/coccinelle/api,kstrdup.cocci,High,
8,scripts/coccinelle/api,memdup.cocci,High,
9,scripts/coccinelle/api,memdup_user.cocci,High,


In [7]:
# Notice that text of column "description" as shown above is truncated,
# however the cell inside the DataFrame contains the full value
df.iloc[0]['description']

'Remove casting the values returned by memory allocation functions like kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. This makes an effort to find cases of casting of values returned by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting as it is not required. The result in the patch case may need some reformatting.'

In [8]:
# Display full text for column "description"
# See <https://stackoverflow.com/questions/23388810/ipython-notebook-output-cell-is-truncating-contents-of-my-list>
# See <http://pandas.pydata.org/pandas-docs/stable/options.html>

from pandas import DataFrame
from IPython.display import HTML

pd.options.display.max_colwidth = 2000
pd.options.display.max_seq_items = 200

HTML(df.to_html())

Unnamed: 0,path,filename,confidence,description
0,scripts/coccinelle/api/alloc,alloc_cast.cocci,High,"Remove casting the values returned by memory allocation functions like kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc. This makes an effort to find cases of casting of values returned by kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc, kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes the casting as it is not required. The result in the patch case may need some reformatting."
1,scripts/coccinelle/api/alloc,kzalloc-simple.cocci,High,use kzalloc rather than...
2,scripts/coccinelle/api/alloc,pool_zalloc-simple.cocci,N.A.,Use *_pool_zalloc rather than...
3,scripts/coccinelle/api,d_find_alias.cocci,Moderate,?
4,scripts/coccinelle/debugfs,debugfs_simple_attr.cocci,N.A.,
5,scripts/coccinelle/api,drm-get-put.cocci,High,
6,scripts/coccinelle/api,err_cast.cocci,High,
7,scripts/coccinelle/api,kstrdup.cocci,High,
8,scripts/coccinelle/api,memdup.cocci,High,
9,scripts/coccinelle/api,memdup_user.cocci,High,


In [9]:
# Export the dataframe to an Excel file
#df.to_excel('test.xls')

### Creating DataFrames for instrument-coccicheck reports 

In [10]:
# Report against linux-4.14-rc6 full-tree

report_linux_4_14_rc6_fulltree = {
    'info': {
        'note': """Report against linux-4.14-rc6 full-tree""",
        'script_start': 'Thu Oct  5 13:11:29 CEST 2017',
        'src_sha': 'd81fa669e3de7eb8a631d7d95dac5fbcb2bf9d4e',
    },
    'statistics': [
        {'filename': 'alloc_cast.cocci',         'L':  13, 'W':  0, 'E': 0},
        {'filename': 'kzalloc-simple.cocci',     'L':   4, 'W':  4, 'E': 0},
        {'filename': 'pool_zalloc-simple.cocci', 'L':  22, 'W': 22, 'E': 0}, 
        # TODO: Complete with all the 59 entries
        {'filename': 'd_find_alias.cocci',              'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'debugfs_simple_attr.cocci',       'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'drm-get-put.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'err_cast.cocci',                  'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'kstrdup.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'memdup.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'memdup_user.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'platform_no_drv_owner.cocci',     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'pm_runtime.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ptr_ret.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'resource_size.cocci',             'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'setup_timer.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'simple_open.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'vma_pages.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'clk_put.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'devm_free.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ifnullfree.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'iounmap.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'kfree.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'kfreeaddr.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'pci_free_consistent.cocci',       'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'device_node_continue.cocci',      'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'fen.cocci',                       'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ifnull.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'list_entry_update.cocci',         'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'use_after_iter.cocci',            'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'call_kern.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'double_lock.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'flags.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'mini_lock.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'array_size.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'badty.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'boolconv.cocci',                  'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'boolinit.cocci',                  'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'boolreturn.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'bugon.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'cond_no_effect.cocci',            'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'cstptr.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'doubleinit.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ifaddr.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ifcol.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'irqf_oneshot.cocci',              'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'noderef.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'of_table.cocci',                  'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'orplus.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'returnvar.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'semicolon.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'warn.cocci',                      'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'badzero.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'deref_null.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'eno.cocci',                       'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'kmerr.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'doublebitand.cocci',              'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'doubletest.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'odd_ptr_err.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'unsigned_lesser_than_zero.cocci', 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
    ]
}

In [11]:
data2_full = DataFrame(report_linux_4_14_rc6_fulltree['statistics'])
data2_full = data2_full[['filename','L', 'W', 'E']]

data2_full

Unnamed: 0,filename,L,W,E
0,alloc_cast.cocci,13.0,0.0,0.0
1,kzalloc-simple.cocci,4.0,4.0,0.0
2,pool_zalloc-simple.cocci,22.0,22.0,0.0
3,d_find_alias.cocci,,,
4,debugfs_simple_attr.cocci,,,
5,drm-get-put.cocci,,,
6,err_cast.cocci,,,
7,kstrdup.cocci,,,
8,memdup.cocci,,,
9,memdup_user.cocci,,,


In [12]:
#data2_full.plot()
#data2_full[['L', 'W', 'E']].plot()
#data2_full[['filename','L', 'W', 'E']].plot(kind='bar')

#len(data2_full['filename'].unique())

In [13]:
# Report against linux-4.4.50 minimized-tree

report_linux_4_4_50_minimizedtree = {
    'info': {
        'note': """Report against linux-4.4.50 minimzed-tree""",
        'script_start': 'Mon Oct 23 16:29:52 IST 2017',
        'cocci_sha': '9e66317d3c92ddaab330c125dfe9d06eee268aff',
        'src_sha': '90dcab23bbc81fbfa25dfdb91d4ce974a69bd210',
    },
    'statistics': [
        {'filename': 'alloc_cast.cocci',                'L':  2, 'W': 0, 'E': 0},
        {'filename': 'kzalloc-simple.cocci',            'L':  0, 'W': 0, 'E': 0},
        {'filename': 'pool_zalloc-simple.cocci',        'L':  5, 'W': 0, 'E': 0}, 
        # TODO: Complete with all the 59 entries
        {'filename': 'd_find_alias.cocci',              'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'debugfs_simple_attr.cocci',       'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'drm-get-put.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'err_cast.cocci',                  'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'kstrdup.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'memdup.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'memdup_user.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'platform_no_drv_owner.cocci',     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'pm_runtime.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ptr_ret.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'resource_size.cocci',             'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'setup_timer.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'simple_open.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'vma_pages.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'clk_put.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'devm_free.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ifnullfree.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'iounmap.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'kfree.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'kfreeaddr.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'pci_free_consistent.cocci',       'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'device_node_continue.cocci',      'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'fen.cocci',                       'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ifnull.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'list_entry_update.cocci',         'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'use_after_iter.cocci',            'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'call_kern.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'double_lock.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'flags.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'mini_lock.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'array_size.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'badty.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'boolconv.cocci',                  'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'boolinit.cocci',                  'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'boolreturn.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'bugon.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'cond_no_effect.cocci',            'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'cstptr.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'doubleinit.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ifaddr.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'ifcol.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'irqf_oneshot.cocci',              'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'noderef.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'of_table.cocci',                  'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'orplus.cocci',                    'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'returnvar.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'semicolon.cocci',                 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'warn.cocci',                      'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'badzero.cocci',                   'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'deref_null.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'eno.cocci',                       'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'kmerr.cocci',                     'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'doublebitand.cocci',              'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'doubletest.cocci',                'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'odd_ptr_err.cocci',               'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
        {'filename': 'unsigned_lesser_than_zero.cocci', 'L': np.NaN, 'W': np.NaN, 'E': np.NaN},
    ]
}

In [14]:
data3_minimized = DataFrame(report_linux_4_4_50_minimizedtree['statistics'])
data3_minimized = data3_minimized[['filename','L', 'W', 'E']]

data3_minimized

Unnamed: 0,filename,L,W,E
0,alloc_cast.cocci,2.0,0.0,0.0
1,kzalloc-simple.cocci,0.0,0.0,0.0
2,pool_zalloc-simple.cocci,5.0,0.0,0.0
3,d_find_alias.cocci,,,
4,debugfs_simple_attr.cocci,,,
5,drm-get-put.cocci,,,
6,err_cast.cocci,,,
7,kstrdup.cocci,,,
8,memdup.cocci,,,
9,memdup_user.cocci,,,


### Assigning estimated safety relevance to cocci scripts

In [15]:
my_safetyrel_cocci = {
    'info': {
        # TODO
    },
    # rel: Safety Relevance (1=Highest, 9=lowest)
    'values': [
        {'filename': 'alloc_cast.cocci',                'rel': 5, 'notes': ''},
        {'filename': 'kzalloc-simple.cocci',            'rel': 5, 'notes': ''},
        {'filename': 'pool_zalloc-simple.cocci',        'rel': 8, 'notes': ''},
        {'filename': 'd_find_alias.cocci',              'rel': 3, 'notes': ''},
        {'filename': 'debugfs_simple_attr.cocci',       'rel': 6, 'notes': ''},
        {'filename': 'drm-get-put.cocci',               'rel': 6, 'notes': ''},
        {'filename': 'err_cast.cocci',                  'rel': 5, 'notes': ''},
        {'filename': 'kstrdup.cocci',                   'rel': 4, 'notes': ''},
        {'filename': 'memdup.cocci',                    'rel': 4, 'notes': ''},
        {'filename': 'memdup_user.cocci',               'rel': 4, 'notes': ''},
        {'filename': 'platform_no_drv_owner.cocci',     'rel': 5, 'notes': ''},
        {'filename': 'pm_runtime.cocci',                'rel': 4, 'notes': ''},
        {'filename': 'ptr_ret.cocci',                   'rel': 5, 'notes': ''},
        {'filename': 'resource_size.cocci',             'rel': 4, 'notes': ''},
        {'filename': 'setup_timer.cocci',               'rel': 6, 'notes': ''},
        {'filename': 'simple_open.cocci',               'rel': 4, 'notes': ''},
        {'filename': 'vma_pages.cocci',                 'rel': 4, 'notes': ''},
        {'filename': 'clk_put.cocci',                   'rel': 4, 'notes': ''},
        {'filename': 'devm_free.cocci',                 'rel': 2, 'notes': 'Beware of false positives'},
        {'filename': 'ifnullfree.cocci',                'rel': 7, 'notes': ''},
        {'filename': 'iounmap.cocci',                   'rel': 4, 'notes': 'Beware of false positives'},
        {'filename': 'kfree.cocci',                     'rel': 2, 'notes': 'Beware of false positives'},
        {'filename': 'kfreeaddr.cocci',                 'rel': 1, 'notes': ''},
        {'filename': 'pci_free_consistent.cocci',       'rel': 4, 'notes': ''},
        {'filename': 'device_node_continue.cocci',      'rel': 3, 'notes': ''},
        {'filename': 'fen.cocci',                       'rel': 5, 'notes': ''},
        {'filename': 'ifnull.cocci',                    'rel': 5, 'notes': 'Beware of false positives'},
        {'filename': 'list_entry_update.cocci',         'rel': 4, 'notes': ''},
        {'filename': 'use_after_iter.cocci',            'rel': 3, 'notes': 'Beware of false positives'},
        {'filename': 'call_kern.cocci',                 'rel': 4, 'notes': ''},
        {'filename': 'double_lock.cocci',               'rel': 5, 'notes': 'Beware of false positives'},
        {'filename': 'flags.cocci',                     'rel': 2, 'notes': ''},
        {'filename': 'mini_lock.cocci',                 'rel': 5, 'notes': 'Beware of false positives'},
        {'filename': 'array_size.cocci',                'rel': 4, 'notes': ''},
        {'filename': 'badty.cocci',                     'rel': 4, 'notes': 'Beware of false positives'},
        {'filename': 'boolconv.cocci',                  'rel': 9, 'notes': 'Cleanup code only'},
        {'filename': 'boolinit.cocci',                  'rel': 8, 'notes': ''},
        {'filename': 'boolreturn.cocci',                'rel': 6, 'notes': 'Does not affect code behaviour'},
        {'filename': 'bugon.cocci',                     'rel': 8, 'notes': ''},
        {'filename': 'cond_no_effect.cocci',            'rel': 4, 'notes': 'Developed by Nicholas Mc Guire, OSADL'},
        {'filename': 'cstptr.cocci',                    'rel': 1, 'notes': ''},
        {'filename': 'doubleinit.cocci',                'rel': 7, 'notes': 'High rate of false positives'},
        {'filename': 'ifaddr.cocci',                    'rel': 1, 'notes': ''},
        {'filename': 'ifcol.cocci',                     'rel': 7, 'notes': 'High rate of false positives'},
        {'filename': 'irqf_oneshot.cocci',              'rel': 6, 'notes': ''},
        {'filename': 'noderef.cocci',                   'rel': 2, 'notes': ''},
        {'filename': 'of_table.cocci',                  'rel': 2, 'notes': ''},
        {'filename': 'orplus.cocci',                    'rel': 4, 'notes': 'Beware of false positives'},
        {'filename': 'returnvar.cocci',                 'rel': 7, 'notes': ''},
        {'filename': 'semicolon.cocci',                 'rel': 8, 'notes': ''},
        {'filename': 'warn.cocci',                      'rel': 4, 'notes': ''},
        {'filename': 'badzero.cocci',                   'rel': 5, 'notes': ''},
        {'filename': 'deref_null.cocci',                'rel': 4, 'notes': 'Beware of false positives'},
        {'filename': 'eno.cocci',                       'rel': 2, 'notes': ''},
        {'filename': 'kmerr.cocci',                     'rel': 2, 'notes': ''},
        {'filename': 'doublebitand.cocci',              'rel': 7, 'notes': 'Beware of false positives'},
        {'filename': 'doubletest.cocci',                'rel': 7, 'notes': 'Beware of false positives'},
        {'filename': 'odd_ptr_err.cocci',               'rel': 4, 'notes': ''},
        {'filename': 'unsigned_lesser_than_zero.cocci', 'rel': 3, 'notes': ''},
        # TODO: Complete with all the 59 entries
    ]
}

In [16]:
data5_relevance = DataFrame(my_safetyrel_cocci['values'])
data5_relevance = data5_relevance[['filename','rel', 'notes']]

data5_relevance

Unnamed: 0,filename,rel,notes
0,alloc_cast.cocci,5,
1,kzalloc-simple.cocci,5,
2,pool_zalloc-simple.cocci,8,
3,d_find_alias.cocci,3,
4,debugfs_simple_attr.cocci,6,
5,drm-get-put.cocci,6,
6,err_cast.cocci,5,
7,kstrdup.cocci,4,
8,memdup.cocci,4,
9,memdup_user.cocci,4,


### Merge tables and aggregate results

In [17]:
#results = data5_relevance.copy(deep=True)
#results.sort_values(by='rel')

#print('DEBUG: id(data5_relevance):', id(data5_relevance))
#print('DEBUG: id(results):', id(results))
#print('DEBUG: len(results):', len(results))
#print('')

#for fn in results['filename']:
#    print('DEBUG: fn=', fn)

#print("DEBUG: df=", df)
#print("DEBUG: df.index=", df.index)
#print("DEBUG: df['filename']=", df['filename'])
#print('')

#results2 = DataFrame(index=results['filename'])
#print("DEBUG: results2=", results2)
#print("DEBUG: results2.index=", results2.index)

#print(results2)

#for ix in results.index:
#    #print('DEBUG: ix=', ix)
#    fn = results['filename'].get(ix)
#    rel = results['rel'].get(ix)
#    notes = results['notes'].get(ix)
#    print('DEBUG: ix=', ix, ', fn=', fn, ', rel=', rel, ', notes=', notes)
#    #results2.append({
#    #    'filename': fn,
#    #    'rel': rel,
#    #    'notes': notes,
#    #}, ignore_index=True)
#    
#    results2[fn]['rel'] = rel
#    results2[fn]['notes'] = notes
#    # TODO

In [18]:
#data5_relevance

In [19]:
#df

In [20]:
# Tutorial: Merge and Join dataframes with Panda
# <https://www.shanelynn.ie/merge-join-dataframes-python-pandas-index-1/>

# Reference: <https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.merge.html>

# Merge data5_relevance and df on filename
result = pd.merge(data5_relevance,
                  df,
                  on='filename')

# result

In [21]:
# TODO: Still debugging

#type(data2_full['L'])
#data2_full['L'].values
#data2_full['L'].values + data2_full['W'].values
#str(data2_full['L'])

# TODO: Should nest inside df_append_aggregate_LWE
def get_aggregate_LWE(dataframe, index):
    # print('DEBUG: get_aggregate_LWE(dataframe={0}, index={1})'.format(id(dataframe), index))
    try:
        this_l = int(dataframe['L'][index])
    except ValueError:
        this_l = '?'
    # print('DEBUG: this_l=', this_l)
    try:
        this_w = int(dataframe['W'][index])
    except ValueError:
        this_w = '?'
    # print('DEBUG: this_w=', this_w)
    try:
        this_e = int(dataframe['E'][index])
    except ValueError:
        this_e = '?'
    # print('DEBUG: this_e=', this_e)
    s = "{0}/{1}/{2}".format(this_l, this_w, this_e)
    return s

#int(data2_full['W'][1])
#print(get_aggregate_LWE(data2_full, 0))
#print(get_aggregate_LWE(data2_full, 3))
#data2_full.count()

def df_append_aggregate_LWE(dataframe, col_name):
    """Append to dataframe a new column col_name
    with strings 'L/W/E' created as the aggregation of columns L, W and E"""
    col = []
    for k in dataframe.index:
        #print("DEBUG: k=", k)
        entry = get_aggregate_LWE(dataframe, k)
        #print("DEBUG: entry=", entry)
        col.append(entry)
    #print("DEBUG: col=", col)
    dataframe[col_name] = col

         
# Create a new column 'LWE_full' with the aggregation of columns 'L'/'W'/'E'
df_append_aggregate_LWE(data2_full, "LWE_full")
#data2_full.head()
data2_full
#
# Join data2_full on filename
result = pd.merge(result, data2_full, on='filename')
#result

# Create a new column 'data3_LWE_minimal' with the aggregation of columns 'L'/'W'/'E'
df_append_aggregate_LWE(data3_minimized, "LWE_minimized")
#data3_minimized.head()
data3_minimized
#
# Join data3_minimize on filename
result = pd.merge(result, data3_minimized, on='filename')
#result

In [22]:
#result.sort_values(by='rel')
#x = result.sort_values(by='rel')[['filename', 'rel', 'notes', 'path', 'confidence']] # , 'description'
x = result.sort_values(by='rel')[['filename', 'rel', 'notes', 'path', 'confidence', 'LWE_full', 'LWE_minimized']] # , 'description'

# type(x)    # ==> pandas.core.frame.DataFrame
# x

In [23]:
# See <https://stackoverflow.com/questions/25698448/how-to-embed-html-into-ipython-output>

from IPython.core.display import display, HTML
display(HTML('<h3>Result: Prioritized list of Coccinelle scripts</h3>'))
display(HTML(x.to_html()))

Unnamed: 0,filename,rel,notes,path,confidence,LWE_full,LWE_minimized
22,kfreeaddr.cocci,1,,scripts/coccinelle/free,High,?/?/?,?/?/?
42,ifaddr.cocci,1,,scripts/coccinelle/misc,High,?/?/?,?/?/?
40,cstptr.cocci,1,,scripts/coccinelle/misc,High,?/?/?,?/?/?
54,kmerr.cocci,2,,scripts/coccinelle/null,High,?/?/?,?/?/?
53,eno.cocci,2,,scripts/coccinelle/null,High,?/?/?,?/?/?
18,devm_free.cocci,2,Beware of false positives,scripts/coccinelle/free,Moderate,?/?/?,?/?/?
31,flags.cocci,2,,scripts/coccinelle/locks,High,?/?/?,?/?/?
46,of_table.cocci,2,,scripts/coccinelle/misc,Medium,?/?/?,?/?/?
45,noderef.cocci,2,,scripts/coccinelle/misc,High,?/?/?,?/?/?
21,kfree.cocci,2,Beware of false positives,scripts/coccinelle/free,Moderate,?/?/?,?/?/?
