## [tool] 3. PR Vulnerability Reviewer

In [1]:
import codecs
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import random
import colorama
from colorama import Fore, Back, Style
from IPython.display import clear_output
from ast import literal_eval

import settings
from utils import vplot, vprint

In [2]:
# Frameworks whose distilled PR + commit tables are reviewed in this notebook.
FRAMEWORKS = [
    'tensorflow',
    'opencv',
    'pytorch',
    'keras',
    'caffe',
]

# Location prefix of the distilled CSV files, as configured in the project settings.
FILTERED_DIR = settings.DATA_CONFIG['distilled_dir']

In [3]:
def read_csv(framework):
    """Load the distilled PR + commit table of one framework.

    Parameters
    ----------
    framework : str
        Framework name; it is interpolated into the CSV file name
        ``vuln_<framework>_pr+commit.csv`` appended to ``FILTERED_DIR``.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.
    """
    return pd.read_csv(FILTERED_DIR + f'vuln_{framework}_pr+commit.csv')

In [4]:
# Eagerly load one DataFrame per framework, keyed by framework name,
# so later cells can look records up without touching the disk again.
dfs = dict()
for framework in FRAMEWORKS:
    dfs[framework] = read_csv(framework=framework)

### Parse Commit + PR

In [5]:
def highlighter(text, keyword):
    """Return ``text`` with every occurrence of ``keyword`` shown in bright red.

    The keyword is matched literally and case-sensitively. It is NOT
    interpreted as a regular expression, so keywords containing regex
    metacharacters (``c++``, ``(uaf)``, ``a.b`` ...) are handled safely.

    Parameters
    ----------
    text : str or missing value
        Text to decorate. ``None`` and NaN (e.g. what pandas produces for an
        empty CSV cell) become ``''``; any other non-string value is
        converted with ``str``.
    keyword : str
        Literal substring to highlight. An empty or non-string keyword
        leaves the text unchanged.

    Returns
    -------
    str
        The text with colorama/ANSI colour codes wrapped around each match.
    """
    if not isinstance(text, str):
        # NaN is the only float that is not equal to itself.
        is_missing = text is None or (isinstance(text, float) and text != text)
        if is_missing:
            return ''
        text = str(text)

    # Nothing to highlight: bail out before building any escape sequences.
    if not isinstance(keyword, str) or not keyword or keyword not in text:
        return text

    highlighted = Style.BRIGHT + Fore.RED + keyword + Fore.RESET + Style.NORMAL
    return text.replace(keyword, highlighted)

In [6]:
def parse_list_string(l):
    """Decode a stringified list (e.g. ``"['a', 'b']"``) into a list of strings.

    The value is first parsed with ``ast.literal_eval``, which keeps quotes,
    brackets and commas that belong to an element and resolves escape
    sequences such as ``\\n``. If the text is not a valid Python literal
    (for instance because it was truncated), a best-effort regex split is
    used instead.

    Parameters
    ----------
    l : str, list, tuple or missing value
        The stringified list. An actual list/tuple is accepted as-is; any
        other non-string value (such as NaN) yields an empty list.

    Returns
    -------
    list of str
        The decoded elements; ``"[]"`` decodes to an empty list.
    """
    if isinstance(l, (list, tuple)):
        return [str(item) for item in l]
    if not isinstance(l, str):
        return []

    try:
        parsed = literal_eval(l)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        parsed = None
    if isinstance(parsed, (list, tuple)):
        return [str(item) for item in parsed]

    # Best-effort fallback for text that is not a well-formed list literal:
    # drop brackets, split on quote-comma-quote boundaries, strip quotes.
    unescaped = (
        l.replace("\\n", "\n")
        .replace("\\t", "\t")
        .replace('[', '')
        .replace(']', '')
    )
    parts = re.split(r"['\"],\s['\"]", unescaped)
    return [part.replace("'", '').replace('"', '') for part in parts]

In [7]:
def print_pr(pr, framework):
    """Print a human-readable report for one pull request record.

    Parameters
    ----------
    pr : mapping (e.g. one row of a framework DataFrame)
        Must provide the keys 'keyword', 'title', 'description',
        'pr_number', 'pr_html', 'commit_html', 'message' and
        'changed_files'. The last three are stringified lists that are
        decoded with ``parse_list_string``.
    framework : str
        Framework label shown in the report header.

    Returns
    -------
    None
        The report is written to stdout; occurrences of the record's keyword
        in the title, description and commit messages are highlighted.
    """
    keyword = pr['keyword']
    title = highlighter(text=pr['title'], keyword=keyword)
    description = highlighter(text=pr['description'], keyword=keyword)
    pr_number = pr['pr_number']
    pr_html = pr['pr_html']
    commit_urls = parse_list_string(pr['commit_html'])
    commit_msgs = parse_list_string(pr['message'])
    # np.unique both de-duplicates and sorts the file paths.
    changed_files = np.unique(parse_list_string(pr['changed_files']))

    print(f"Framework: {framework} | Number: {pr_number} | URL: {pr_html} | Keyword: {keyword}")
    print(f"\n## Title\n{title}")
    print(f"\n## Description\n{description}\n")
    print(f"\n## Message (Total:{len(commit_urls)})")
    for index, message in enumerate(commit_msgs):
        # URLs and messages are decoded independently and may differ in
        # length; use a placeholder instead of raising IndexError mid-report.
        url = commit_urls[index] if index < len(commit_urls) else 'N/A'
        print(f'[Commit {index+1}]  URL: {url}')
        print(highlighter(text=message, keyword=keyword), '\n')
    print('------------|')

    print(f"\n## File Involve (Total:{len(changed_files)})")
    for file in changed_files:
        print(file)

## Preview

In [8]:
def preview(dfs, pr_number, framework):
    """Print the report of the first record matching ``pr_number``.

    Parameters
    ----------
    dfs : dict
        Maps a framework name to its DataFrame; each frame is expected to
        have a 'pr_number' column.
    pr_number : int or float
        Pull-request number to look up (compared with ``==``).
    framework : str
        Key into ``dfs``. An unknown key raises ``KeyError``.

    Returns
    -------
    None
        Output goes to stdout. "No matched record" is printed only when the
        lookup really finds nothing; any other failure is reported with its
        exception type and message instead of being mislabelled.
    """
    df = dfs[framework]
    try:
        matches = df[df['pr_number'] == pr_number]
        if matches.empty:
            print("No matched record")
            return
        print_pr(pr=matches.iloc[0], framework=framework)
    except Exception as exc:
        # Keep the notebook running, but say what actually went wrong.
        print(f"Could not preview PR {pr_number!r} ({framework}): {type(exc).__name__}: {exc}")

In [10]:
# Record to inspect: pick the framework table and the pull-request number.
framework, pr_number = 'opencv', 23112.0

preview(dfs=dfs, pr_number=pr_number, framework=framework)

Framework: opencv | Number: 23112 | URL: https://github.com/opencv/opencv/pull/23112 | Keyword: segmentation fault

## Title
DNN: fix possible [1m[31msegmentation fault[39m[22m error in winograd on x86

## Description
The [1m[31msegmentation fault[39m[22m error only happens in AVX only platform, while the `CV_TRY_AVX2` is true and `checkHardwareSupport(CPU_AVX2)` is false. And this inconsistency will cause Winograd branch code to enter the wrong memory block. And in AVX only platform, the Winograd can not be speeded up by AVX or AVX2 which will be slower than the generic Convolution branch. So, in short term, I think disabling Winograd is a better solution.

And for a long-term solution, we should support Winograd at https://github.com/opencv/opencv/blob/4.x/modules/dnn/src/layers/layers_common.simd.hpp. After that, OpenCV can automatically generate AVX or AVX2 code for Winograd based on CPU instruction set. I will try to implement this.

detailed discussion: https://github.com