In [37]:
import pandas as pd
import numpy as np
from radon.visitors import ComplexityVisitor
from radon.complexity import cc_rank
from radon.metrics import h_visit, mi_visit
from radon.raw import analyze

# Загрузка данных багфиксов

In [38]:
# Directory that holds the bug-fix dataset; the metrics pickle is written back here later.
path_to_data = '/home/kks/zephyr_data/buggy_code/'
# NOTE(review): unpickling can execute arbitrary code, so only load pickles from a trusted source.
source_code_data = pd.read_pickle(
    path_to_data + 'bugfixes_test.pickle'
)

In [39]:
import re

def removeExtraSpaces(snippet):
    """Shift a code snippet left by the indentation of its first non-blank line.

    The indentation width is the offset of the first non-whitespace character
    on the first line that contains one. Every line then loses at most that
    many *leading whitespace* characters; a line indented less than the
    reference line (e.g. the body of a multi-line string) keeps all of its
    non-whitespace text instead of being truncated.

    Parameters
    ----------
    snippet : str
        Source text whose lines are separated by ``'\\n'``.

    Returns
    -------
    str
        The re-indented snippet. A snippet consisting only of whitespace
        (or an empty string) is returned unchanged.
    """
    snippet_lines = snippet.split('\n')

    # Raw string on purpose: '\S' in a plain literal is an invalid escape sequence.
    first_code_match = next(
        (match
         for match in (re.search(r'\S', snippet_line) for snippet_line in snippet_lines)
         if match is not None),
        None,
    )
    if first_code_match is None:
        # Nothing but whitespace: there is no indentation level to remove.
        return snippet
    number_of_spaces_to_remove = first_code_match.start()

    def shift_left(snippet_line):
        # Only whitespace inside the leading window is dropped, so code on
        # shallower lines is never cut off.
        leading_window = snippet_line[:number_of_spaces_to_remove]
        return leading_window.lstrip() + snippet_line[number_of_spaces_to_remove:]

    return '\n'.join(shift_left(snippet_line) for snippet_line in snippet_lines)


def refineSnippet(snippet):
    """Return a version of ``snippet`` that radon can parse, or NaN if none exists.

    The snippet is first tried as-is. If ``ComplexityVisitor.from_code`` fails,
    it is re-indented with ``removeExtraSpaces`` and tried once more.

    Parameters
    ----------
    snippet : str
        Source code of a single snippet.

    Returns
    -------
    str or float
        The original snippet, the re-indented snippet, or ``np.nan`` when
        neither variant can be analysed.
    """
    # `except Exception` instead of a bare `except` so that KeyboardInterrupt /
    # SystemExit still propagate and a long `.apply` run can be interrupted.
    try:
        ComplexityVisitor.from_code(snippet)
        return snippet
    except Exception:
        pass

    try:
        snippet_with_removed_extra_spaces = removeExtraSpaces(snippet)
        ComplexityVisitor.from_code(snippet_with_removed_extra_spaces)
    except Exception:
        # Neither variant could be analysed; mark the row as unusable.
        return np.nan
    return snippet_with_removed_extra_spaces

In [40]:
# Replace each snippet with its parsable form (NaN where refineSnippet gives up).
source_code_data['before_merge'], source_code_data['after_merge'] = (
    source_code_data[merge_column].apply(refineSnippet)
    for merge_column in ('before_merge', 'after_merge')
)

In [41]:
# Keep only the rows whose snippets are present both before and after the merge.
source_code_data = source_code_data.loc[
    source_code_data['before_merge'].notna()
    & source_code_data['after_merge'].notna()
]

# Функции вычисления метрик

In [42]:
def computeCyclomaticComplexity(snippet):
    """Return the ``functions_complexity`` radon reports for ``snippet``.

    Parameters
    ----------
    snippet : str
        Source code to analyse with ``ComplexityVisitor.from_code``.
    """
    visitor = ComplexityVisitor.from_code(snippet)
    return visitor.functions_complexity


def isFunction(snippet):
    """Tell whether the first function radon finds in ``snippet`` is not a method.

    Parameters
    ----------
    snippet : str
        Source code to analyse with ``ComplexityVisitor.from_code``.

    Returns
    -------
    bool
        ``not is_method`` for the first entry of the visitor's ``functions``
        list, or ``False`` when the snippet contains no such entry (instead of
        failing with ``IndexError``).
    """
    # NOTE(review): radon appears to set `is_method` only for functions visited
    # inside a class body, so this may always be True for top-level `def`s - confirm.
    found_functions = ComplexityVisitor.from_code(snippet).functions
    if not found_functions:
        # A snippet that parses but has no top-level function is not a function.
        return False
    return not found_functions[0].is_method


def computeHastadMetrics(snippet):
    """Collect the Halstead metrics of ``snippet`` into a dict.

    Reads the ``total`` report returned by radon's ``h_visit`` and keeps every
    attribute whose name does not start with ``'_'`` and contains neither
    ``'index'`` nor ``'count'``.

    Returns
    -------
    dict
        Attribute name -> attribute value, in ``dir()`` order.
    """
    total_report = h_visit(snippet).total
    collected = {}
    for attribute_name in dir(total_report):
        # Presumably filters out private names and the tuple methods
        # `index` / `count` - confirm against the radon report type.
        if attribute_name.startswith('_'):
            continue
        if 'index' in attribute_name or 'count' in attribute_name:
            continue
        collected[attribute_name] = getattr(total_report, attribute_name)
    return collected


def computeRawMetrics(snippet):
    """Collect radon's raw metrics of ``snippet`` into a dict.

    Takes the object returned by ``analyze`` and keeps every attribute whose
    name does not start with ``'_'`` and contains neither ``'index'`` nor
    ``'count'``.

    Returns
    -------
    dict
        Attribute name -> attribute value, in ``dir()`` order.
    """
    analysis = analyze(snippet)

    def is_metric_name(attribute_name):
        # Same three exclusions as a single predicate.
        return not (
            attribute_name.startswith('_')
            or 'index' in attribute_name
            or 'count' in attribute_name
        )

    return {
        attribute_name: getattr(analysis, attribute_name)
        for attribute_name in dir(analysis)
        if is_metric_name(attribute_name)
    }


def computeMIindex(snippet):
    """Return the value of radon's ``mi_visit`` for ``snippet`` with ``multi=False``.

    Parameters
    ----------
    snippet : str
        Source code to analyse.
    """
    maintainability_index = mi_visit(snippet, multi=False)
    return maintainability_index

# Цикломатическая сложность

In [43]:
# Cyclomatic complexity of the snippet before and after the fix.
source_code_data['cc_before'] = source_code_data['before_merge'].apply(computeCyclomaticComplexity)
source_code_data['cc_after'] = source_code_data['after_merge'].apply(computeCyclomaticComplexity)
# Whether the first function found in each snippet is a plain function (not a method).
source_code_data['is function_before'] = source_code_data['before_merge'].apply(isFunction)
source_code_data['is function_after'] = source_code_data['after_merge'].apply(isFunction)
# Complexity rank of each side; the "after" rank must be derived from `cc_after`.
source_code_data['cc_rank_before'] = source_code_data['cc_before'].apply(cc_rank)
source_code_data['cc_rank_after'] = source_code_data['cc_after'].apply(cc_rank)

# Метрики Холстеда

In [44]:
# One dict of Halstead metrics per row, for the pre-fix and post-fix snippets.
hastad_metrics_before, hastad_metrics_after = (
    source_code_data[merge_column].apply(computeHastadMetrics)
    for merge_column in ('before_merge', 'after_merge')
)

In [45]:
# Spread the per-row metric dicts into '<metric>_before' / '<metric>_after' columns.
# The dict of the last row supplies the metric names.
for col in hastad_metrics_before.iloc[-1]:
    source_code_data[col + '_before'] = hastad_metrics_before.apply(
        lambda row_metrics: row_metrics[col]
    )
    source_code_data[col + '_after'] = hastad_metrics_after.apply(
        lambda row_metrics: row_metrics[col]
    )

# Сырые метрики

In [46]:
# One dict of raw metrics per row, for the pre-fix and post-fix snippets.
raw_metrics_before, raw_metrics_after = (
    source_code_data[merge_column].apply(computeRawMetrics)
    for merge_column in ('before_merge', 'after_merge')
)

In [47]:
# Spread the per-row metric dicts into '<metric>_before' / '<metric>_after' columns.
# The dict of the last row supplies the metric names.
for col in raw_metrics_before.iloc[-1]:
    source_code_data[col + '_before'] = raw_metrics_before.apply(
        lambda row_metrics: row_metrics[col]
    )
    source_code_data[col + '_after'] = raw_metrics_after.apply(
        lambda row_metrics: row_metrics[col]
    )

# Индекс сопровождаемости (Maintainability Index)

In [48]:
# Result of computeMIindex for the snippet on each side of the fix.
source_code_data['MI_before'], source_code_data['MI_after'] = (
    source_code_data[merge_column].apply(computeMIindex)
    for merge_column in ('before_merge', 'after_merge')
)

In [49]:
# Persist the frame with all metric columns in the same directory as the input data.
source_code_data.to_pickle(
    path=path_to_data + 'bugfixes_test_metrics.pickle'
)

In [50]:
# Show the final frame, including the metric columns added above.
source_code_data

Unnamed: 0,before_merge,after_merge,url,bug type,bug description,bug filename,bug function_name,bug lines,full_traceback,traceback_type,...,loc_before,loc_after,multi_before,multi_after,single_comments_before,single_comments_after,sloc_before,sloc_after,MI_before,MI_after
353547,def remove_lb_backend_address_pool_address(cmd...,def remove_lb_backend_address_pool_address(cmd...,https://github.com/Azure/azure-cli/issues/14342,CWE-248: Uncaught Exception,Uncaught Exception when adding an address to a...,src/azure-cli/azure/cli/command_modules/networ...,remove_lb_backend_address_pool_address,[5],john@Azure:~$ az network lb address-pool addre...,AttributeError,...,7,9,0,0,0,0,7,9,77.880583,71.425380
355403,def split_action(arguments):\n class SplitA...,def split_action(arguments):\n class SplitA...,https://github.com/Azure/azure-cli/issues/793,CWE-754: Improper Check for Unusual or Excepti...,Attribute `arg.name` of `namespace` can be a s...,src/azure-cli-core/azure/cli/core/commands/arm.py,add_id_parameters.split_action,[16],'str' object has no attribute 'append'\nTraceb...,AttributeError,...,23,30,5,5,0,0,15,22,83.765366,83.845833
379188,"def parse_series(self, data, **kwargs):\n l...","def parse_series(self, data, **kwargs):\n l...",https://github.com/Flexget/Flexget/issues/2276,CWE-754: Improper Check for Unusual or Excepti...,"`guess_result.get('title')` can return None, t...",flexget/plugins/parsers/parser_guessit.py,ParserGuessit.parse_series,,2018-12-10 19:39 DEBUG parser_guessit movin...,AttributeError,...,154,156,0,0,12,12,136,138,40.457703,40.026548
99259,"def __init__(self, **kwargs):\n # Save valu...","def __init__(self, **kwargs):\n # Save valu...",https://github.com/GenericMappingTools/pygmt/i...,CWE-754: Improper Check for Unusual or Excepti...,There can be no value for key in default. No c...,pygmt/modules.py,config.__init__,[6],pygmt-session [ERROR]: Syntax error: Unrecogni...,pygmt.exceptions.GMTCLibError,...,13,40,0,0,2,2,10,37,100.000000,83.132187
403629,"def dump_checkpoint(self, weights_only: bool =...","def dump_checkpoint(self, weights_only: bool =...",https://github.com/PyTorchLightning/pytorch-li...,CWE-754: Improper Check for Unusual or Excepti...,There is no check if scaler is None,pytorch_lightning/trainer/training_io.py,TrainerIOMixin.dump_checkpoint,[46],Running command:\npython pipe/train_cnn.py\n/h...,AttributeError,...,68,68,6,6,11,11,37,37,76.393348,75.571340
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19489,"def process(self):\n """"""\n The Ransomwar...","def process(self):\n """"""\n The Ransomwar...",https://github.com/certtools/intelmq/issues/967,CWE-167: Improper Handling of Additional Speci...,when processing a variable io.StringIO(raw_rep...,intelmq/bots/parsers/abusech/parser_ransomware.py,AbuseCHRansomwaretrackerParserBot.process,[11],"Traceback (most recent call last):\nFile ""<std...",_csv.Error,...,44,45,5,5,0,0,35,36,47.541555,47.000054
19586,@staticmethod\ndef sanitize(value):\n value...,@staticmethod\ndef sanitize(value):\n value...,https://github.com/certtools/intelmq/issues/1022,CWE-237: Improper Handling of Structural Eleme...,when calling sanitize from self.__sanitize_val...,intelmq/lib/harmonization.py,FQDN.sanitize,[3],'source_queue': 'shadowserver-parser-dns-open-...,UnicodeError,...,5,5,0,0,0,0,5,5,100.000000,100.000000
20894,"def __init__(self, execer, ctx, **kwargs):\n ...","def __init__(self, execer, ctx, **kwargs):\n ...",https://github.com/xonsh/xonsh/issues/1070,CWE-232: Improper Handling of Undefined Values,"when working with the xonsh shell, a PromptToo...",xonsh/base_shell.py,BaseShell.__init__,"[5, 6]",cryzed@arch ~ $ xonsh update-aur-gits.xsh\nTra...,AttributeError,...,14,13,0,0,0,0,14,13,100.000000,100.000000
20902,"def cd(args, stdin=None):\n """"""Changes the ...","def cd(args, stdin=None):\n """"""Changes the ...",https://github.com/xonsh/xonsh/issues/1667,"CWE-126: Buffer Over-read, CWE-248: Uncaught E...",the xonsh emulates command line and Python in ...,xonsh/dirstack.py,cd,"[45, 46]","Traceback (most recent call last):\nFile ""C:\U...",IndexError,...,56,56,4,4,1,1,48,48,50.684319,50.342384
