# Test inspection for openHAB repositories

This notebook focuses on extracting test cases from the multiple openHAB repositories on GitHub in order to analyse the kinds of tests developers wrote. We can also classify those tests by grouping them according to their intention.

In [8]:
import os
from os.path import join as pjoin
import ast
import pandas as pd
from collections import defaultdict   


In [6]:
# Folder (relative to this notebook) whose entries are listed below.
components_dir = "../repos/"

# Keep only the entries whose name contains no dot.
components = list(filter(lambda entry: "." not in entry, os.listdir(components_dir)))

In [7]:
# Display the entry names collected from components_dir.
components

['openhab1-addons',
 'openhab-util',
 'openhab-core',
 'openhab-syno-spk',
 'openhab-alexa',
 'openhab-docs',
 'quercus-osgi',
 'openhab-linuxpkg',
 'openhab-docker',
 'openhab-snap',
 'openhab-deps-repo',
 'openhab-windows',
 'openhab-pebble',
 'nrjavaserial',
 'openhab-bundles',
 'openhab-qnap-qpkg',
 'openhabian',
 'static-code-analysis',
 'openhab-cloud',
 'openhab-ios',
 'openhab-distro',
 'openhab-android',
 'openhab-addons']

In [5]:
def count_methods_classes_lines(file_path):
    """Count function definitions, class definitions and lines of a Python file.

    The file is parsed with ``ast`` and every node of the tree is visited, so
    nested functions, methods and nested classes are all included in the
    counts. Both ``def`` and ``async def`` definitions count as methods.

    Parameters
    ----------
    file_path : str or path-like
        Path of the Python source file to inspect.

    Returns
    -------
    tuple of (int, int, int)
        ``(num_methods, num_classes, num_lines)`` where ``num_lines`` is the
        number of lines of the file as given by ``str.splitlines``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    SyntaxError
        If ``ast.parse`` cannot parse the content (for example when the file
        is not Python source).
    """
    # Read as UTF-8 (the default encoding of Python source) instead of the
    # locale-dependent default; "utf-8-sig" also drops a leading BOM.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        code = file.read()

    tree = ast.parse(code)

    # Initialize counters
    num_methods = 0
    num_classes = 0
    num_lines = len(code.splitlines())

    for node in ast.walk(tree):
        # ``async def`` has its own node type and must be counted as well.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            num_methods += 1
        elif isinstance(node, ast.ClassDef):
            num_classes += 1

    return num_methods, num_classes, num_lines

In [None]:
def countDict(input_list):
    """Return a dict mapping each element of ``input_list`` to its frequency.

    The entries of the returned dict are ordered from the most to the least
    frequent element; elements with the same frequency keep the order in
    which they first appear in ``input_list``.
    """
    tally = {}
    for element in input_list:
        tally[element] = tally.get(element, 0) + 1

    # sorted() is stable, so ties stay in first-seen order.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked)

In [None]:
# File extensions checked by testCodeAnalysis when it collects files.
language_extensions = [
    '.java',
    '.py',
    '.js',
    '.rb',
]


In [None]:
def testCodeAnalysis(codeFolder):
    codeFiles=[]
    testCodeFiles=[]

    for root,dir,files in os.walk(codeFolder):
        for file in files:
            file_extension = file[1]
            if file_extension in language_extensions:
                if 'test' in file_extension:
                    testCodeFiles.append((root,file))
                else:
                    
                    codeFiles.append((root,file))


    matchedTests=[]
    for cRoot,cfile in codeFiles:
        for tRoot, tfile in testCodeFiles:
            if cfile in tfile and "test" in tfile:
                matchedTests.append((pjoin(cRoot,cfile),pjoin(tRoot,tfile)))
                break
            
    return matchedTests,codeFiles, testCodeFiles

In [None]:
# Build one summary row per app: whether a test folder exists, the percentage
# of code files that have a matching test file, and the imports reported for
# the matched test files.
#
# NOTE(review): this cell uses names that are not defined in the cells shown
# above it (tqdm, apps, test_components_dir, get_filelist_imports,
# get_project_imports) -- make sure an earlier cell provides them.
# NOTE(review): testCodeAnalysis is called with two folders here, so its
# definition has to accept the test folder as a second argument.
testCodeColumns = ["app", "test_code_exists", "test_code%", "python_test_imports"]
testCodeRows = []
for app in tqdm(apps):
    testCodeFolder = pjoin(test_components_dir, app)
    if not os.path.exists(testCodeFolder):
        testCodeRows.append([app, "False", "N/A", "N/A"])
        continue

    codeFolder = pjoin(components_dir, app)
    matchedTests, codeFiles, testCodeFiles = testCodeAnalysis(codeFolder, testCodeFolder)
    # Without any code file there is nothing to cover: report 0.0 instead of
    # failing with ZeroDivisionError.
    tcPerc = round(len(matchedTests) / len(codeFiles) * 100, 1) if codeFiles else 0.0

    # Second element of each matched pair is the test file path.
    matchedTestPaths = [pair[1] for pair in matchedTests]
    totalImports = get_filelist_imports(matchedTestPaths)
    totalImportsString = ", ".join(totalImports)
    testCodeRows.append([app, "True", tcPerc, totalImportsString])

# Extra row holding the imports returned for "test_local_imports".
localImports = get_project_imports("test_local_imports")
localImportsString = ", ".join(localImports)
testCodeRows.append(["local_imports", "True", "N/A", localImportsString])

# Create the frame once instead of growing it row by row.
testCodeDF = pd.DataFrame(testCodeRows, columns=testCodeColumns)

In [None]:
# Count how often each import name occurs across the comma-separated strings
# stored in the "python_test_imports" column of testCodeDF.
importDict = {}
allTestImports = testCodeDF["python_test_imports"].tolist()
for impString in allTestImports:
    for imp in (part.strip() for part in impString.split(",")):
        # Skip empty fragments and the "N/A" placeholder written for rows
        # without a test folder; neither of them is an import.
        if imp == "" or imp == "N/A":
            continue
        # dict.get replaces the former bare ``except:``, which would also
        # have hidden unrelated errors.
        importDict[imp] = importDict.get(imp, 0) + 1

In [None]:
# Same entries as importDict, ordered from the highest to the lowest count
# (entries with equal counts keep their order from importDict).
sorted_importDict = {
    name: count
    for name, count in sorted(importDict.items(), key=lambda pair: pair[1], reverse=True)
}
sorted_importDict