# Warm Up

## Imports

In [None]:
from __future__ import print_function

In [None]:
import os,sys,json
import pandas as pd
import numpy as np
from copy import deepcopy
from glob import glob as re
from copy import deepcopy as dc

!{sys.executable} -m pip install --upgrade funbelts
!{sys.executable} -m pip install --upgrade python-git-info
!{sys.executable} -m pip install --upgrade deczoo
import funbelts as ut
from gitinfo import get_git_info as get_git
from deczoo import *

import Semgrep as semgrep
import Cryptolation as cryptolation
import Bandit as bandit
import DLint as dlint
import Licma as licma

import PyScan as pyscan
import PyMetrics as pymetrics

## From tasks.py (Base)

In [None]:
#!/usr/bin/env python3
import os,sys
from invoke import task


@task
def gitr(c):
	# Print and then run, in order, the two commands that set the global git
	# e-mail and user name. The task context `c` is not used.
	git_commands = (
		'git config --global user.email "EMAIL"',
		'git config --global user.name "UserName (dev@lite)"',
	)
	for command in git_commands:
		print(command)
		os.system(command)

@task
def cleanenv(c):
	# Print and run a recursive, auto-confirmed `rm` for every path listed
	# below. The task context `c` is not used.
	targets = (
		'CachedExtensions/',
		'CachedExtensionVSIXs/',
		'User/',
		'Machine/',
		'extensions/',
		'logs/',
		'coder.json',
		'machineid',
	)
	for target in targets:
		command = "yes|rm -r " + str(target)
		print(command)
		os.system(command)

@task
def execute(c):
	# Placeholder task: only prints a fixed message; the context `c` is unused.
	print("Executing")


## From make.py -> tasks.py

In [None]:
#!/usr/bin/env python3
import os, sys, pwd, re, uuid, pwd, json, time, platform, dis
from abc import ABC, abstractmethod
from collections.abc import Callable
from typing import List, Any
from glob import glob
from copy import copy as dc

if __name__ == '__main__':
	# Lower-cased, stripped command-line words; `cmd` starts out empty.
	cmd = []
	args = {word.lower().strip() for word in sys.argv}
	if "install" in args:
		# Upgrade the packages that the imports right below rely on.
		def install(name):
			return os.system(f"{sys.executable} -m pip install --upgrade {name}")

		for package in ('pandas', 'funbelts'):
			install(package)

import pandas as pd
from funbelts import progressBar, isMac, mac_addr as getMacAddr, excelwriter, run as try_fail, wait_for, user, clean_string as filterForName
import funbelts as ut


def get_file_output(path, suffix):
	"""Return ``path`` with every ".py" removed, passed through filterForName, plus ``suffix``."""
	without_extension = path.replace(".py", "")
	return filterForName(without_extension) + suffix


docker_base = 'docker' if isMac() else 'sudo docker'


def mac_addr():
	"""Make sure MAC_ADDRESS.txt exists and return its file name.

	When the file is missing it is created and filled with the value returned
	by getMacAddr(); an existing file is left untouched.
	"""
	file_name = "MAC_ADDRESS.txt"
	if os.path.exists(file_name):
		return file_name

	with open(file_name, 'w+') as writer:
		writer.write(getMacAddr())
	return file_name

def rules():
	"""Write one ``<tool>_ruleset.csv`` per scanner mapping its rule IDs to Cryptolation IDs.

	Rules whose mapped value is -1 are left out. Any exception raised while
	reading a scanner's rules is printed and the next scanner is processed.
	"""
	import Bandit as bandit
	import Licma as licma
	import DLint as dlint
	import Semgrep as semgrep

	scanners = [bandit.application(), dlint.application(), licma.application(), semgrep.application()]
	for scan in scanners:
		with open(scan.name()+"_ruleset.csv","w+") as writer:
			writer.write(f"{scan.name()} ID,Cryptolation ID\n")
			try:
				for key,value in scan.rules().items():
					if value == -1:
						continue
					writer.write(f"{key},{value}\n")
			except Exception as e:
				print(e)

def indicateBase():
	"""Touch MAC.txt or LINUX.txt (chosen by isMac()), refresh the MAC address file, return the marker name."""
	platform_tag = 'MAC' if isMac() else 'LINUX'
	file_name = str(platform_tag) + ".txt"
	try_fail(f"touch {file_name}")

	mac_addr()
	return file_name


def get_docker_connect():
	"""Return the docker socket path: the per-user one when MAC.txt exists, else /var/run/docker.sock."""
	if not os.path.exists("MAC.txt"):
		return "/var/run/docker.sock"
	return f"/Users/{user()}/.docker/run/docker.sock"


class BaseApplicationClass(ABC):
	"""Abstract base for the tool wrappers driven by this file.

	Subclasses supply the download link, the module list and the
	install/scan/mapp/clean/mapp_rule_id operations. This base keeps the
	name, version and links, downloads and hash-checks the tool archive, and
	offers a few environment helpers.
	"""

	def __init__(self, name: str = None, links=None):
		"""Store the tool name and links and reset the download bookkeeping.

		``links`` defaults to a fresh empty dict for every instance.
		"""
		super().__init__()
		self.__name = name
		# Filled in through version(); starts as None so version() can always be read.
		self.__version = None
		# A new dict per instance instead of one mutable default shared by all of them.
		self.__links = {} if links is None else links
		self.dl_folder = None
		self.__modules = None
		self.dl_name = None
		self.downloaded_files = []
		# Maps an archive file name to its expected SHA-256 hex digest (read by verify()).
		self.hash_files = {}

	def lazy_scanning(self):
		"""Return False; subclasses may override."""
		return False

	def name(self, compare: str = None):
		"""Return the tool name, or a case-insensitive match result when ``compare`` is given."""
		output = self.__name
		if compare:
			output = compare.lower() == output.lower()
		return output

	def version(self, setting_version: str = None):
		"""Optionally set, then return, the version (None until one has been set)."""
		if setting_version:
			self.__version = setting_version
		return self.__version

	@property
	def links(self):
		"""A copy (made with ``dc``) of the links passed to the constructor."""
		return dc(self.__links)

	@property
	@abstractmethod
	def dl_link(self):
		"""URL of the archive to download; download() does nothing when this is None."""
		pass

	@staticmethod
	def sha256sum(filename):
		"""Return the SHA-256 hex digest of ``filename``, read in 128 KiB chunks."""
		#https://stackoverflow.com/questions/22058048/hashing-a-file-in-python
		import hashlib
		h  = hashlib.sha256()
		b  = bytearray(128*1024)
		mv = memoryview(b)
		with open(filename, 'rb', buffering=0) as f:
			# readinto() returns 0 at end of file, which stops the iterator.
			for n in iter(lambda : f.readinto(mv), 0):
				h.update(mv[:n])
		return str(h.hexdigest())

	def verify(self, file_name):
		"""Return True only when ``file_name`` has a recorded hash and its current hash matches it."""
		print(f"Verifing the file {file_name}")
		if file_name not in self.hash_files:
			return False
		# Hash the file once and reuse the digest for both the message and the comparison.
		current_hash = BaseApplicationClass.sha256sum(file_name)
		print(f"Current Hash: {current_hash}")
		return current_hash == self.hash_files[file_name]

	def download(self):
		"""Fetch, verify and unpack the archive behind ``dl_link``.

		Returns True when there is nothing to download or the download worked,
		and False when any step raised an exception. The process exits with
		status -5 when the fetched archive does not match its recorded hash.
		"""
		if self.dl_link is None:
			return True

		output = True
		try:
			foil_name = self.dl_link.split('/')[-1]
			download_name = "DL_" + self.name() + "_" + foil_name

			if not os.path.exists(download_name):
				fetch_cmd = f"wget {self.dl_link}"
				print(fetch_cmd);try_fail(fetch_cmd)
				# Check the archive before unpacking anything from it, so a bad
				# download never leaves an extracted folder behind.
				if not self.verify(foil_name):
					print(f"The archived file {foil_name} does not match the saved hash")
					sys.exit(-5)
				unpack_cmd = f"mkdir {download_name};unzip {foil_name} -d {download_name}"
				print(unpack_cmd);try_fail(unpack_cmd)
				os.remove(foil_name)

			fldrs = os.listdir(download_name)[0]
			self.dl_folder = os.path.join(download_name, fldrs)
			self.downloaded_files += [download_name]
		except Exception:
			# SystemExit is not an Exception, so the exit above is no longer swallowed here.
			output = False

		return output

	@property
	def dl_files(self):
		"""The list of download folders recorded by download()."""
		return self.downloaded_files

	@property
	@abstractmethod
	def modules(self):
		"""The modules of the tool; read (and cached) through ``get_modules``."""
		pass

	@property
	def get_modules(self):
		"""Return ``modules``, evaluating it only while the cached value is still None."""
		if self.__modules is None:
			self.__modules = self.modules
		return self.__modules

	@abstractmethod
	def install(self):
		"""Install the tool."""
		pass

	@abstractmethod
	def scan(self, file_name: str, isDir: bool = False) -> (pd.DataFrame, int):
		"""Scan ``file_name`` (a directory when ``isDir``) and return (results frame, int)."""
		pass

	@abstractmethod
	def mapp(self,
			self_frame: pd.DataFrame,
			projectName: str,
			columns: List[str],
			gen,
			time_taken: str = None):
		"""Map the tool's own result frame onto the shared ``columns`` using the ``gen`` row builder."""
		pass

	@abstractmethod
	def clean(self) -> List[str]:
		"""Clean up after the tool; returns a list of strings."""
		pass

	@abstractmethod
	def mapp_rule_id(self, rule_id: str) -> str:
		"""Map one of the tool's rule IDs to another rule ID string."""
		pass

	def current_user(self):
		"""Return the value of user()."""
		return user()

	def base_is_mac(self):
		"""Return True when the MAC.txt marker file exists in the working directory."""
		return os.path.exists("MAC.txt")

	def is_docker_env(self):
		"""Return True when /.dockerenv exists or /proc/self/cgroup mentions 'docker'."""
		path = '/proc/self/cgroup'
		if os.path.exists('/.dockerenv'):
			return True
		if not os.path.isfile(path):
			return False
		# Close the cgroup file once it has been searched.
		with open(path) as cgroup:
			return any('docker' in line for line in cgroup)

#Reviewing
# Lookup table of the numeric error codes and their descriptions.
errorcodes = ut.arr_to_pd([
	{'ErrorCodes':code,'Description':description}
	for code, description in (
		(1, 'Error manually reviewing the context'),
		(2, 'Issue with checking the context'),
		(3, 'Not enough given context'),
	)
])

def grab_context(file_name, line_num, check:bool=False):
	"""Return ut.retrieve_context for ``file_name`` (prefixed with "/tests") at ``line_num``.

	With ``check`` set, the prefixed file name is printed first.
	"""
	rooted_name = "/tests" + file_name
	if check:
		print(rooted_name)
	return ut.retrieve_context(rooted_name, line_num, patternmatch=ut.import_global_context)


def check(cur_row, lines_for_context:int=5):
	"""Show one finding to the reviewer and return their verdict as (TP, TN, FP, FN).

	``cur_row`` must provide 'cryptolationID', 'Message', 'context' and 'Line'.

	Returns:
		(3,3,3,3) when the context has fewer than ``lines_for_context`` lines,
		(1,1,1,1) when the reviewer answers 4 (Err), otherwise a tuple with a
		single 1 in the chosen position. An empty or unknown answer counts as
		0 (TP).
	"""
	rule_info, message, context, line = cur_row['cryptolationID'], cur_row['Message'], cur_row['context'], cur_row['Line']
	context_lines_len = len(context.split('\n'))

	print(f"""
Current Rule :> {rule_info}
Message	  :> {message}
Error @	  :> {line}
# of Lines   :> {context_lines_len}

{context}
============================================================================================
""")

	if lines_for_context >  context_lines_len:
		print(f"The context only has {context_lines_len}, not the minimum {lines_for_context}")
		time.sleep(1)
		return (3,3,3,3)

	# The prompt used to show a literal "{0}" left over from string formatting.
	response = ut.to_int(input("Is this a 0:TP / 1:FP / 2:TN / 3:FN / 4:Err?"))
	if response is None or response not in [0,1,2,3,4]:
		response = 0

	# Answer 4 (Err) is listed in every branch, so it must set all four flags;
	# the former elif chain stopped after TP.
	TP = int(response in [0,4])
	FP = int(response in [1,4])
	TN = int(response in [2,4])
	FN = int(response in [3,4])

	return (TP,TN,FP,FN)


# Module-level lists that review_frame is written to append to: the reviewed
# rows, and a record for every exception it catches.
row_listing,errors = [],[]


def to_review(row):
	"""Tell whether ``row`` should be reviewed by hand.

	A row qualifies when its 'ProjectType' is "Benchmark" or "Top-Ranked", its
	'IsVuln' value is truthy and its 'cryptolationID' is not empty. All three
	values are read before any of them is tested.
	"""
	in_reviewed_group = row['ProjectType'] in ["Benchmark","Top-Ranked"]
	flagged = row['IsVuln']
	has_rule = ut.is_not_empty(row['cryptolationID'])

	if not in_reviewed_group:
		return in_reviewed_group
	if not flagged:
		return flagged
	return has_rule

def review_frame(frame: pd.DataFrame):
	"""Walk ``frame`` row by row, fill in missing context and collect manual verdicts.

	Every row is appended to the module-level ``row_listing``; failures are
	recorded in the module-level ``errors`` with Location 0 (context lookup)
	or Location 1 (manual check, which also sets the four verdict fields to 2).

	Returns:
		A frame built from ``row_listing``.
	"""
	# Both lists are rebound with += below; without this declaration Python
	# treats them as locals and raises UnboundLocalError on first use.
	global row_listing, errors

	for cur_row in frame.itertuples():
		row = cur_row._asdict()
		try:
			if ut.is_empty(row['context']) and row['IsVuln']:
				row['context'] = grab_context(row['qual_name'], ut.to_int(row['Line']))
		except Exception as e:
			errors += [{
				'FileName':row['qual_name'],
				'QualLoc':row['FullQualName'],
				'Location':0,
				'Exception':str(e),
			}]

		# Only rows accepted by to_review() are shown to the reviewer.
		try:
			if to_review(row):
				row['TP'],row['TN'],row['FP'],row['FN'] = check(row)
		except Exception as e:
			row['TP'],row['TN'],row['FP'],row['FN'] = 2,2,2,2
			errors += [{
				'FileName':row['qual_name'],
				'QualLoc':row['FullQualName'],
				'Location':1,
				'Exception':str(e),
			}]

		row_listing += [row]
	return ut.arr_to_pd(row_listing)
#Reviewing

def downthemall():
	"""Create the five tool applications, print each name and download() result, return the list."""
	import Bandit as bandit
	import Licma as licma
	import DLint as dlint
	import Semgrep as semgrep
	import Cryptolation as cryptolation

	tool_modules = (bandit, dlint, licma, semgrep, cryptolation)
	applications = [tool.application() for tool in tool_modules]
	for scan in applications:
		print(scan.name())
		print(scan.download())
	return applications

def modules(applications=None):
	"""Map each application's name to its ``get_modules`` value.

	When ``applications`` is None the list comes from downthemall().
	"""
	if applications is None:
		applications = downthemall()

	by_name = {}
	for app in applications:
		app_name = app.name()
		by_name[app_name] = app.get_modules
	return by_name

def match_data(sheet, _x:str='ToolName', _y:str='Module'):
	"""Report, for every distinct module in ``sheet``, which tools cover it.

	Args:
		sheet: frame with a tool column ``_x`` and a module column ``_y``.
		_x: name of the tool column.
		_y: name of the module column.

	Returns:
		A frame with one row per distinct ``_y`` value holding 'Module',
		'Count' (rows with that value), one boolean column per tool (True when
		the tool lists the module or something starting with it) and 'InTools'
		(the number of True tool columns).
	"""
	discovered = []

	tools = list(sheet[_x].unique())
	# A tool's module list does not depend on the module being checked, so it
	# is filtered once here instead of once per (module, tool) pair.
	modules_by_tool = [(tool, list(sheet[ (sheet[_x] == tool) ][_y])) for tool in tools]

	for mod in list(sheet[_y].unique()):
		temp = {
			'Module':mod,
			'Count':sheet[ (sheet[_y] == mod) ].shape[0]
		}
		in_tools = 0
		for tool, searched in modules_by_tool:
			contains = mod in searched or any(x.startswith(mod) for x in searched)

			temp[tool] = contains
			in_tools += int(contains)

		temp["InTools"] = in_tools
		discovered += [temp]
	return ut.arr_to_pd(discovered)

def bare_break_down(name, mod_string):
	"""Expand a dotted module path into one row per prefix.

	"a.b.c" yields the modules "a", "a.b" and "a.b.c", each paired with
	``name`` under 'ToolName'. The rows are returned through ut.arr_to_pd.
	"""
	pieces = mod_string.split(".")
	rows = [
		{
			'ToolName':name,
			'Module':".".join(pieces[:depth])
		}
		for depth in range(1, len(pieces) + 1)
	]
	return ut.arr_to_pd(rows)

def breaking_down_modules(apps=pd.DataFrame()):
	"""Break every distinct module of every application into its dotted prefixes.

	``apps`` is handed to modules(); each distinct module is expanded with
	bare_break_down() and all resulting rows are returned as one frame.
	"""
	# NOTE(review): the default is an empty DataFrame rather than None, so a
	# call without arguments never reaches the downthemall() fallback inside
	# modules() — confirm whether None was intended.
	collected = []

	for app, app_mods in modules(apps).items():
		for app_mod in set(app_mods):
			collected.extend(ut.pd_to_arr(bare_break_down(app, app_mod)))

	return ut.arr_to_pd(collected)

if __name__ == '__main__':
	# Command-line driver. The lower-cased argument words select one branch;
	# a branch either acts directly or queues shell commands in `cmd` (run at
	# the end) and lines in `printy` (only printed at the end).
	cmd, args, exe, printy = [], set(map(lambda x: x.lower().strip(), sys.argv)), False, []

	if any({"python", "run", "scan"} & args):
		cmd = [
			'jupyter nbconvert LiveTests.ipynb --to python',
			#'jupyter nbconvert LiveTests.ipynb --to python', 'ipython LiveTests.py', 'rm LiveTests.py'
			#'jupyter nbconvert --execute --to notebook --inplace --allow-errors --ExecutePreprocessor.timeout=-1 LiveTests.ipynb'
		]
		printy = ['ipython3 --no-banner --no-confirm-exit --quick -i LiveTests.py']
	elif any({"docker", "build"} & args):
		indicateBase()
		try_fail(f"echo DATA={os.path.abspath(os.curdir)} > env.txt")
		printy = [
			docker_base +
			' run --rm -it  -v "`pwd`:/sync" -v ' + get_docker_connect() +
			':/var/run/docker.sock  frantzme/pytesting:latest'
		]
	elif "clean" in args:
		cmd = [
			"rm *.txt", "rm *.csv", "rm *.pkl", "rm Makefile", "rm *.xlsx.zip",
			"rm *.xlsx", "rm *_SPLIT_*py", "rm *.json", "rm TEMP_*",
			"rm LiveTests.py",
			# Raw string: "\;" is not a valid Python escape sequence.
			r"find . -maxdepth 1 -type d -name '[a-zA-Z]*' -exec rm -r {} \;"
		]
	elif any({"downall"} & args):
		downthemall()
	elif any({"modules"} & args):
		os.system(f"apt-get update && yes|apt-get install wget")
		applications = downthemall()
		breaking_down_modules(applications).to_csv("modules.csv", index=False)
		for app in applications:
			for dl_files in app.dl_files:
				cmd += [
					f"yes|rm -r {dl_files}"
				]
	elif any({"semgrep_info"} & args):
		os.system(f"apt-get update && yes|apt-get install wget")
		import json
		import Semgrep
		semgrep = Semgrep.application()

		saved_file, contents = "semgrep_contents.json", None

		if not os.path.exists(saved_file):
			semgrep.download()
			contents = semgrep.grab_contents
		else:
			# Open for reading: "w+" truncated the cached file before json.load ran.
			with open(saved_file,"r") as reader:
				contents = json.load(reader)

		mapped = []
		for x in contents:
			mapped += semgrep.content_property(x)

		ut.arr_to_pd(mapped).to_csv("SemGrep_Identifiable_Info.csv")
	elif any({"test_info"} & args):
		folder_name = "00_ToScan"
		dir_to_check = os.path.join(os.path.abspath(os.curdir),folder_name)
		dir_check_name = dir_to_check+"/"
		# True for files that sit in a sub-folder rather than directly in the scan folder.
		filtered_by_folder = lambda foil: foil.replace(os.path.basename(foil),'') != dir_check_name
		test_module_info = []

		container = []
		for foil in ut.file_by_type(dir_to_check,".py"):
			print(foil)
			if filtered_by_folder(foil) and "ipynb" not in foil:
				# The sub-folder name is split on "_" into a pattern flag and a test type.
				folder_disection = foil.replace(os.path.basename(foil),'').replace(dir_check_name,'').replace('/','')
				pattern_type, test_type = folder_disection.split('_')
				has_pattern = pattern_type == 'pattern'

				foil_name = os.path.basename(foil)
				rule = int(foil_name.split('rule_')[-1].split('_')[0])
				is_fieldsensitive = 'Field-Sensitive' in foil_name
				is_global = 'Global' in foil_name
				is_interprocedural = 'Interprocedural' in foil_name
				is_dblinterprocedural = 'InterproceduralViaReturn' in foil_name
				is_pathsensitive = 'Path-Sensitive' in foil_name

				with open(foil,'r') as reader:
					contents = ''.join(reader.readlines())

				try:
					# Collect the IMPORT_NAME targets from the file's bytecode.
					imports = ':'.join([__.argval for __ in dis.get_instructions(contents) if 'IMPORT_NAME' in __.opname])
				except Exception as e:
					# Fall back to asking the user when the file cannot be disassembled.
					print(f"Error reading the file:> {e}")
					print(contents)
					print("="*15)
					imports = None
					while ut.is_empty(imports):
						imports = input("Please manually enter all of the base import names delimited by a colon:> ").strip()
						print()

					print("="*20)

				for imp in imports.split(':'):
					test_module_info += ut.pd_to_arr(bare_break_down(foil, imp))

				container += [{
					'FileName':foil_name,
					'FileDir':os.path.dirname(foil).replace(dir_check_name,''),
					'Rule':rule,
					'HasPattern':has_pattern,
					'TestType':test_type,
					"FieldSensitive":is_fieldsensitive,
					"Global":is_global,
					"InterProcedural":is_interprocedural,
					"DBLInterprocedural":is_dblinterprocedural,
					"PathSensitive":is_pathsensitive,

					"FieldSensitive_INT":int(is_fieldsensitive),
					"Global_INT":int(is_global),
					"InterProcedural_INT":int(is_interprocedural),
					"DBLInterprocedural_INT":int(is_dblinterprocedural),
					"PathSensitive_INT":int(is_pathsensitive),

					"Runnable": None,#is_runnable(foil),
					"Imports":imports,
					"Contents":contents
				}]
		ut.arr_to_pd(container).to_csv(f"{folder_name}_FileInfo_Container.csv")
		ut.arr_to_pd(test_module_info).to_csv(f"{folder_name}_TestModule.csv")
	elif any({"merged_test_info"} & args):
		# Grabbing the modules from the files
		os.system(f"apt-get update && yes|apt-get install wget p7zip-full")
		applications = downthemall()
		application_modules = breaking_down_modules(applications)

		from pprint import pprint
		for app in application_modules:
			pprint(app)

		if input("Continue? [y/n]> ").strip().lower() != 'y':
			sys.exit(-1)

		folder_name = "00_ToScan"
		dir_to_check = os.path.join(os.path.abspath(os.curdir),folder_name)
		dir_check_name = dir_to_check+"/"
		# True for files that sit in a sub-folder rather than directly in the scan folder.
		filtered_by_folder = lambda foil: foil.replace(os.path.basename(foil),'') != dir_check_name
		test_module_info = []

		container = []
		for foil in ut.file_by_type(dir_to_check,".py"):
			print(foil)
			if filtered_by_folder(foil) and "ipynb" not in foil:
				with open(foil,'r') as reader:
					contents = ''.join(reader.readlines())

				try:
					# Collect the IMPORT_NAME targets from the file's bytecode.
					imports = ':'.join([__.argval for __ in dis.get_instructions(contents) if 'IMPORT_NAME' in __.opname])
				except Exception as e:
					# Fall back to asking the user when the file cannot be disassembled.
					print(f"Error reading the file:> {e}")
					print(contents)
					print("="*15)
					imports = None
					while ut.is_empty(imports):
						imports = input("Please manually enter all of the base import names delimited by a colon:> ").strip()
						print()

					print("="*20)

				for imp in imports.split(':'):
					test_module_info += ut.pd_to_arr(bare_break_down(foil, imp))

		tests_pd = ut.arr_to_pd(test_module_info)
		if list(application_modules.columns) != list(tests_pd.columns):
			print("The keys are not the same")
			print(application_modules.columns)
			print(tests_pd.columns)
			sys.exit(0)
		else:
			print("This next step make take a little while")
			merged = ut.arr_to_pd( ut.pd_to_arr(application_modules) + ut.pd_to_arr(tests_pd) )

		merged.to_csv(f"Merged_Test_Info.csv")
		# Keep the frame itself: to_csv() returns None when it is given a path,
		# and the workbook below needs the data.
		cur_matched = match_data(merged)
		cur_matched.to_csv(f"Merged_Matched.csv")
		with ut.xcyl("Combination.xlsx") as writer:
			writer.add_frame("Merged_Test_Info", merged)
			writer.add_frame("Merged_Matched", cur_matched)

		for app in applications:
			for dl_files in app.dl_files:
				cmd += [
					f"yes|rm -r {dl_files}"
				]
	for run_cmd in cmd:
		print(run_cmd);try_fail(run_cmd)
	for run_cmd in printy:
		print(run_cmd)


In [None]:
ut.set_mito()
# NOTE(review): these look like hard-coded Telegram credentials (they are
# passed to ut.telegramBot when the results are uploaded). Consider loading
# them from the environment and rotating the token — confirm with the owner.
botID,chatID = "2130746695:AAHzqwcww-3d5sZmHmds1fG41WDIFeJgTlc","656252903"

## Setup and Prep

In [None]:
# Column name -> Python type of every result column. settypes() casts frame
# columns with it, and its keys are passed as the column list to app.mapp()
# and to the empty_frame() constructor.
data_types = {
	"projecttype":str,
	"projectname":str,
	"projecturl":str,
	"qual_name":str,
	"tool_name":str,
	"time_taken":int,
	"Program_Lines":int,
	"Total_Lines":int,
	"Number_of_Imports":str,
	"MCC":int,
	"IsVuln":bool,
	"Name":str,
	"OG_ID":str,
	"cryptolationID":int,
	"Message":str,
	"Line":int,
	"severity":str,
	"extra_info":str,
	"confidence":str,
	"context":str,
	"TP":int,
	"FP":int,
	"TN":int,
	"FN":int
}

def gen(
	projecttype=np.nan,
	projectname=np.nan,
	projecturl=np.nan,
	qual_name=np.nan,
	tool_name=np.nan,
	time_taken=np.nan,
	Program_Lines=np.nan,
	Total_Lines=np.nan,
	Number_of_Imports=np.nan,
	MCC=np.nan,
	IsVuln=np.nan,
	Name=np.nan,
	OG_ID=np.nan,
	cryptolationID=np.nan,
	Message=np.nan,
	Line=np.nan,
	severity=np.nan,
	extra_info=np.nan,
	confidence=np.nan,
	context=np.nan,
	TP=np.nan,
	FP=np.nan,
	TN=np.nan,
	FN=np.nan,
	METRIC_FILE="None"
):
	"""Build one result row as a dict keyed by the 24 result column names.

	Every value is passed through ``ut.to_int(..., return_self=True)``. When
	``METRIC_FILE`` is a DataFrame, a field for which ``ut.from_nan`` gives a
	falsy result is taken from that frame instead: the value stored under
	index 0 of the column with the same name, or NaN when the column is
	missing or empty.

	The defaults use ``np.nan``; the ``np.NaN`` alias no longer exists in
	NumPy 2.0.
	"""
	supplied = {
		"projecttype":projecttype,
		"projectname":projectname,
		"projecturl":projecturl,
		"qual_name":qual_name,
		"tool_name":tool_name,
		"time_taken":time_taken,
		"Program_Lines":Program_Lines,
		"Total_Lines":Total_Lines,
		"Number_of_Imports":Number_of_Imports,
		"MCC":MCC,
		"IsVuln":IsVuln,
		"Name":Name,
		"OG_ID":OG_ID,
		"cryptolationID":cryptolationID,
		"Message":Message,
		"Line":Line,
		"severity":severity,
		"extra_info":extra_info,
		"confidence":confidence,
		"context":context,
		"TP":TP,
		"FP":FP,
		"TN":TN,
		"FN":FN,
	}

	if not isinstance(METRIC_FILE,pd.DataFrame):
		return {
			key:ut.to_int(value,return_self=True)
			for key,value in supplied.items()
		}

	dyct = METRIC_FILE.to_dict()

	def grab(val):
		# Value stored under index 0 of column `val`, or NaN when unavailable.
		if val in dyct and dyct[val] != {}:
			return dyct[val][0]
		return np.nan

	# NOTE(review): `or` also falls back to the metric file for explicit falsy
	# arguments (0, False, "") — confirm that this is intended.
	return {
		key:ut.to_int(ut.from_nan(value) or grab(key),return_self=True)
		for key,value in supplied.items()
	}

In [None]:
def write_out(raw_msg,logger_file=f".logging_file_{ut.cur_time}.csv"):
	"""Prefix ``raw_msg`` with ut.cur_time_ms(), append it as a line to ``logger_file`` and print it.

	The default file name is computed once, when the function is defined.
	"""
	# NOTE(review): ut.cur_time is read here without being called, unlike
	# ut.cur_time_ms() below — confirm it is a value and not a function.
	log_msg = " , ".join((ut.cur_time_ms(), raw_msg))
	with open(logger_file, "a+") as appender:
		print(log_msg, file=appender)
	print(log_msg)

def disp(_input):
	"""Log ``_input`` through write_out()."""
	return write_out(_input)


def disp_msg(_input):
	"""Alias for disp()."""
	return disp(_input)

def settypes(df,hard_set=False):
	"""Fill missing values in ``df`` and cast its columns as described by ``data_types``.

	bool columns are filled with False and str columns with '', and both are
	always cast. int columns are filled with -1 and cast only when
	``hard_set`` is true. ``df`` is modified in place and returned.
	"""
	for column, wanted in data_types.items():
		should_cast = hard_set
		if wanted == bool:
			df[column] = df[column].fillna(False)
			should_cast = True
		elif wanted == str:
			df[column] = df[column].fillna('')
			should_cast = True
		elif wanted == int and should_cast:
			df[column] = df[column].fillna(-1)

		if should_cast:
			df[column] = df[column].astype(wanted)

	# NOTE(review): set_index() returns a new frame and the result is not
	# kept, so the index of `df` is left unchanged — confirm whether that is
	# intended before changing it (later code reads these as columns).
	df.set_index(['qual_name','Line','tool_name'])
	return df

# Remove the log files of earlier runs. write_out() names them
# ".logging_file_<time>.csv"; the old "*.logging_file_*.log" pattern could
# never match those names.
ut.run("rm .logging_file_*.csv")

# Materials Prep

In [None]:
use_json_files = False
folder_name = "00_ToScan"

# Without JSON library files the only thing to scan is the local folder,
# described with no repository URL, tag or commit.
prepare = None if use_json_files else [{
	folder_name:{'url': None, 'tag': None, 'commit': None}
}]

# `re` is rebound to the regular-expression module by a later import in this
# file, so the glob function is called under its own name here.
full_libraries = glob("*.json") if use_json_files else prepare

In [None]:
disp_msg("Starting to warm up the apps")

# One scanner and one metrics tool; the metrics tool goes first in `applications`.
scanners = [semgrep.application()]
metrics = [pymetrics.application()]
metrics_name = "METRICS"

applications = metrics + scanners

disp_msg("Loaded up the apps")

In [None]:
# Run-wide state: the timing and exception records are filled by
# add_timing() and add_exception().
unique_repos = set()
extra_logging = prepare is None
raw_timing = []
scanningExceptions = []
# Name of the application whose results are stored as the 'Standard' set.
golden_standard = "PyMetrics"

def add_exception(name, proj_name, break_time):
	"""Append a "Time out at <break_time> seconds" record for ``name``/``proj_name`` to scanningExceptions."""
	global scanningExceptions
	record = ut.dyct_frame({
		"Time": ut.cur_time_ms(),
		"App": f"{name}",
		"Proj": f"{proj_name}",
		"Exception": f"Time out at {break_time} seconds"
	})
	scanningExceptions.append(record)

def add_timing(proj_name, tool_name, time):
	"""Append a (project, tool, seconds) timing record to raw_timing."""
	global raw_timing
	record = ut.dyct_frame({
			'ProjectName':proj_name,
			'ToolName':tool_name,
			'Time (s)':time
	})
	raw_timing.append(record)

def empty_frame():
	"""Return a one-row frame built from gen()'s defaults, with the data_types keys as columns."""
	return pd.DataFrame([gen()],columns=data_types.keys())

# Results per application name, plus the golden-standard frame ('Standard')
# and the merged row list ('Overall').
current_contents = dict(Standard=None, Overall=[])

# Running

In [None]:
# Install every application, logging its name before and its version after.
for app in applications:
	disp_msg("Installing App {}".format(app.name()))
	app.install()
	disp_msg("Finishing  App {}: {}".format(app.name(), app.version()))

## Raw Execution of Tools

In [None]:
# Run every application over every project and keep its typed results.
for app_itr, app in enumerate(applications):
	name = app.name() if app in scanners else metrics_name
	print(f"Currently working on app {app.name()}")
	current_contents[app.name()] = []

	for LibraryValue in ut.progressBar( dc(full_libraries) ):
		if use_json_files:
			with open(LibraryValue,'r') as reader:
				dictionary = json.load(reader)
		else:
			dictionary = LibraryValue

		for proj_itr, (proj_name,proj_value) in enumerate(dictionary.items()):
			proj_url, proj_tag, proj_commit = proj_value['url'] , proj_value['tag'], proj_value['commit']

			with ut.GRepo(proj_name, proj_url, proj_tag, proj_commit, delete=proj_url is not None, silent=False, local_dir = False) as repo:
				try:
					@timeout(time_limit=60 * 10) #10 minutes timeout
					def scannr(proj_name, app, app_name:str, proj_url:str):
						return app.scan(proj_name,True), True
					(app_results,app_timing),successfully_scanned = scannr(proj_name=proj_name, app=app, app_name=app.name(), proj_url=proj_url)
				except Exception as e:
					successfully_scanned, app_timing = False, -1
					# Show the real failure; the stored record always reads "Time out".
					print(f"Scanning {proj_name} with {app.name()} failed :> {e}")
					add_exception(name, proj_name, 600)

				add_timing(proj_name, app.name(), app_timing)
				app_set_types = settypes(app.mapp(app_results,proj_name,columns=data_types.keys(),gen=gen,time_taken=app_timing)) if successfully_scanned else empty_frame()

				app_set_types.to_csv(f"{app.name()}_{proj_name}_FULLYSCAN.csv")
				current_contents[app.name()] = ut.frame_dycts(app_set_types)

				# The git details belong to the project, not to a row, so they
				# are looked up once instead of once per row.
				git_info = get_git(proj_name)
				git_fields = ['commit', 'tree', 'parent', 'author', 'author_date']

				for row in current_contents[app.name()]:
					for field in git_fields:
						row['GIT_' + field] = git_info[field] if git_info else None

					for key,value in repo.get_info().items():
						row["REPO_"+key] = value

				print(f"Current App :> {app.name()}")
				if app.name(golden_standard):
					current_contents['Standard'] = app_set_types

## Post Execution Prettifying

### Merging results and collecting files dropped by the scanning tools

In [None]:
# Picking up any non scanned or itemized files
for app_itr, app in enumerate(applications):
	if app.name(golden_standard):
		# The golden-standard rows go into the overall listing unchanged.
		for golden_standard_row in current_contents[app.name()]:
			current_contents['Overall'] += [golden_standard_row]
		continue

	# First golden-standard row of every file, keyed by its qualified name.
	# The old code tried to remove matched rows with DataFrame.drop(), whose
	# result was discarded, so every file ended up reported as unfound too.
	standard_rows = {}
	for sub_row in current_contents['Standard'].itertuples():
		standard_row = sub_row._asdict()
		standard_rows.setdefault(standard_row['qual_name'], standard_row)
	matched_files = set()

	metric_keys = ['Total_Lines', 'Program_Lines', 'Number_of_Imports', 'MCC']
	for current_scanned_file in current_contents[app.name()]:
		alive_value = standard_rows.get(current_scanned_file['qual_name'])
		if alive_value:
			# Copy the metrics of the matching golden-standard file.
			matched_files.add(current_scanned_file['qual_name'])
			for key in metric_keys:
				current_scanned_file[key] = alive_value[key]
		else:
			# No golden-standard entry: mark the same metric columns with -10
			# (the old fallback wrote 'Imports' instead of 'Number_of_Imports').
			for key in metric_keys:
				current_scanned_file[key] = -10

		file_name, line = current_scanned_file['qual_name'], current_scanned_file['Line']
		# NOTE(review): this writes 'Context' while data_types uses 'context' — confirm the key.
		current_scanned_file['Context']  = ut.retrieve_context(file_name, line, 5,ut.import_global_context)

		current_contents['Overall'] += [current_scanned_file]

	# Golden-standard files this tool reported nothing for.
	for unfound_file_raw in current_contents['Standard'].itertuples():
		unfound_file = unfound_file_raw._asdict()
		if unfound_file['qual_name'] in matched_files:
			continue
		unfound_file['tool_name'] = app.name()
		# NOTE(review): the result column is named 'IsVuln'; 'vuln' adds a separate key — confirm.
		unfound_file['vuln'] = False
		current_contents['Overall'] += [unfound_file]

In [None]:
import json

# Keep a raw JSON copy of the merged rows.
overall_rows = current_contents['Overall']
with open('raw_save.json','w+') as writer:
	json.dump(overall_rows, writer)

### Combining Total Information

In [None]:
# Turn each record list into a frame; an empty list becomes an empty frame.
overallContainer = ut.arr_to_pd(current_contents['Overall']) if len(current_contents['Overall']) != 0 else pd.DataFrame()
timing = ut.arr_to_pd(raw_timing) if len(raw_timing) != 0 else pd.DataFrame()
scanningException = ut.arr_to_pd(scanningExceptions) if len(scanningExceptions) != 0 else pd.DataFrame()

# Post Mortem

In [None]:
# Save each frame twice, once as csv and once as pkl.
frames_to_save = (
	(overallContainer, 'FullyScanned'),
	(timing, 'OverallTiming'),
	(scanningException, 'ScanningExceptions'),
)
for frame, name in frames_to_save:
	for output_type in ("csv", "pkl"):
		ut.save_frames(frame,ut.clean_string(name),output_type)

In [None]:
# Write the merged results to a workbook and upload it through the bot.
foil_name = "Finalized_Results"
with ut.telegramBot(botID,chatID) as bot:
	if not foil_name.endswith(".xlsx"):
		foil_name += ".xlsx"
	# NOTE(review): this calls writer.addr() while other code in this file
	# calls add_frame() on the same kind of ut.xcyl writer — confirm the name.
	with ut.xcyl(foil_name) as writer:
		writer.addr("FullResults", overallContainer)

	bot.upload(foil_name)

# La Fin