This is the code for breaking dratstats.py

apache · Aug 13, 2018 · a24e05b · a24e05b
1 parent d8df871
commit a24e05b
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 174 deletions.
diff --git a/distribution/src/main/resources/bin/dratstats.py b/distribution/src/main/resources/bin/dratstats.py
@@ -268,7 +268,7 @@ def run(repos_list, output_dir):
 			print("\nOODT Started: OK\n")
 
 			print('Adding repository: '+str(rep)+' to Solr')
-			index_solr(json.dumps([rep]))
+			# index_solr(json.dumps([rep]))
 
 
 			print("\nRunning DRAT on " + rep["repo"] + " ...\n")
@@ -295,178 +295,9 @@ def run(repos_list, output_dir):
 					wait_for_job("urn:drat:MimePartitioner")
 					wait_for_job("urn:drat:RatCodeAudit")
 					stats['map_end'] = current_datetime()
-
-					if retval:
-						time.sleep(5)
-						stats['reduce_start'] = current_datetime()
-
-						# Extract data from RatAggregate File
-						totalNotes = 0
-						totalBinaries = 0
-						totalArchives = 0
-						totalStandards = 0
-						totalApache = 0
-						totalGenerated = 0
-						totalUnknown = 0
-
-						rat_dir = os.getenv("DRAT_HOME") + "/data/archive/rat"
-
-						# Iterate over all RAT log files 
-						for root, dirs, files in os.walk(rat_dir):
-							for filename in files:
-								if filename.endswith(".log"):
-									(notes, binaries, archives,standards,apachelicensed,generated,unknown) = parseFile(os.path.join(root, filename))
-									totalNotes = totalNotes + notes
-									totalBinaries = totalBinaries + binaries
-									totalArchives = totalArchives + archives
-									totalStandards = totalStandards + standards
-									totalApache = totalApache + apachelicensed
-									totalGenerated = totalGenerated + generated
-									totalUnknown = totalUnknown + unknown
-
-						stats["license_Notes"] = totalNotes
-						stats["license_Binaries"] = totalBinaries
-						stats["license_Archives"] = totalArchives
-						stats["license_Standards"] = totalStandards
-						stats["license_Apache"] = totalApache
-						stats["license_Generated"] = totalGenerated
-						stats["license_Unknown"] = totalUnknown
-
-						stats['reduce_end'] = current_datetime()
-						print "\nDRAT Scan Completed: OK\n"
-
-			time.sleep(5)
-
-			if retval:
-				# Copy Data with datetime variables above, extract output from RatAggregate file, extract data from Solr Core
-				printnow ("\nCopying data to Solr and Output Directory...\n")
-
-				# Extract data from Solr
-				neg_mimetype = ["image", "application", "text", "video", "audio", "message", "multipart"]
-				connection = urllib2.urlopen(os.getenv("SOLR_URL") + "/drat/select?q=*%3A*&rows=0&facet=true&facet.field=mimetype&wt=python&indent=true")
-
-				response = eval(connection.read())
-				mime_count = response["facet_counts"]["facet_fields"]["mimetype"]
-
-				for i in range(0, len(mime_count), 2):
-					if mime_count[i].split("/")[0] not in neg_mimetype:
-						stats["mime_" + mime_count[i]] = mime_count[i + 1]
-
-
-				# Count the number of files
-				stats["files"] = count_num_files(rep["repo"], ".git")
-
-				# Write data into Solr
-				stats["type"] = 'software'
-				stats_data = []
-				stats_data.append(stats)
-				json_data = json.dumps(stats_data)
-				index_solr(json_data)
-
-				# Parse RAT logs
-				rat_logs_dir = os.getenv("DRAT_HOME") + "/data/archive/rat/*/*.log"
-				rat_license = {}
-				rat_header = {}
-				for filename in glob.glob(rat_logs_dir):
-					#print('=' * 20)
-					l = 0
-					h = 0
-					cur_file = ''
-					cur_header = ''
-					cur_section = ''
-					parsedHeaders = False
-					parsedLicenses = False
-
-					with open(filename, 'rb') as f:
-						printnow('Parsing rat log: ['+filename+']')
-						for line in f:
-							if '*****************************************************' in line:
-								l = 0
-								h = 0
-								if cur_section == 'licenses':
-									parsedLicenses = True
-								if cur_section == 'headers':
-									parsedHeaders = True
-
-								cur_file = ''
-								cur_header = ''
-								cur_section = ''
-							if line.startswith('  Files with Apache') and not parsedLicenses:
-								cur_section = 'licenses'
-							if line.startswith(' Printing headers for ') and not parsedHeaders:
-								cur_section = 'headers'
-							if cur_section == 'licenses':
-								l += 1
-								if l > 4:
-									line = line.strip()
-									if line:
-										print("File: %s with License Line: %s" % (filename, line))
-										li = parse_license(line)
-										rat_license[li[0]] = li[1]
-									 	print(li)
-							if cur_section == 'headers':
-								if '=====================================================' in line or '== File:' in line:
-									h += 1
-								if h == 2:
-									cur_file = line.split("/")[-1].strip()
-								if h == 3:
-									cur_header += line
-								if h == 4:
-									rat_header[cur_file] = cur_header.split("\n", 1)[1]
-									cur_file = ''
-									cur_header = ''
-									h = 1
-					if h == 3:
-						rat_header[cur_file] = cur_header.split("\n", 1)[1]
-					parsedHeaders = True
-					parsedLicenses = True
-
-				# Index RAT logs into Solr
-				connection = urllib2.urlopen(os.getenv("SOLR_URL") +
-											 "/drat/select?q=*%3A*&fl=filename%2Cfilelocation%2Cmimetype&wt=python&rows="
-											 + str(stats["files"]) +"&indent=true")
-				response = eval(connection.read())
-				docs = response['response']['docs']
-				file_data = []
-				batch = 100
-				dc = 0
-
-				for doc in docs:
-					fdata = {}
-					fdata['id'] = os.path.join(doc['filelocation'][0], doc['filename'][0])
-					m = md5.new()
-					m.update(fdata['id'])
-					hashId = m.hexdigest()
-					fileId = hashId+"-"+doc['filename'][0]
-
-					if fileId not in rat_license:
-						print "File: "+str(fdata['id'])+": ID: ["+fileId+"] not present in parsed licenses => Likely file copying issue. Skipping."
-						continue #handle issue with DRAT #93
-
-					fdata["type"] = 'file'
-					fdata['parent'] = rep["repo"]
-					fdata['mimetype'] = doc['mimetype'][0]
-					fdata['license'] = rat_license[fileId]
-					if fileId in rat_header:
-						fdata['header'] = rat_header[fileId]
-					file_data.append(fdata)
-					dc += 1
-					if dc % batch == 0:
-						json_data = json.dumps(file_data)
-						index_solr(json_data)
-						file_data = []
-				if dc % batch != 0:
-					json_data = json.dumps(file_data)
-					index_solr(json_data)
-
-				# Copying data to Output Directory
-				repos_out = output_dir + "/" + normalize_path(rep["repo"])
-				shutil.copytree(os.getenv("DRAT_HOME") + "/data", repos_out)
-				print("\nData copied to Solr and Output Directory: OK\n")
-
-			else:
-				print ("\nDRAT Scan Completed: Resulted in Error\n")
-
+					print ("\nwaiting for Rat Aggregator...\n")
+					wait_for_job("urn:drat:RatAggregator")
+
 
 			time.sleep(5)
 			print ("\nStopping OODT...\n")

diff --git a/nohup.out b/nohup.out
@@ -0,0 +1,3 @@
+Started dynamic workflow with id '6453cca6-9f30-11e8-b99d-f5018c8e9233'
+
+Navigate to http://localhost:8080/opsui/ to view the OODT browser and http://localhost:8080/solr to view the Solr catalog.
diff --git a/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue b/webapps/proteus-new/src/main/webapp/resources/src/components/statisticscomp.vue
@@ -156,7 +156,7 @@ the License.
         return this.stat.crawledfiles/this.stat.numOfFiles *100;
       },
       indexingprogress(){
-        return this.stat.indexedfiles/this.stat.numberOfFiles * 100;
+        return this.stat.indexedfiles/this.stat.numOfFiles * 100;
       }
     }
 }