Skip to content
This repository has been archived by the owner on May 12, 2021. It is now read-only.

Commit

Permalink
Completed Dratstat integration with proteus; all the functions of DRAT
Browse files Browse the repository at this point in the history
can be done using the proteus-new Web application
  • Loading branch information
ahmedifhaam committed Aug 3, 2018
1 parent 5aa0300 commit ea03bb7
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 23 deletions.
1 change: 1 addition & 0 deletions distribution/src/main/resources/bin/dratstats.py
Expand Up @@ -344,6 +344,7 @@ def run(repos_list, output_dir):
# Extract data from Solr
neg_mimetype = ["image", "application", "text", "video", "audio", "message", "multipart"]
connection = urllib2.urlopen(os.getenv("SOLR_URL") + "/drat/select?q=*%3A*&rows=0&facet=true&facet.field=mimetype&wt=python&indent=true")

response = eval(connection.read())
mime_count = response["facet_counts"]["facet_fields"]["mimetype"]

Expand Down
206 changes: 201 additions & 5 deletions pge/src/main/resources/bin/rat_aggregator/rat_aggregator.py
Expand Up @@ -26,6 +26,36 @@
import sys
import os
import getopt
import subprocess
import time
import shutil
import datetime
import csv
import urllib2
import json
import xmlrpclib
import getopt
import glob
import md5
import requests


def parse_license(s):
    """Split one RAT report line into ``[file_name, license_name]``.

    The text before the first "/" is taken as the license code and the
    last "/"-separated component of the remainder as the file name.
    Known codes (N, B, A, AL, !?????) are expanded to their readable
    names; any other code is returned unchanged (stripped).

    Returns ``["/dev/null", "Unknown"]`` when ``s`` is None, empty,
    whitespace-only, or contains no "/" at all.
    """
    li_dict = {'N': 'Notes', 'B': 'Binaries', 'A': 'Archives', 'AL': 'Apache', '!?????': 'Unknown'}
    unparsable = ["/dev/null", li_dict['!?????']]

    # Blank (or missing) line: nothing to parse.
    if not s or s.isspace():
        return unparsable

    code, slash, path = s.partition("/")
    # Without a "/" there is no path part to take a file name from.
    if not slash:
        return unparsable

    code = code.strip()
    # A split always yields at least one component, so no further length
    # check is needed; a path ending in "/" yields an empty file name.
    file_name = path.rsplit("/", 1)[-1]
    return [file_name, li_dict.get(code, code)]


def parseFile(filepath):
Expand Down Expand Up @@ -55,15 +85,39 @@ def parseFile(filepath):
if line.find('Unknown Licenses') != -1:
unknown = unknown + int(line.split(' ')[0].strip())
return (notes, binaries,archives,standards,apachelicensed,generated,unknown)

return (-1,-1,-1,-1,-1,-1,-1)

def count_num_files(path, exclude):
    """Count the files below ``path`` whose joined path does not contain ``exclude``.

    ``exclude`` is matched as a plain substring of ``os.path.join(root, name)``,
    so it rejects any file whose directory or own name contains it
    (e.g. ".git" also rejects ".gitignore").

    Returns the number of remaining files as an int (0 for an empty or
    non-existent tree, since os.walk then yields nothing).
    """
    count = 0
    for root, dirs, files in os.walk(path):
        # Prune in place so os.walk never descends into an excluded
        # directory: every file below it would contain the same substring
        # and be rejected anyway, so the result is unchanged.
        dirs[:] = [d for d in dirs if exclude not in os.path.join(root, d)]
        count += sum(1 for filename in files
                     if exclude not in os.path.join(root, filename))
    return count

def index_solr(json_data):
    """POST ``json_data`` to the Solr ``statistics`` core's JSON update handler.

    The base URL is read from the SOLR_URL environment variable and the
    request asks Solr to commit immediately (``commit=true``).

    json_data: the request body, sent with Content-type application/json
               (callers in this file pass the output of json.dumps).

    Errors raised by urllib2 while opening the URL propagate to the caller.
    """
    request = urllib2.Request(os.getenv("SOLR_URL") + "/statistics/update/json?commit=true")
    request.add_header('Content-type', 'application/json')
    response = urllib2.urlopen(request, json_data)
    # The response body is not needed; close it so the connection is
    # released instead of lingering until garbage collection.
    response.close()

def main(argv=None):
usage = 'rat_aggregator.py logfile1 logfile2 ... logfileN'
#print("starting rat aggregator")

repo_file_url = os.getenv("DRAT_HOME") + "/data/repo"
with open(repo_file_url,'rb')as repoFile:
data = ''
for line in repoFile:
data+=line
rep = eval(data)

index_solr(json.dumps([rep]))

if len(argv) == 0:
print usage
sys.exit()
print usage
sys.exit()

totalNotes = 0
totalBinaries = 0
Expand All @@ -83,9 +137,151 @@ def main(argv=None):
totalGenerated = totalGenerated + generated
totalUnknown = totalUnknown + unknown

print "Notes,Binaries,Archives,Standards,Apache,Generated,Unknown"
print str(totalNotes)+","+str(totalBinaries)+","+str(totalArchives)+","+str(totalStandards)+","+str(totalApache)+","+str(totalGenerated)+","+str(totalUnknown)
#Additionally
stats = {}
stats["license_Notes"] = totalNotes
stats["license_Binaries"] = totalBinaries
stats["license_Archives"] = totalArchives
stats["license_Standards"] = totalStandards
stats["license_Apache"] = totalApache
stats["license_Generated"] = totalGenerated
stats["license_Unknown"] = totalUnknown



stats['id'] =rep["repo"]
retVal = True

if retVal:
# Copy Data with datetime variables above, extract output from RatAggregate file, extract data from Solr Core
#print("\nCopying data to Solr and Output Directory...\n")

# Extract data from Solr
neg_mimetype = ["image", "application", "text", "video", "audio", "message", "multipart"]
connection = requests.get(os.getenv("SOLR_URL") + "/drat/select?q=*%3A*&rows=0&facet=true&facet.field=mimetype&wt=python&indent=true")

response = eval(connection.text)
mime_count = response["facet_counts"]["facet_fields"]["mimetype"]

for i in range(0, len(mime_count), 2):
if mime_count[i].split("/")[0] not in neg_mimetype:
stats["mime_" + mime_count[i]] = mime_count[i + 1]


# Count the number of files
stats["files"] = count_num_files(rep["repo"], ".git")

# Write data into Solr
stats["type"] = 'software'
stats_data = []
stats_data.append(stats)
json_data = json.dumps(stats_data)
index_solr(json_data)

# Parse RAT logs
rat_logs_dir = os.getenv("DRAT_HOME") + "/data/archive/rat/*/*.log"
rat_license = {}
rat_header = {}
for filename in glob.glob(rat_logs_dir):
l = 0
h = 0
cur_file = ''
cur_header = ''
cur_section = ''
parsedHeaders = False
parsedLicenses = False

with open(filename, 'rb') as f:
for line in f:
if '*****************************************************' in line:
l = 0
h = 0
if cur_section == 'licenses':
parsedLicenses = True
if cur_section == 'headers':
parsedHeaders = True

cur_file = ''
cur_header = ''
cur_section = ''
if line.startswith(' Files with Apache') and not parsedLicenses:
cur_section = 'licenses'
if line.startswith(' Printing headers for ') and not parsedHeaders:
cur_section = 'headers'
if cur_section == 'licenses':
l += 1
if l > 4:
line = line.strip()
if line:
#print("File: %s with License Line: %s" % (filename, line))
li = parse_license(line)
rat_license[li[0]] = li[1]
#print(li)
if cur_section == 'headers':
if '=====================================================' in line or '== File:' in line:
h += 1
if h == 2:
cur_file = line.split("/")[-1].strip()
if h == 3:
cur_header += line
if h == 4:
rat_header[cur_file] = cur_header.split("\n", 1)[1]
cur_file = ''
cur_header = ''
h = 1
if h == 3:
rat_header[cur_file] = cur_header.split("\n", 1)[1]
parsedHeaders = True
parsedLicenses = True

# Index RAT logs into Solr
connection = requests.get(os.getenv("SOLR_URL") +
"/drat/select?q=*%3A*&fl=filename%2Cfilelocation%2Cmimetype&wt=python&rows="
+ str(stats["files"]) +"&indent=true")

response = eval(connection.text)
docs = response['response']['docs']
file_data = []
batch = 100
dc = 0

for doc in docs:
fdata = {}
fdata['id'] = os.path.join(doc['filelocation'][0], doc['filename'][0])
m = md5.new()
m.update(fdata['id'])
hashId = m.hexdigest()
fileId = hashId+"-"+doc['filename'][0]

if fileId not in rat_license:
#print "File: "+str(fdata['id'])+": ID: ["+fileId+"] not present in parsed licenses => Likely file copying issue. Skipping."
continue #handle issue with DRAT #93

fdata["type"] = 'file'
fdata['parent'] = rep["repo"]
fdata['mimetype'] = doc['mimetype'][0]
fdata['license'] = rat_license[fileId]
if fileId in rat_header:
fdata['header'] = rat_header[fileId]
file_data.append(fdata)
dc += 1
if dc % batch == 0:
json_data = json.dumps(file_data)
index_solr(json_data)
file_data = []
if dc % batch != 0:
json_data = json.dumps(file_data)
index_solr(json_data)

# Copying data to Output Directory
print ("Notes,Binaries,Archives,Standards,Apache,Generated,Unknown")
print str(totalNotes)+","+str(totalBinaries)+","+str(totalArchives)+","+str(totalStandards)+","+str(totalApache)+" ,"+str(totalGenerated)+","+str(totalUnknown)

#print("\nData copied to Solr and Output Directory: OK\n")


if __name__ == "__main__":
main(sys.argv[1:])



1 change: 1 addition & 0 deletions proteus/src/main/java/backend/FileConstants.java
Expand Up @@ -29,6 +29,7 @@ public class FileConstants {
public static final String WORKFLOW_PATH = buildDratSubdirectoryPath("/deploy/workflow/bin/wmgr-client");
public static final String DRAT_PATH = buildDratSubdirectoryPath("/deploy/bin/drat");
public static final String DRAT_TEMP_UNZIPPED_PATH = buildDratSubdirectoryPath("/deploy/data/staging");
public static final String CURRENT_REPO_DETAILS_FILE = buildDratSubdirectoryPath("/deploy/data/repo");
public static final String DRAT_TEMP_LOG_OUTPUT = buildDratSubdirectoryPath("/deploy/data/drat_output.log");
public static final String SOLR_INDEXER_CONFIG_PATH = buildDratSubdirectoryPath("/deploy/filemgr/etc/indexer.properties");

Expand Down
10 changes: 9 additions & 1 deletion proteus/src/main/java/drat/proteus/rest/DratRequestWrapper.java
Expand Up @@ -19,5 +19,13 @@

public class DratRequestWrapper {
//needed for JSON Requests
public String dirPath;
public String id;
public String repo;
public String name;
public String loc_url;
public String description;
public String type="project";



}
16 changes: 11 additions & 5 deletions proteus/src/main/java/drat/proteus/rest/DratRestResource.java
Expand Up @@ -17,12 +17,12 @@

package drat.proteus.rest;

import java.io.File;
import java.io.IOException;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.logging.Logger;

import com.google.gson.Gson;
import org.wicketstuff.rest.annotations.MethodMapping;
import org.wicketstuff.rest.annotations.parameters.RequestBody;
import org.wicketstuff.rest.contenthandling.json.webserialdeserial.GsonWebSerialDeserial;
Expand All @@ -48,19 +48,20 @@ public DratRestResource() {

@MethodMapping(value = "/go", httpMethod = HttpMethod.POST)
public void go(@RequestBody DratRequestWrapper body) throws Exception {
dratWrapper.setIndexablePath(body.dirPath);
dumpToFile(body);
dratWrapper.setIndexablePath(body.repo);
dratWrapper.go();
}

@MethodMapping(value = "/index", httpMethod = HttpMethod.POST)
public void index(@RequestBody DratRequestWrapper body) throws Exception {
dratWrapper.setIndexablePath(body.dirPath);
dratWrapper.setIndexablePath(body.repo);
dratWrapper.index();
}

@MethodMapping(value = "/crawl", httpMethod = HttpMethod.POST)
public void crawl(@RequestBody DratRequestWrapper body) throws Exception {
dratWrapper.setIndexablePath(body.dirPath);
dratWrapper.setIndexablePath(body.repo);
dratWrapper.crawl();
}

Expand Down Expand Up @@ -93,4 +94,9 @@ public String getProcessLog() {
return "Log is empty!";
}
}

/**
 * Serializes the request body to JSON with Gson and writes it to
 * {@link FileConstants#CURRENT_REPO_DETAILS_FILE}.
 *
 * @param body the request whose public fields are dumped
 * @throws IOException if the file cannot be written
 */
public void dumpToFile(DratRequestWrapper body) throws IOException {
    // Encode explicitly as UTF-8: a bare getBytes() uses the JVM's platform
    // default charset, which makes the file's encoding machine-dependent.
    byte[] json = new Gson().toJson(body).getBytes(java.nio.charset.StandardCharsets.UTF_8);
    File repo = new File(FileConstants.CURRENT_REPO_DETAILS_FILE);
    Files.write(repo.toPath(), json);
}
}
2 changes: 1 addition & 1 deletion webapps/proteus-new/src/main/webapp/resources/src/App.vue
Expand Up @@ -30,7 +30,7 @@ the License.
<v-list class="pa-0">
<v-list-tile avatar>
<v-list-tile-avatar>
<img src="https://randomuser.me/api/portraits/men/85.jpg">
<img src="./logo.png">
</v-list-tile-avatar>

<v-list-tile-content>
Expand Down

0 comments on commit ea03bb7

Please sign in to comment.