Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files
- remaining fixes to make #93 functional
 - still some issues with DRAT viz
   viz application
1. expects to have some "objects" in Solr
of type: "project"
I get the concept
a project has a repo, and in the repo is the results of an audit
so, type:project->type:software (stats), looks those up
then it looks up all type:file
which contain the audit results (license header parsing)
however
even if I hack it to display
a project (even though DRAT stats doesn't index any objects of that type
in Solr),
it only "sort of works" and in doing so
a bunch of stuff is messed up
  • Loading branch information
chrismattmann committed Jul 20, 2017
1 parent a37c0b2 commit a219e8aa3c324ece6a19d87fe6b01eaee16c9023
Showing 6 changed files with 78 additions and 22 deletions.
@@ -87,6 +87,8 @@
<exclude>${groupId}:${parent.artifactId}-pcs-services</exclude>
<exclude>${groupId}:${parent.artifactId}-fmprod</exclude>
<exclude>${groupId}:${parent.artifactId}-solr-webapp</exclude>
<exclude>${groupId}:${parent.artifactId}-proteus</exclude>
<exclude>${groupId}:${parent.artifactId}-viz</exclude>
<exclude>org.apache.tika:tika-app</exclude>
</excludes>
</dependencySet>
@@ -31,6 +31,7 @@
import xmlrpclib
import getopt
import glob
import md5

# Check for environment variables
def check_env_var():
@@ -204,8 +205,8 @@ def parse_license(s):
li = arr[0].strip()
if li in li_dict:
li = li_dict[li]
#return [arr[1].split("/")[-1].strip().replace("_|_", "/"), li]
return ["/" + arr[1], li]

return [arr[1].split("/")[-1], li]


# Index into Solr
@@ -362,7 +363,7 @@ def run(repos_list, output_dir):
if '=====================================================' in line or '== File:' in line:
h += 1
if h == 2:
cur_file = line.split("/")[-1].strip().replace("_|_", "/")
cur_file = line.split("/")[-1].strip()
if h == 3:
cur_header += line
if h == 4:
@@ -382,18 +383,25 @@ def run(repos_list, output_dir):
file_data = []
batch = 100
dc = 0

for doc in docs:
fdata = {}
fdata['id'] = os.path.join(doc['filelocation'][0], doc['filename'][0])
if fdata['id'] not in rat_license:
print "File: "+str(fdata['id'])+" not present in parsed licenses => Likely file copying issue. Skipping."
m = md5.new()
m.update(fdata['id'])
hashId = m.hexdigest()
fileId = hashId+"-"+doc['filename'][0]

if fileId not in rat_license:
print "File: "+str(fdata['id'])+": ID: ["+fileId+"] not present in parsed licenses => Likely file copying issue. Skipping."
continue #handle issue with DRAT #93

fdata["type"] = 'file'
fdata['parent'] = repository
fdata['mimetype'] = doc['mimetype'][0]
fdata['license'] = rat_license[fdata['id']]
if fdata['id'] in rat_header:
fdata['header'] = rat_header[fdata['id']]
fdata['license'] = rat_license[fileId]
if fileId in rat_header:
fdata['header'] = rat_header[fileId]
file_data.append(fdata)
dc += 1
if dc % batch == 0:
@@ -0,0 +1,58 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.oodt.cas.pge.staging;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.logging.Logger;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.oodt.cas.filemgr.structs.exceptions.DataTransferException;
import org.apache.oodt.cas.pge.metadata.PgeMetadata;

import com.google.common.io.Files;

/**
 * A {@link FileManagerFileStager} that, after the parent has staged a file,
 * renames the staged copy to {@code <md5>-<name>}, where {@code <md5>} is the
 * MD5 hex digest of the original file's absolute path and {@code <name>} is
 * the original file name.
 */
public class HashingOrigFileStager extends FileManagerFileStager {

  private static final Logger LOG = Logger
      .getLogger(HashingOrigFileStager.class.getName());

  /**
   * Delegates staging to the parent class, then moves the staged file inside
   * {@code destDir} to a name prefixed with the MD5 hex digest of the
   * original file's absolute path.
   *
   * @param stageFile   URI of the file to stage; it is converted with
   *                    {@link File#File(URI)} after the parent call returns
   * @param destDir     directory the file is staged into and renamed within
   * @param pgeMetadata passed through unchanged to the parent implementation
   * @param logger      passed through unchanged to the parent implementation;
   *                    the rename itself is logged on this class's own logger
   * @throws IOException            from the parent staging call or from the
   *                                move of the staged file
   * @throws DataTransferException  propagated from the parent staging call
   * @throws InstantiationException propagated from the parent staging call
   */
  @Override
  public void stageFile(URI stageFile, File destDir, PgeMetadata pgeMetadata,
      Logger logger)
      throws IOException, DataTransferException, InstantiationException {
    // Let the parent put the file into destDir under its original name first.
    super.stageFile(stageFile, destDir, pgeMetadata, logger);

    // Resolve the source once; name and absolute path both derive from it.
    final File original = new File(stageFile);
    final String baseName = original.getName();
    final String pathHash = DigestUtils.md5Hex(original.getAbsolutePath());

    // Both the staged and the renamed file live directly under destDir.
    final String destPrefix = destDir + File.separator;
    final String stagedPath = destPrefix + baseName;
    final String hashedPath = destPrefix + pathHash + "-" + baseName;

    LOG.info("Orig File Path: [" + stageFile.toString() + "]: MD5 Hash: ["
        + pathHash + "]: renaming: [" + stagedPath + "] to: [" + hashedPath
        + "]");

    final File stagedFile = new File(stagedPath);
    final File hashedFile = new File(hashedPath);
    Files.move(stagedFile, hashedFile);
  }

}
@@ -37,6 +37,7 @@

<!-- automatically perform file staging -->
<metadata key="ForceStaging" val="true"/>
<metadata key="PGETask_FileStager" val="org.apache.oodt.cas.pge.staging.HashingOrigFileStager"/>

<!-- helpful keys -->
<metadata key="LessThan" val="&#x3C;"/>
@@ -116,8 +116,8 @@
<module>crawler</module>
<module>pge</module>
<module>solr</module>
<module>distribution</module>
<module>webapps</module>
<module>proteus</module>
<module>distribution</module>
</modules>
</project>
@@ -13,19 +13,6 @@
<name>DRAT Webapp (D3 Viz)</name>
<artifactId>dms-viz</artifactId>
<packaging>war</packaging>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-war-plugin</artifactId>
<version>2.1.1</version>
<configuration>
</configuration>
</plugin>
</plugins>
</build>

<dependencies>
</dependencies>
</project>

0 comments on commit a219e8a

Please sign in to comment.