Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions code/parsing-engine/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,18 @@ plugins {
repositories {
// Repositories used to resolve the dependencies declared below: JCenter is
// listed first, followed by Maven Central.
// NOTE(review): JCenter has been sunset upstream — confirm whether jcenter()
// can be dropped now that mavenCentral() is declared.
jcenter()
mavenCentral()
}

dependencies {
implementation group: 'org.apache.lucene', name: 'lucene-core', version: '8.1.0'
implementation group: 'org.apache.lucene', name: 'lucene-queryparser', version: '8.1.0'
implementation group: 'org.apache.lucene', name: 'lucene-analyzers-common', version: '8.1.0'
implementation group: 'com.googlecode.json-simple', name: 'json-simple', version: '1.1.1'




// Use JUnit Jupiter API for testing.
implementation 'commons-lang:commons-lang:2.6'
implementation 'edu.stanford.nlp:stanford-corenlp:4.3.1'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
package edu.illinois.phantom.analysisengine;

import edu.illinois.phantom.model.UserQuery;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.store.FSDirectory;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;


/**
 * Indexes JSON resume files found under the {@code /CORPUS} classpath resource
 * directory into a Lucene index stored in that same directory, and ranks the
 * indexed resumes against a list of {@link UserQuery} skill requirements.
 *
 * <p>Each resume file is expected to be a JSON object with a string
 * {@code "location"} and a {@code "skills"} array whose elements carry a
 * {@code "skill"} name and an integer {@code "duration"}. Every skill is
 * indexed as an integer point field named {@code <SKILL>_FIELD}, which is the
 * same field name {@link UserQuery#getSkill()} produces.
 *
 * <p>An instance is single-use for indexing: {@link #indexFilesDirectory()}
 * closes the underlying {@link IndexWriter} when it finishes.
 */
public class ScoringEngine {
    private static final Logger LOGGER = Logger.getLogger(ScoringEngine.class.getName());

    /** Classpath location of the resume corpus; the index is written next to the resumes. */
    private static final String CORPUS_RESOURCE = "/CORPUS";
    /** Suffix appended to the upper-cased skill name to form the Lucene field name. */
    private static final String FIELD_SUFFIX = "_FIELD";
    /** Maximum number of hits collected from the searcher. */
    private static final int MAX_COLLECTED_HITS = 100;
    /** Maximum number of hits turned into results. */
    private static final int MAX_REPORTED_HITS = 50;

    private static final Analyzer ANALYZER = new SimpleAnalyzer();

    private final FSDirectory indexDirectory;
    private final IndexWriter writer;
    private final List<File> queue = new ArrayList<>();

    /**
     * Opens an index writer on the corpus directory.
     *
     * @throws IOException if the corpus directory cannot be located or the index cannot be opened
     */
    ScoringEngine() throws IOException {
        indexDirectory = FSDirectory.open(corpusPath());
        try {
            writer = new IndexWriter(indexDirectory, new IndexWriterConfig(ANALYZER));
        } catch (IOException | RuntimeException e) {
            // Do not leak the directory handle when the writer cannot be created.
            indexDirectory.close();
            throw e;
        }
    }

    /**
     * Indexes every {@code .json} file found (recursively) under the corpus
     * directory, then commits and closes the writer.
     *
     * <p>A file that cannot be read or parsed is logged and skipped so that one
     * bad resume does not abort the whole run.
     *
     * @throws IOException if committing or closing the index fails
     */
    public void indexFilesDirectory() throws IOException {
        try {
            addFiles(corpusPath().toFile());
            for (File file : queue) {
                try {
                    indexFile(file);
                } catch (Exception e) {
                    // Boundary catch: parsing can fail in many ways (I/O, malformed JSON,
                    // unexpected value types); report the file and carry on with the rest.
                    LOGGER.log(Level.SEVERE, "Failed to index " + file, e);
                }
            }
            writer.commit();
        } finally {
            queue.clear();
            try {
                writer.close();
            } finally {
                indexDirectory.close();
            }
        }
    }

    /**
     * Scores the indexed resumes against the given skill requirements.
     *
     * <p>Every requirement becomes an optional ({@code SHOULD}) range clause
     * matching resumes whose experience in that skill is at least the requested
     * minimum. Requirements flagged as mandatory are not enforced as required
     * clauses; they are weighted more heavily by boosting the clause with the
     * requested minimum experience (never less than 1).
     *
     * @param userQuery the skill requirements; {@code null} or empty yields an empty result
     * @return the {@code location} values of the best-scoring resumes, best first, without duplicates
     * @throws IOException if the index cannot be opened or closed
     */
    public Set<String> searchQuery(List<UserQuery> userQuery) throws IOException {
        Set<String> results = new LinkedHashSet<>();
        if (userQuery == null || userQuery.isEmpty()) {
            return results;
        }

        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (UserQuery inputQuery : userQuery) {
            if (inputQuery == null) {
                continue;
            }
            Query query = IntPoint.newRangeQuery(
                    inputQuery.getSkill(), inputQuery.getMinExperience(), Integer.MAX_VALUE);
            if (inputQuery.isMandatorySkill()) {
                // A boost below 1 would rank a mandatory skill under an optional one, and a
                // negative boost is rejected by BoostQuery, so clamp it.
                float boost = Math.max(1, inputQuery.getMinExperience());
                query = new BoostQuery(query, boost);
            }
            builder.add(query, BooleanClause.Occur.SHOULD);
        }
        BooleanQuery booleanQuery = builder.build();

        try (FSDirectory directory = FSDirectory.open(corpusPath());
             IndexReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                // Integer.MAX_VALUE as the total-hits threshold makes the collector score every match.
                TopScoreDocCollector collector =
                        TopScoreDocCollector.create(MAX_COLLECTED_HITS, Integer.MAX_VALUE);
                searcher.search(booleanQuery, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;

                int limit = Math.min(MAX_REPORTED_HITS, hits.length);
                for (int i = 0; i < limit; ++i) {
                    Document document = searcher.doc(hits[i].doc);
                    String location = document.get("location");
                    results.add(location);
                    System.out.println("File location--->>>" + location + " Score-->>>" + hits[i].score);
                }
            } catch (Exception e) {
                // Best effort: a failed search is reported and whatever was gathered is returned.
                LOGGER.log(Level.SEVERE, "Search failed for " + userQuery, e);
            }
        }
        System.out.println("Result Document-->>" + results);
        return results;
    }

    /**
     * Parses one resume file and writes it to the index as a single document.
     *
     * <p>The document replaces any document previously indexed for the same
     * path, so indexing the same corpus again does not accumulate duplicates.
     */
    private void indexFile(File file) throws Exception {
        JSONObject resume;
        try (Reader reader = new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8)) {
            resume = (JSONObject) new JSONParser().parse(reader);
        }

        Object location = resume.get("location");
        Object skillEntries = resume.get("skills");
        if (!(location instanceof String) || !(skillEntries instanceof JSONArray)) {
            LOGGER.warning("Skipping " + file + ": missing or malformed \"location\"/\"skills\"");
            return;
        }

        Document document = new Document();
        document.add(new StringField("path", file.getPath(), Field.Store.YES));
        document.add(new StringField("filename", file.getName(), Field.Store.YES));
        document.add(new StringField("location", (String) location, Field.Store.YES));

        List<String> skillNames = new ArrayList<>();
        for (Object entry : (JSONArray) skillEntries) {
            if (!(entry instanceof Map)) {
                LOGGER.warning("Ignoring non-object skill entry in " + file + ": " + entry);
                continue;
            }
            String skill = null;
            int duration = 0;
            // Keys are matched case-insensitively, so "Skill"/"Duration" are accepted too.
            for (Map.Entry<?, ?> pair : ((Map<?, ?>) entry).entrySet()) {
                Object value = pair.getValue();
                if (value == null) {
                    continue;
                }
                String key = String.valueOf(pair.getKey());
                if (key.equalsIgnoreCase("duration")) {
                    duration = Integer.parseInt(value.toString().trim());
                } else if (key.equalsIgnoreCase("skill")) {
                    skill = value.toString();
                }
            }
            if (skill == null || skill.trim().isEmpty()) {
                LOGGER.warning("Ignoring skill entry without a name in " + file);
                continue;
            }
            // Same normalization as UserQuery applies to the queried skill name.
            String fieldName = skill.toUpperCase() + FIELD_SUFFIX;
            document.add(new IntPoint(fieldName, duration));    // searchable by range
            document.add(new StoredField(fieldName, duration)); // retrievable from hits
            skillNames.add(skill);
        }
        document.add(new StringField("allSkills", String.join(" ", skillNames), Field.Store.YES));

        writer.updateDocument(new Term("path", file.getPath()), document);
    }

    /**
     * Recursively collects the {@code .json} files under {@code file} into the
     * indexing queue. Other files (including the Lucene index files that live in
     * the same directory) are reported as skipped.
     */
    private void addFiles(File file) {
        if (!file.exists()) {
            System.out.println(file + " does not exist.");
            return;
        }
        if (file.isDirectory()) {
            File[] children = file.listFiles();
            if (children == null) {
                // listFiles() returns null when the directory cannot be read.
                LOGGER.warning("Unable to list directory " + file);
                return;
            }
            for (File child : children) {
                addFiles(child);
            }
            return;
        }

        String filename = file.getName().toLowerCase(Locale.ROOT);
        // Only JSON resume files are indexed.
        if (filename.endsWith(".json")) {
            queue.add(file);
        } else {
            System.out.println("Skipped " + filename);
        }
    }

    /**
     * Resolves the corpus resource directory to a file-system path.
     *
     * <p>The resource URL is converted through its URI so that paths containing
     * spaces or other URL-encoded characters resolve correctly.
     *
     * @throws IOException if the resource is absent or its URL is not a valid URI
     */
    private Path corpusPath() throws IOException {
        URL resource = getClass().getResource(CORPUS_RESOURCE);
        if (resource == null) {
            throw new IOException("Resource directory " + CORPUS_RESOURCE + " not found on the classpath");
        }
        try {
            return Paths.get(resource.toURI());
        } catch (URISyntaxException e) {
            throw new IOException("Invalid location for " + CORPUS_RESOURCE + ": " + resource, e);
        }
    }

    /** Indexes the corpus and runs a sample query. */
    public static void main(String[] args) throws IOException {
        ScoringEngine scoringEngine = new ScoringEngine();
        scoringEngine.indexFilesDirectory();
        //TODO: Remove Later
        List<UserQuery> sampleQueries = new ArrayList<>();
        sampleQueries.add(new UserQuery("JAVA", 15, true));
        sampleQueries.add(new UserQuery("KAFKA", 5, true));
        sampleQueries.add(new UserQuery("ANGULAR", 2, false));

        scoringEngine.searchQuery(sampleQueries);
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package edu.illinois.phantom.model;

import lombok.Builder;
import lombok.ToString;

/**
 * One skill requirement of a search: the skill, the minimum experience asked
 * for, and whether the skill is flagged as mandatory.
 *
 * <p>The skill is stored in normalized form — upper-cased with a
 * {@code _FIELD} suffix — so {@link #getSkill()} returns the normalized value
 * rather than the name that was passed in.
 */
@Builder
@ToString
public class UserQuery {
    /** Suffix appended to the upper-cased skill name. */
    private static final String FIELD_SUFFIX = "_FIELD";

    private String skill;
    private int minExperience;
    private boolean mandatorySkill;

    /**
     * @param skill          the plain skill name, e.g. {@code "Java"}; must not be {@code null}
     * @param minExperience  the minimum experience requested for the skill
     * @param mandatorySkill whether the skill is flagged as mandatory
     * @throws NullPointerException if {@code skill} is {@code null}
     */
    public UserQuery(String skill, int minExperience, boolean mandatorySkill) {
        this.skill = toFieldName(skill);
        this.minExperience = minExperience;
        this.mandatorySkill = mandatorySkill;
    }

    /** Returns the normalized skill, e.g. {@code "JAVA_FIELD"} for {@code "Java"}. */
    public String getSkill() {
        return skill;
    }

    /**
     * Replaces the skill, applying the same normalization as the constructor.
     *
     * @param skill the plain skill name; must not be {@code null}
     * @throws NullPointerException if {@code skill} is {@code null}
     */
    public void setSkill(String skill) {
        this.skill = toFieldName(skill);
    }

    /** Returns the minimum experience requested for the skill. */
    public int getMinExperience() {
        return minExperience;
    }

    public void setMinExperience(int minExperience) {
        this.minExperience = minExperience;
    }

    /** Returns whether the skill is flagged as mandatory. */
    public boolean isMandatorySkill() {
        return mandatorySkill;
    }

    public void setMandatorySkill(boolean mandatorySkill) {
        this.mandatorySkill = mandatorySkill;
    }

    /** Normalizes a plain skill name: upper-case it and append the suffix. */
    private static String toFieldName(String skill) {
        java.util.Objects.requireNonNull(skill, "skill must not be null");
        return skill.toUpperCase() + FIELD_SUFFIX;
    }
}
5 changes: 5 additions & 0 deletions code/parsing-engine/src/main/resources/CORPUS/REsume_Java.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Java 15
Javascript 2
Spark 5
ExtJS 1
Angular 1
4 changes: 4 additions & 0 deletions code/parsing-engine/src/main/resources/CORPUS/Resume_UI.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ExtJS 5
Angular 10
Java 2
Javascript 2