Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(argparse): Shifted from argparse to plac #75

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,4 @@ include atarashi/data/Ngram_keywords.json

prune .git
prune venv
prune test*

prune test*
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -195,4 +195,4 @@ This will generate file in `docs/_build/html`. Go to: index.html

You can change the theme of the documentation by changing `html_theme` in the config.py file in the `docs/` folder.
You can choose from {'alabaster', 'classic', 'sphinxdoc', 'scrolls', 'agogo', 'traditional', 'nature', 'haiku', 'pyramid', 'bizstyle'}
[Reference](https://www.sphinx-doc.org/en/master/usage/theming.html)
[Reference](https://www.sphinx-doc.org/en/master/usage/theming.html)
2 changes: 1 addition & 1 deletion atarashi/agents/atarashiAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@ def exactMatcher(licenseText, licenses):
output.append(licenses.iloc[idx]['shortname'])
if not output:
return -1
return output
return output
32 changes: 14 additions & 18 deletions atarashi/agents/cosineSimNgram.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
import argparse
import plac
from enum import Enum
import itertools
import json
Expand Down Expand Up @@ -184,24 +184,16 @@ def setSimAlgo(self, newAlgo):
self.simType = newAlgo


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("processedLicenseList", help="Specify the processed license list file")
parser.add_argument("ngramJson", help="Specify the location of NGRAM JSON")
parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
parser.add_argument("-s", "--similarity", required=False, default="BigramCosineSim",
choices=["CosineSim", "DiceSim", "BigramCosineSim"],
help="Specify the similarity algorithm that you want")
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action='count', default=0)
args = parser.parse_args()

licenseList = args.processedLicenseList
ngramJsonLoc = args.ngramJson
inputFile = args.inputFile
simType = args.similarity
verbose = args.verbose
@plac.annotations(
licenseList = plac.Annotation("Specify the processed license list file", "positional", None, str, metavar="processedLicenseList"),
ngramJsonLoc = plac.Annotation("Specify the location of NGRAM JSON", metavar="ngramJson"),
inputFile = plac.Annotation("Specify the input file which needs to be scanned"),
similarity = plac.Annotation("Specify the similarity algorithm that you want", "option", "s", str, ["CosineSim", "DiceSim", "BigramCosineSim"], metavar="{CosineSim,DiceSim,BigramCosineSim}"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)

def main(licenseList, ngramJsonLoc, inputFile, similarity="BigramCosineSim", verbose=False):
simType = similarity
scanner = NgramAgent(licenseList, ngramJson=ngramJsonLoc, verbose=verbose)
if simType == "CosineSim":
scanner.setSimAlgo(NgramAgent.NgramAlgo.cosineSim)
Expand All @@ -215,3 +207,7 @@ def setSimAlgo(self, newAlgo):
print("N-Gram identifier and " + str(simType) + " is " + str(result))
else:
print("Result is nothing")


if __name__ == "__main__":
plac.call(main)
28 changes: 14 additions & 14 deletions atarashi/agents/dameruLevenDist.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""

import argparse
import plac
import sys

from pyxdameraulevenshtein import damerau_levenshtein_distance
Expand Down Expand Up @@ -62,18 +62,18 @@ def scan(self, filePath):
return temp[0]


if __name__ == "__main__":
print("The file has been run directly")
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
parser.add_argument("processedLicenseList",
help="Specify the processed license list file which contains licenses")
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action="count", default=0)
args = parser.parse_args()
filename = args.inputFile
licenseList = args.processedLicenseList
verbose = args.verbose
@plac.annotations(
filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)


def main(filename, licenseList, verbose=False):
print("The file has been run directly")
scanner = DameruLevenDist(licenseList, verbose=verbose)
print("License Detected using Dameru Leven Distance: " + str(scanner.scan(filename)))
print("License Detected using Dameru Leven Distance: " + str(scanner.scan(filename)))


if __name__ == "__main__":
plac.call(main)
30 changes: 12 additions & 18 deletions atarashi/agents/tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__author__ = "Aman Jain"
__email__ = "amanjain5221@gmail.com"

import argparse
import plac
from enum import Enum
import itertools
import time
Expand Down Expand Up @@ -151,28 +151,22 @@ def setSimAlgo(self, newAlgo):
self.algo = newAlgo


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-s", "--tfidf_similarity", required=False,
default="ScoreSim",
choices=["CosineSim", "ScoreSim"],
help="Specify the similarity algorithm that you want")
parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
parser.add_argument("processedLicenseList",
help="Specify the processed license list file which contains licenses")
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action="count", default=0)
args = parser.parse_args()

tfidf_similarity = args.tfidf_similarity
filename = args.inputFile
licenseList = args.processedLicenseList
verbose = args.verbose
@plac.annotations(
filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
tfidf_similarity = plac.Annotation("Specify the similarity algorithm that you want", "option", "s", str, ["CosineSim", "ScoreSim"], metavar="{CosineSim,ScoreSim}"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)


def main(filename, licenseList, tfidf_similarity="ScoreSim", verbose=False):
scanner = TFIDF(licenseList, verbose=verbose)
if tfidf_similarity == "CosineSim":
scanner.setSimAlgo(TFIDF.TfidfAlgo.cosineSim)
print("License Detected using TF-IDF algorithm + cosine similarity " + str(scanner.scan(filename)))
else:
scanner.setSimAlgo(TFIDF.TfidfAlgo.scoreSim)
print("License Detected using TF-IDF algorithm + sum score " + str(scanner.scan(filename)))

if __name__ == "__main__":
plac.call(main)
27 changes: 13 additions & 14 deletions atarashi/agents/wordFrequencySimilarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__author__ = "Aman Jain"
__email__ = "amanjain5221@gmail.com"

import argparse
import plac
import re

from atarashi.agents.atarashiAgent import AtarashiAgent, exactMatcher
Expand Down Expand Up @@ -79,19 +79,18 @@ def scan(self, filePath):
return temp


if __name__ == "__main__":
print("The file has been called from main")
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help = "Specify the input file which needs to be scanned")
parser.add_argument("processedLicenseList",
help = "Specify the processed license list file which contains licenses")
parser.add_argument("-v", "--verbose", help = "increase output verbosity",
action = "count", default = 0)

args = parser.parse_args()
filename = args.inputFile
licenseList = args.processedLicenseList
verbose = args.verbose
@plac.annotations(
filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)


def main(filename, licenseList, verbose=False):
print("The file has been called from main")
scanner = WordFrequencySimilarity(licenseList, verbose = verbose)
print("The result from Histogram similarity algo is ", scanner.scan(filename))


if __name__ == "__main__":
plac.call(main)
52 changes: 23 additions & 29 deletions atarashi/atarashii.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
import argparse
import plac
import os
import json
import os
Expand All @@ -35,7 +35,9 @@
__version__ = "0.0.11"


def atarashii_runner(inputFile, processedLicense, agent_name, similarity="CosineSim", ngramJsonLoc=None, verbose=None):


def atarashii_runner(inputFile, agent_name, processedLicense, similarity="CosineSim", ngram_json=None, verbose=None):
'''
:param inputFile: Input File for scanning of license
:param processedLicense: Processed License List (CSV) path (Default path already provided)
Expand Down Expand Up @@ -70,7 +72,7 @@ def atarashii_runner(inputFile, processedLicense, agent_name, similarity="Cosine
print("Please choose similarity from {CosineSim,ScoreSim}")
return -1
elif agent_name == "Ngram":
scanner = NgramAgent(processedLicense, ngramJson=ngramJsonLoc)
scanner = NgramAgent(processedLicense, ngramJson=ngram_json)
if similarity == "CosineSim":
scanner.setSimAlgo(NgramAgent.NgramAlgo.cosineSim)
elif similarity == "DiceSim":
Expand All @@ -89,44 +91,32 @@ def atarashii_runner(inputFile, processedLicense, agent_name, similarity="Cosine
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), inputFile)


def main():
@plac.annotations(
agent_name = plac.Annotation("Name of the agent that needs to be run", "option", "a", str, ["wordFrequencySimilarity", "DLD", "tfidf", "Ngram"], metavar="{wordFrequencySimilarity,DLD,tfidf,Ngram}"),
inputFile = plac.Annotation("Specify the input file path to scan", "positional", None, str, metavar="inputFile"),
processedLicense = plac.Annotation("Specify the location of processed license list file", "option", "l", str, metavar="PROCESSEDLICENSELIST"),
ngram_json = plac.Annotation("Specify the location of Ngram JSON (for Ngram agent only)", "option", "j"),
similarity = plac.Annotation("Specify the similarity algorithm that you want. First 2 are for TFIDF and last 3 are for Ngram", "option", "s", str, ["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"], metavar="{ScoreSim,CosineSim,DiceSim,BigramCosineSim}"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)

def evaluate(inputFile, processedLicense, ngram_json, agent_name="wordFrequencySimilarity", similarity="CosineSim", verbose=False):
'''
Calls atarashii_runner for each file in the folder/ repository specified by user
Prints the Input file path and the JSON output from atarashii_runner
'''
defaultProcessed = resource_filename("atarashi", "data/licenses/processedLicenses.csv")
defaultJSON = resource_filename("atarashi", "data/Ngram_keywords.json")
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help="Specify the input file path to scan")
parser.add_argument("-l", "--processedLicenseList", required=False,
help="Specify the location of processed license list file")
parser.add_argument("-a", "--agent_name", required=True,
choices=['wordFrequencySimilarity', 'DLD', 'tfidf', 'Ngram'],
help="Name of the agent that needs to be run")
parser.add_argument("-s", "--similarity", required=False, default="CosineSim",
choices=["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"],
help="Specify the similarity algorithm that you want."
" First 2 are for TFIDF and last 3 are for Ngram")
parser.add_argument("-j", "--ngram_json", required=False,
help="Specify the location of Ngram JSON (for Ngram agent only)")
parser.add_argument("-v", "--verbose", help="increase output verbosity",
action="count", default=0)
parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__)
args = parser.parse_args()
inputFile = args.inputFile
agent_name = args.agent_name
similarity = args.similarity
verbose = args.verbose
processedLicense = args.processedLicenseList
ngram_json = args.ngram_json

if processedLicense is None:
processedLicense = defaultProcessed
if ngram_json is None:
ngram_json = defaultJSON
if similarity is None:
similarity = "CosineSim"

try:
result = atarashii_runner(inputFile, processedLicense, agent_name, similarity, ngram_json, verbose)
result = atarashii_runner(inputFile, agent_name, processedLicense, similarity, ngram_json, verbose)
if result != -1:
if agent_name == "wordFrequencySimilarity":
result = [{
Expand All @@ -150,5 +140,9 @@ def main():
print("Error: " + e.strerror+ ": '" + e.filename + "'")


def main():
plac.call(evaluate)


if __name__ == '__main__':
main()
plac.call(evaluate)
21 changes: 8 additions & 13 deletions atarashi/build_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
__author__ = "Gaurav Mishra"
__email__ = "gmishx@gmail.com"

import argparse
import plac
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + '/../')
Expand All @@ -40,7 +40,12 @@
The merged CSV is then processed, and the processed CSV is then used to create the Ngrams.
"""

def download_dependencies(threads = os.cpu_count(), verbose = 0):
@plac.annotations(
threads = plac.Annotation("No of threads to use for download. Default: CPU count", "option", "t", int, metavar="THREADS"),
verbose = plac.Annotation("increase output verbosity", "flag", "v")
)

def download_dependencies(threads = os.cpu_count(), verbose = False):
currentDir = os.path.dirname(os.path.abspath(__file__))
licenseListCsv = currentDir + "/data/licenses/licenseList.csv"
processedLicenseListCsv = currentDir + "/data/licenses/processedLicenses.csv"
Expand All @@ -59,14 +64,4 @@ def download_dependencies(threads = os.cpu_count(), verbose = 0):
createNgrams(processedLicenseListCsv, ngramJsonLoc, threads, verbose)

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-t", "--threads", required = False, default = os.cpu_count(),
type = int,
help = "No of threads to use for download. Default: CPU count")
parser.add_argument("-v", "--verbose", help = "increase output verbosity",
action = "count", default = 0)
args = parser.parse_args()
threads = args.threads
verbose = args.verbose

download_dependencies(threads, verbose)
plac.call(download_dependencies)
Loading