fossology · Aman-Codes · Jan 21, 2021
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -14,5 +14,4 @@ include atarashi/data/Ngram_keywords.json
 
 prune .git
 prune venv
-prune test*
-
+prune test*
diff --git a/README.md b/README.md
@@ -195,4 +195,4 @@ This will generate file in `docs/_build/html`. Go to: index.html
 
 You can change the theme of the documentation by changing `html_theme` in config.py file in `docs/` folder.
 You can choose from {'alabaster', 'classic', 'sphinxdoc', 'scrolls', 'agogo', 'traditional', 'nature', 'haiku', 'pyramid', 'bizstyle'}
-[Reference](https://www.sphinx-doc.org/en/master/usage/theming.html)
+[Reference](https://www.sphinx-doc.org/en/master/usage/theming.html)
diff --git a/atarashi/agents/atarashiAgent.py b/atarashi/agents/atarashiAgent.py
@@ -72,4 +72,4 @@ def exactMatcher(licenseText, licenses):
       output.append(licenses.iloc[idx]['shortname'])
   if not output:
     return -1
-  return output
+  return output
diff --git a/atarashi/agents/cosineSimNgram.py b/atarashi/agents/cosineSimNgram.py
@@ -18,7 +18,7 @@
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-import argparse
+import plac
 from enum import Enum
 import itertools
 import json
@@ -184,24 +184,16 @@ def setSimAlgo(self, newAlgo):
       self.simType = newAlgo
 
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("processedLicenseList", help="Specify the processed license list file")
-  parser.add_argument("ngramJson", help="Specify the location of NGRAM JSON")
-  parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
-  parser.add_argument("-s", "--similarity", required=False, default="BigramCosineSim",
-                      choices=["CosineSim", "DiceSim", "BigramCosineSim"],
-                      help="Specify the similarity algorithm that you want")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action='count', default=0)
-  args = parser.parse_args()
-
-  licenseList = args.processedLicenseList
-  ngramJsonLoc = args.ngramJson
-  inputFile = args.inputFile
-  simType = args.similarity
-  verbose = args.verbose
+@plac.annotations(
+  licenseList = plac.Annotation("Specify the processed license list file", "positional", None, str, metavar="processedLicenseList"),
+  ngramJsonLoc = plac.Annotation("Specify the location of NGRAM JSON", metavar="ngramJson"),
+  inputFile = plac.Annotation("Specify the input file which needs to be scanned"),
+  similarity = plac.Annotation("Specify the similarity algorithm that you want", "option", "s", str, ["CosineSim", "DiceSim", "BigramCosineSim"], metavar="{CosineSim,DiceSim,BigramCosineSim}"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
 
+def main(licenseList, ngramJsonLoc, inputFile, similarity="BigramCosineSim", verbose=False):
+  simType = similarity
   scanner = NgramAgent(licenseList, ngramJson=ngramJsonLoc, verbose=verbose)
   if simType == "CosineSim":
     scanner.setSimAlgo(NgramAgent.NgramAlgo.cosineSim)
@@ -215,3 +207,7 @@ def setSimAlgo(self, newAlgo):
     print("N-Gram identifier and " + str(simType) + " is " + str(result))
   else:
     print("Result is nothing")
+
+
+if __name__ == "__main__":
+  plac.call(main)
diff --git a/atarashi/agents/dameruLevenDist.py b/atarashi/agents/dameruLevenDist.py
@@ -19,7 +19,7 @@
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 
-import argparse
+import plac
 import sys
 
 from pyxdameraulevenshtein import damerau_levenshtein_distance
@@ -62,18 +62,19 @@ def scan(self, filePath):
       return temp[0]
 
 
-if __name__ == "__main__":
-  print("The file has been run directly")
-  parser = argparse.ArgumentParser()
-  parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
-  parser.add_argument("processedLicenseList",
-                      help="Specify the processed license list file which contains licenses")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  args = parser.parse_args()
-  filename = args.inputFile
-  licenseList = args.processedLicenseList
-  verbose = args.verbose
-
+@plac.annotations(
+  filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
+  licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")
+)
+def main(filename, licenseList, verbose=False):
+  '''
+  Scan a single file with the DameruLevenDist agent and print the detected license
+  '''
+  print("The file has been run directly")
   scanner = DameruLevenDist(licenseList, verbose=verbose)
-  print("License Detected using Dameru Leven Distance: " + str(scanner.scan(filename)))
+  print("License Detected using Dameru Leven Distance: " + str(scanner.scan(filename)))
+
+
+if __name__ == "__main__":
+  plac.call(main)
diff --git a/atarashi/agents/tfidf.py b/atarashi/agents/tfidf.py
@@ -22,7 +22,7 @@
 __author__ = "Aman Jain"
 __email__ = "amanjain5221@gmail.com"
 
-import argparse
+import plac
 from enum import Enum
 import itertools
 import time
@@ -151,28 +151,24 @@ def setSimAlgo(self, newAlgo):
       self.algo = newAlgo
 
 
-if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("-s", "--tfidf_similarity", required=False,
-                      default="ScoreSim",
-                      choices=["CosineSim", "ScoreSim"],
-                      help="Specify the similarity algorithm that you want")
-  parser.add_argument("inputFile", help="Specify the input file which needs to be scanned")
-  parser.add_argument("processedLicenseList",
-                      help="Specify the processed license list file which contains licenses")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  args = parser.parse_args()
-
-  tfidf_similarity = args.tfidf_similarity
-  filename = args.inputFile
-  licenseList = args.processedLicenseList
-  verbose = args.verbose
-
+@plac.annotations(
+  filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
+  licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
+  tfidf_similarity = plac.Annotation("Specify the similarity algorithm that you want", "option", "s", str, ["CosineSim", "ScoreSim"], metavar="{CosineSim,ScoreSim}"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")
+)
+def main(filename, licenseList, tfidf_similarity="ScoreSim", verbose=False):
+  '''
+  Scan a single file with the TFIDF agent using the chosen similarity algorithm and print the detected license
+  '''
   scanner = TFIDF(licenseList, verbose=verbose)
   if tfidf_similarity == "CosineSim":
     scanner.setSimAlgo(TFIDF.TfidfAlgo.cosineSim)
     print("License Detected using TF-IDF algorithm + cosine similarity " + str(scanner.scan(filename)))
   else:
     scanner.setSimAlgo(TFIDF.TfidfAlgo.scoreSim)
     print("License Detected using TF-IDF algorithm + sum score " + str(scanner.scan(filename)))
+
+
+if __name__ == "__main__":
+  plac.call(main)
diff --git a/atarashi/agents/wordFrequencySimilarity.py b/atarashi/agents/wordFrequencySimilarity.py
@@ -22,7 +22,7 @@
 __author__ = "Aman Jain"
 __email__ = "amanjain5221@gmail.com"
 
-import argparse
+import plac
 import re
 
 from atarashi.agents.atarashiAgent import AtarashiAgent, exactMatcher
@@ -79,19 +79,19 @@ def scan(self, filePath):
       return temp
 
 
-if __name__ == "__main__":
-  print("The file has been called from main")
-  parser = argparse.ArgumentParser()
-  parser.add_argument("inputFile", help = "Specify the input file which needs to be scanned")
-  parser.add_argument("processedLicenseList",
-                      help = "Specify the processed license list file which contains licenses")
-  parser.add_argument("-v", "--verbose", help = "increase output verbosity",
-                      action = "count", default = 0)
-
-  args = parser.parse_args()
-  filename = args.inputFile
-  licenseList = args.processedLicenseList
-  verbose = args.verbose
-
+@plac.annotations(
+  filename = plac.Annotation("Specify the input file which needs to be scanned", metavar="inputFile"),
+  licenseList = plac.Annotation("Specify the processed license list file which contains licenses", "positional", None, str, metavar="processedLicenseList"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")
+)
+def main(filename, licenseList, verbose=False):
+  '''
+  Scan a single file with the WordFrequencySimilarity agent and print the result
+  '''
+  print("The file has been called from main")
   scanner = WordFrequencySimilarity(licenseList, verbose = verbose)
   print("The result from Histogram similarity algo is ", scanner.scan(filename))
+
+
+if __name__ == "__main__":
+  plac.call(main)
diff --git a/atarashi/atarashii.py b/atarashi/atarashii.py
@@ -18,7 +18,7 @@
 with this program; if not, write to the Free Software Foundation, Inc.,
 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
-import argparse
+import plac
 import os
 import json
 import os
@@ -35,7 +35,9 @@
 __version__ = "0.0.11"
 
 
-def atarashii_runner(inputFile, processedLicense, agent_name, similarity="CosineSim", ngramJsonLoc=None, verbose=None):
+
+
+def atarashii_runner(inputFile, agent_name, processedLicense, similarity="CosineSim", ngram_json=None, verbose=None):
   '''
   :param inputFile: Input File for scanning of license
   :param processedLicense: Processed License List (CSV) path (Default path already provided)
@@ -70,7 +72,7 @@ def atarashii_runner(inputFile, processedLicense, agent_name, similarity="Cosine
       print("Please choose similarity from {CosineSim,ScoreSim}")
       return -1
   elif agent_name == "Ngram":
-    scanner = NgramAgent(processedLicense, ngramJson=ngramJsonLoc)
+    scanner = NgramAgent(processedLicense, ngramJson=ngram_json)
     if similarity == "CosineSim":
       scanner.setSimAlgo(NgramAgent.NgramAlgo.cosineSim)
     elif similarity == "DiceSim":
@@ -89,44 +91,32 @@ def atarashii_runner(inputFile, processedLicense, agent_name, similarity="Cosine
     raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), inputFile)
 
 
-def main():
+@plac.annotations(
+  agent_name = plac.Annotation("Name of the agent that needs to be run", "option", "a", str, ["wordFrequencySimilarity", "DLD", "tfidf", "Ngram"], metavar="{wordFrequencySimilarity,DLD,tfidf,Ngram}"),  
+  inputFile = plac.Annotation("Specify the input file path to scan", "positional", None, str, metavar="inputFile"),
+  processedLicense = plac.Annotation("Specify the location of processed license list file", "option", "l", str, metavar="PROCESSEDLICENSELIST"),
+  ngram_json = plac.Annotation("Specify the location of Ngram JSON (for Ngram agent only)", "option", "j"),
+  similarity = plac.Annotation("Specify the similarity algorithm that you want. First 2 are for TFIDF and last 3 are for Ngram", "option", "s", str, ["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"], metavar="{ScoreSim,CosineSim,DiceSim,BigramCosineSim}"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")  
+)
+
+def evaluate(inputFile, processedLicense, ngram_json, agent_name="wordFrequencySimilarity", similarity="CosineSim", verbose=False):
   '''
   Calls atarashii_runner for each file in the folder/ repository specified by user
   Prints the Input file path and the JSON output from atarashii_runner
   '''
   defaultProcessed = resource_filename("atarashi", "data/licenses/processedLicenses.csv")
   defaultJSON = resource_filename("atarashi", "data/Ngram_keywords.json")
-  parser = argparse.ArgumentParser()
-  parser.add_argument("inputFile", help="Specify the input file path to scan")
-  parser.add_argument("-l", "--processedLicenseList", required=False,
-                      help="Specify the location of processed license list file")
-  parser.add_argument("-a", "--agent_name", required=True,
-                      choices=['wordFrequencySimilarity', 'DLD', 'tfidf', 'Ngram'],
-                      help="Name of the agent that needs to be run")
-  parser.add_argument("-s", "--similarity", required=False, default="CosineSim",
-                      choices=["ScoreSim", "CosineSim", "DiceSim", "BigramCosineSim"],
-                      help="Specify the similarity algorithm that you want."
-                           " First 2 are for TFIDF and last 3 are for Ngram")
-  parser.add_argument("-j", "--ngram_json", required=False,
-                      help="Specify the location of Ngram JSON (for Ngram agent only)")
-  parser.add_argument("-v", "--verbose", help="increase output verbosity",
-                      action="count", default=0)
-  parser.add_argument('-V', '--version', action='version', version='%(prog)s ' + __version__)
-  args = parser.parse_args()
-  inputFile = args.inputFile
-  agent_name = args.agent_name
-  similarity = args.similarity
-  verbose = args.verbose
-  processedLicense = args.processedLicenseList
-  ngram_json = args.ngram_json
 
   if processedLicense is None:
     processedLicense = defaultProcessed
   if ngram_json is None:
     ngram_json = defaultJSON
+  if similarity is None:
+    similarity = "CosineSim"
 
   try:
-    result = atarashii_runner(inputFile, processedLicense, agent_name, similarity, ngram_json, verbose)
+    result = atarashii_runner(inputFile, agent_name, processedLicense, similarity, ngram_json, verbose)
     if result != -1:
       if agent_name == "wordFrequencySimilarity":
         result = [{
@@ -150,5 +140,12 @@ def main():
     print("Error: " + e.strerror+ ": '" + e.filename + "'")
 
 
+def main():
+  '''
+  Command line entry point: hands evaluate to plac, which parses the arguments and calls it
+  '''
+  plac.call(evaluate)
+
+
 if __name__ == '__main__':
   main()
diff --git a/atarashi/build_deps.py b/atarashi/build_deps.py
@@ -22,7 +22,7 @@
 __author__ = "Gaurav Mishra"
 __email__ = "gmishx@gmail.com"
 
-import argparse
+import plac
 import os
 import sys
 sys.path.insert(0, os.path.dirname(os.path.realpath(__file__)) + '/../')
@@ -40,7 +40,12 @@
 The merged CSV is then processesed which is then used to create the Ngrams.
 """
 
-def download_dependencies(threads = os.cpu_count(), verbose = 0):
+@plac.annotations(
+  threads = plac.Annotation("No of threads to use for download. Default: CPU count", "option", "t", int, metavar="THREADS"),
+  verbose = plac.Annotation("increase output verbosity", "flag", "v")
+)
+
+def download_dependencies(threads = os.cpu_count(), verbose = False):
   currentDir = os.path.dirname(os.path.abspath(__file__))
   licenseListCsv = currentDir + "/data/licenses/licenseList.csv"
   processedLicenseListCsv = currentDir + "/data/licenses/processedLicenses.csv"
@@ -59,14 +64,4 @@ def download_dependencies(threads = os.cpu_count(), verbose = 0):
   createNgrams(processedLicenseListCsv, ngramJsonLoc, threads, verbose)
 
 if __name__ == "__main__":
-  parser = argparse.ArgumentParser()
-  parser.add_argument("-t", "--threads", required = False, default = os.cpu_count(),
-                      type = int,
-                      help = "No of threads to use for download. Default: CPU count")
-  parser.add_argument("-v", "--verbose", help = "increase output verbosity",
-                      action = "count", default = 0)
-  args = parser.parse_args()
-  threads = args.threads
-  verbose = args.verbose
-
-  download_dependencies(threads, verbose)
+  plac.call(download_dependencies)