Merge pull request #308 from /issues/150

Migration of config read to topicexplorer.config
inpho · May 20, 2018 · a35c615
2 parents a4cca6f + c12a94f
commit a35c615
Show file tree

Hide file tree

Showing 7 changed files with 99 additions and 98 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,5 @@
 *.pyc
 sandbox
-config.*
 .coverage
 mprofile_*
 

diff --git a/coverage.sh b/coverage.sh
@@ -20,7 +20,7 @@ trap 'echo "trying to kill $DEMO_PID" && kill -2 $DEMO_PID && echo "killed $DEMO
 sleep 15
 
 test_url () {
-    return $([ $(curl -i $1 2>/dev/null | head -n 1 | cut -d$' ' -f2) == $2 ])
+    return $([[ $(curl -i $1 2>/dev/null | head -n 1 | cut -d$' ' -f2) == $2 ]])
 }
 test_url http://localhost:8000/ 200
 EXIT=$(($EXIT+$?))

diff --git a/topicexplorer/cluster.py b/topicexplorer/cluster.py
@@ -5,19 +5,26 @@
 @author: adi
 """
 from __future__ import absolute_import
-
+from future import standard_library
+standard_library.install_aliases()
 from builtins import zip
 from builtins import map
 from builtins import range
 from builtins import object
+
 from codecs import open
-from . import config
-from .config import moduleLoad
+from collections import defaultdict
+from configparser import ConfigParser as ConfigParser
+from itertools import repeat, chain
+import os.path
+
 import numpy as np
 from sklearn import manifold
 from sklearn import cluster
-import os.path
-from itertools import repeat, chain
+from vsm import *
+from vsm.viewer.wrappers import doc_label_name
+
+
 
 class dimensionReduce(object):
     def __init__(self,config_name):
@@ -94,3 +101,49 @@ def write(self,filename):
                 outfile.write(row)
 
 
+# load the topic models
+class keydefaultdict(defaultdict):
+    """ Solution from: http://stackoverflow.com/a/2912455 """
+    def __missing__(self, key):
+        if self.default_factory is None:
+            raise KeyError( key )
+        else:
+            ret = self[key] = self.default_factory(key)
+            return ret
+
+# load in the configuration file
+class moduleLoad(object):
+
+    def __init__(self,config_name):
+        self.config_file = config_name
+        #self.config_file = r"C:/Users/adi/Anaconda/topicexplorer/data23.ini" 
+        self.config = ConfigParser({
+            'topic_range': None,
+            'topics': None})
+        self.config.read(self.config_file)
+        self.lda_m = keydefaultdict(self.load_model)
+        self.lda_v = keydefaultdict(self.load_viewer)
+
+    # load the corpus
+    def load_corpus(self):
+        self.c = Corpus.load(self.config.get('main', 'corpus_file'))
+        self.context_type = self.config.get('main', 'context_type')
+        self.ctx_metadata = self.c.view_metadata(self.context_type)
+        self.all_ids = self.ctx_metadata[doc_label_name(self.context_type)]
+
+    # create topic model patterns
+    def create_model_pattern(self):
+        self.pattern = self.config.get('main', 'model_pattern')
+        if self.config.get('main', 'topics'):
+            self.topic_range = eval(self.config.get('main', 'topics'))
+
+    def load_model(self,k):
+        if k in self.topic_range:
+            return LdaCgsSeq.load(self.pattern.format(k))
+        else:
+            raise KeyError("No model trained for k={}.".format(k))
+    def load_viewer(self,k):
+        """ Function to dynamically load the LdaCgsViewer. 
+            Failure handling for missing keys is handled by `load_model`"""
+        return LdaCgsViewer(self.c,self.lda_m[k])
+
diff --git a/topicexplorer/config.py b/topicexplorer/config.py
@@ -1,63 +1,37 @@
-# -*- coding: utf-8 -*-
-"""
-Created on Sun Apr 17 17:49:05 2016
-
-@author: adi
-"""
+from __future__ import print_function
 from future import standard_library
 standard_library.install_aliases()
-from builtins import object
-from vsm import *
-from vsm.viewer.wrappers import doc_label_name
-
-import os.path
-from collections import defaultdict
-from configparser import ConfigParser as ConfigParser
-
-# load the topic models
-class keydefaultdict(defaultdict):
-    """ Solution from: http://stackoverflow.com/a/2912455 """
-    def __missing__(self, key):
-        if self.default_factory is None:
-            raise KeyError( key )
-        else:
-            ret = self[key] = self.default_factory(key)
-            return ret
-
-# load in the configuration file
-class moduleLoad(object):
-
-    def __init__(self,config_name):
-        self.config_file = config_name
-        #self.config_file = r"C:/Users/adi/Anaconda/topicexplorer/data23.ini" 
-        self.config = ConfigParser({
-            'topic_range': None,
-            'topics': None})
-        self.config.read(self.config_file)
-        self.lda_m = keydefaultdict(self.load_model)
-        self.lda_v = keydefaultdict(self.load_viewer)
 
-    # load the corpus
-    def load_corpus(self):
-        self.c = Corpus.load(self.config.get('main', 'corpus_file'))
-        self.context_type = self.config.get('main', 'context_type')
-        self.ctx_metadata = self.c.view_metadata(self.context_type)
-        self.all_ids = self.ctx_metadata[doc_label_name(self.context_type)]
+from codecs import open
+from configparser import ConfigParser
 
-    # create topic model patterns
-    def create_model_pattern(self):
-        self.pattern = self.config.get('main', 'model_pattern')
-        if self.config.get('main', 'topics'):
-            self.topic_range = eval(self.config.get('main', 'topics'))
+def read(filename):
+    config = ConfigParser({
+        "htrc": False,
+        "sentences": False,
+        'certfile': None,
+        'keyfile': None,
+        'ca_certs': None,
+        'ssl': False,
+        'port': '8000',
+        'host': '127.0.0.1',
+        'icons': 'link',
+        'corpus_link': None,
+        'doc_title_format': '{0}',
+        'doc_url_format': '',
+        'raw_corpus': None,
+        'label_module': None,
+        'fulltext': False,
+        'pdf' : False,
+        'topics': None,
+        'cluster': None,
+        'corpus_desc' : None,
+        'home_link' : '/',
+        'lang' : None, 
+        'tokenizer': 'default'
+    })
 
-    def load_model(self,k):
-        if k in self.topic_range:
-            return LdaCgsSeq.load(self.pattern.format(k))
-        else:
-            raise KeyError("No model trained for k={}.".format(k))
-    def load_viewer(self,k):
-        """ Function to dynamically load the LdaCgsViewer. 
-            Failure handling for missing keys is handled by `load_model`"""
-        return LdaCgsViewer(self.c,self.lda_m[k])
+    with open(filename, encoding='utf8') as configfile:
+        config.read_file(configfile)
 
-
+    return config
diff --git a/topicexplorer/prep.py b/topicexplorer/prep.py
@@ -15,6 +15,8 @@
 
 from codecs import open
 from unidecode import unidecode
+
+import topicexplorer.config
 from topicexplorer.lib.util import isint, is_valid_configfile, bool_prompt
 
 # NLTK Languages
@@ -360,9 +362,7 @@ def get_low_filter(c, words=None, items=None, counts=None):
     return (low_filter, candidates)
 
 def main(args):
-    config = ConfigParser({"htrc": False,
-                           "sentences": "False"})
-    config.read(args.config_file)
+    config = topicexplorer.config.read(args.config_file)
 
     if config.getboolean("main", "sentences"):
         from vsm.extensions.ldasentences import CorpusSent as Corpus

diff --git a/topicexplorer/server.py b/topicexplorer/server.py
@@ -26,6 +26,7 @@
 
 from bottle import (abort, redirect, request, response, route, run, 
                     static_file, Bottle, ServerAdapter)
+import topicexplorer.config
 from topicexplorer.lib.color import get_topic_colors, rgb2hex
 from topicexplorer.lib.ssl import SSLWSGIRefServer
 from topicexplorer.lib.util import (int_prompt, bool_prompt, is_valid_filepath,
@@ -773,31 +774,7 @@ def main(args, app=None):
 
 
 def create_app(args):
-    # load in the configuration file
-    config = ConfigParser({
-        'certfile': None,
-        'keyfile': None,
-        'ca_certs': None,
-        'ssl': False,
-        'port': '8000',
-        'host': '127.0.0.1',
-        'icons': 'link',
-        'corpus_link': None,
-        'doc_title_format': '{0}',
-        'doc_url_format': '',
-        'raw_corpus': None,
-        'label_module': None,
-        'fulltext': 'false',
-        'pdf' : 'false',
-        'topics': None,
-        'cluster': None,
-        'corpus_desc' : None,
-        'home_link' : '/',
-        'lang': None,
-        'tokenizer': 'default'})
-
-    with open(args.config, encoding='utf8') as configfile:
-        config.read_file(configfile)
+    config = topicexplorer.config.read(args.config)
 
     # path variables
     context_type = config.get('main', 'context_type')

diff --git a/topicexplorer/train.py b/topicexplorer/train.py
@@ -8,10 +8,10 @@
 from builtins import range
 
 from configparser import RawConfigParser as ConfigWriter
-from configparser import SafeConfigParser as ConfigParser
 from configparser import NoOptionError
 import os.path
 
+import topicexplorer.config
 from topicexplorer.lib.util import bool_prompt, int_prompt, is_valid_configfile
 
 
@@ -82,8 +82,7 @@ def cluster(n_clusters, config_file):
     dimension_reduce_model.fit_kmeans(int(n_clusters))
 
     print("writing model files for Isomap and kmeans\n")
-    config = ConfigParser()
-    config.read(config_file)
+    config = topicexplorer.config.read(config_file)
     corpus_filename = config.get("main", "corpus_file")
     filename = corpus_filename.split('.')[0] + '-cluster.csv'
 
@@ -101,8 +100,7 @@ def main(args):
         cluster(args.cluster, args.config_file)
         return
 
-    config = ConfigParser({"sentences": "False"})
-    config.read(args.config_file)
+    config = topicexplorer.config.read(args.config_file)
     corpus_filename = config.get("main", "corpus_file")
     model_path = config.get("main", "path")
 
@@ -243,7 +241,7 @@ def main(args):
             config.remove_option("main", "cluster")
             try:
                 os.remove(cluster_path)
-            except IOError:
+            except (OSError, IOError):
                 # fail silently on OSError/IOError
                 pass