Skip to content

Commit

Permalink
Merge pull request #308 from /issues/150
Browse files Browse the repository at this point in the history
Migration of config read to topicexplorer.config
  • Loading branch information
JaimieMurdock committed May 20, 2018
2 parents a4cca6f + c12a94f commit a35c615
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 98 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
*.pyc
sandbox
config.*
.coverage
mprofile_*

Expand Down
2 changes: 1 addition & 1 deletion coverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ trap 'echo "trying to kill $DEMO_PID" && kill -2 $DEMO_PID && echo "killed $DEMO
sleep 15

test_url () {
return $([ $(curl -i $1 2>/dev/null | head -n 1 | cut -d$' ' -f2) == $2 ])
return $([[ $(curl -i $1 2>/dev/null | head -n 1 | cut -d$' ' -f2) == $2 ]])
}
test_url http://localhost:8000/ 200
EXIT=$(($EXIT+$?))
Expand Down
63 changes: 58 additions & 5 deletions topicexplorer/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,26 @@
@author: adi
"""
from __future__ import absolute_import

from future import standard_library
standard_library.install_aliases()
from builtins import zip
from builtins import map
from builtins import range
from builtins import object

from codecs import open
from . import config
from .config import moduleLoad
from collections import defaultdict
from configparser import ConfigParser as ConfigParser
from itertools import repeat, chain
import os.path

import numpy as np
from sklearn import manifold
from sklearn import cluster
import os.path
from itertools import repeat, chain
from vsm import *
from vsm.viewer.wrappers import doc_label_name



class dimensionReduce(object):
def __init__(self,config_name):
Expand Down Expand Up @@ -94,3 +101,49 @@ def write(self,filename):
outfile.write(row)


# load the topic models
class keydefaultdict(defaultdict):
    """A ``defaultdict`` whose factory is called with the missing key.

    Solution from: http://stackoverflow.com/a/2912455
    """

    def __missing__(self, key):
        """Create, store and return the value for an absent *key*.

        Raises ``KeyError`` when no ``default_factory`` has been set.
        """
        factory = self.default_factory
        if factory is None:
            raise KeyError(key)
        # Unlike a plain defaultdict, the factory receives the key itself.
        value = factory(key)
        self[key] = value
        return value

# load in the configuration file
class moduleLoad(object):
    """Load the corpus, LDA models and viewers described by a config file.

    Models and viewers are looked up by topic count ``k`` through the
    ``lda_m`` and ``lda_v`` mappings; each entry is created on first access
    and stored for later lookups.
    """

    # Topic counts for which a model may be loaded. Stays empty until
    # `create_model_pattern` finds a ``topics`` option, so that `load_model`
    # raises its KeyError instead of an AttributeError when none is set.
    topic_range = ()

    def __init__(self, config_name):
        """Read *config_name* and set up the lazy model/viewer mappings.

        config_name -- path of the configuration file to read.
        """
        self.config_file = config_name
        self.config = ConfigParser({
            'topic_range': None,
            'topics': None})
        self.config.read(self.config_file)
        self.lda_m = keydefaultdict(self.load_model)
        self.lda_v = keydefaultdict(self.load_viewer)

    def load_corpus(self):
        """Load the corpus named by ``corpus_file`` in the ``main`` section.

        Also stores the configured context type, the metadata view for that
        context type, and the metadata field selected by
        ``doc_label_name(context_type)`` as ``all_ids``.
        """
        self.c = Corpus.load(self.config.get('main', 'corpus_file'))
        self.context_type = self.config.get('main', 'context_type')
        self.ctx_metadata = self.c.view_metadata(self.context_type)
        self.all_ids = self.ctx_metadata[doc_label_name(self.context_type)]

    def create_model_pattern(self):
        """Read the model filename pattern and, if present, the topic list."""
        self.pattern = self.config.get('main', 'model_pattern')
        topics = self.config.get('main', 'topics')
        if topics:
            # NOTE(review): eval() executes whatever expression the config
            # file contains. Kept so existing configs keep working; consider
            # ast.literal_eval once the accepted `topics` formats are
            # confirmed.
            self.topic_range = eval(topics)

    def load_model(self, k):
        """Return the model for *k* topics, loaded from the formatted pattern.

        Raises ``KeyError`` when *k* is not in ``topic_range``.
        """
        if k in self.topic_range:
            return LdaCgsSeq.load(self.pattern.format(k))
        else:
            raise KeyError("No model trained for k={}.".format(k))

    def load_viewer(self, k):
        """ Function to dynamically load the LdaCgsViewer.
            Failure handling for missing keys is handled by `load_model`"""
        return LdaCgsViewer(self.c, self.lda_m[k])

88 changes: 31 additions & 57 deletions topicexplorer/config.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,37 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Apr 17 17:49:05 2016
@author: adi
"""
from __future__ import print_function
from future import standard_library
standard_library.install_aliases()
from builtins import object
from vsm import *
from vsm.viewer.wrappers import doc_label_name

import os.path
from collections import defaultdict
from configparser import ConfigParser as ConfigParser

# load the topic models
class keydefaultdict(defaultdict):
    """A ``defaultdict`` whose factory is called with the missing key.

    Solution from: http://stackoverflow.com/a/2912455
    """

    def __missing__(self, key):
        """Create, store and return the value for an absent *key*.

        Raises ``KeyError`` when no ``default_factory`` has been set.
        """
        factory = self.default_factory
        if factory is None:
            raise KeyError(key)
        # Unlike a plain defaultdict, the factory receives the key itself.
        value = factory(key)
        self[key] = value
        return value

# load in the configuration file
class moduleLoad(object):

def __init__(self,config_name):
self.config_file = config_name
#self.config_file = r"C:/Users/adi/Anaconda/topicexplorer/data23.ini"
self.config = ConfigParser({
'topic_range': None,
'topics': None})
self.config.read(self.config_file)
self.lda_m = keydefaultdict(self.load_model)
self.lda_v = keydefaultdict(self.load_viewer)

# load the corpus
def load_corpus(self):
self.c = Corpus.load(self.config.get('main', 'corpus_file'))
self.context_type = self.config.get('main', 'context_type')
self.ctx_metadata = self.c.view_metadata(self.context_type)
self.all_ids = self.ctx_metadata[doc_label_name(self.context_type)]
from codecs import open
from configparser import ConfigParser

# create topic model patterns
def create_model_pattern(self):
self.pattern = self.config.get('main', 'model_pattern')
if self.config.get('main', 'topics'):
self.topic_range = eval(self.config.get('main', 'topics'))
def read(filename):
config = ConfigParser({
"htrc": False,
"sentences": False,
'certfile': None,
'keyfile': None,
'ca_certs': None,
'ssl': False,
'port': '8000',
'host': '127.0.0.1',
'icons': 'link',
'corpus_link': None,
'doc_title_format': '{0}',
'doc_url_format': '',
'raw_corpus': None,
'label_module': None,
'fulltext': False,
'pdf' : False,
'topics': None,
'cluster': None,
'corpus_desc' : None,
'home_link' : '/',
'lang' : None,
'tokenizer': 'default'
})

def load_model(self,k):
if k in self.topic_range:
return LdaCgsSeq.load(self.pattern.format(k))
else:
raise KeyError("No model trained for k={}.".format(k))
def load_viewer(self,k):
""" Function to dynamically load the LdaCgsViewer.
Failure handling for missing keys is handled by `load_model`"""
return LdaCgsViewer(self.c,self.lda_m[k])
with open(filename, encoding='utf8') as configfile:
config.read_file(configfile)


return config
6 changes: 3 additions & 3 deletions topicexplorer/prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

from codecs import open
from unidecode import unidecode

import topicexplorer.config
from topicexplorer.lib.util import isint, is_valid_configfile, bool_prompt

# NLTK Languages
Expand Down Expand Up @@ -360,9 +362,7 @@ def get_low_filter(c, words=None, items=None, counts=None):
return (low_filter, candidates)

def main(args):
config = ConfigParser({"htrc": False,
"sentences": "False"})
config.read(args.config_file)
config = topicexplorer.config.read(args.config_file)

if config.getboolean("main", "sentences"):
from vsm.extensions.ldasentences import CorpusSent as Corpus
Expand Down
27 changes: 2 additions & 25 deletions topicexplorer/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from bottle import (abort, redirect, request, response, route, run,
static_file, Bottle, ServerAdapter)
import topicexplorer.config
from topicexplorer.lib.color import get_topic_colors, rgb2hex
from topicexplorer.lib.ssl import SSLWSGIRefServer
from topicexplorer.lib.util import (int_prompt, bool_prompt, is_valid_filepath,
Expand Down Expand Up @@ -773,31 +774,7 @@ def main(args, app=None):


def create_app(args):
# load in the configuration file
config = ConfigParser({
'certfile': None,
'keyfile': None,
'ca_certs': None,
'ssl': False,
'port': '8000',
'host': '127.0.0.1',
'icons': 'link',
'corpus_link': None,
'doc_title_format': '{0}',
'doc_url_format': '',
'raw_corpus': None,
'label_module': None,
'fulltext': 'false',
'pdf' : 'false',
'topics': None,
'cluster': None,
'corpus_desc' : None,
'home_link' : '/',
'lang': None,
'tokenizer': 'default'})

with open(args.config, encoding='utf8') as configfile:
config.read_file(configfile)
config = topicexplorer.config.read(args.config)

# path variables
context_type = config.get('main', 'context_type')
Expand Down
10 changes: 4 additions & 6 deletions topicexplorer/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
from builtins import range

from configparser import RawConfigParser as ConfigWriter
from configparser import SafeConfigParser as ConfigParser
from configparser import NoOptionError
import os.path

import topicexplorer.config
from topicexplorer.lib.util import bool_prompt, int_prompt, is_valid_configfile


Expand Down Expand Up @@ -82,8 +82,7 @@ def cluster(n_clusters, config_file):
dimension_reduce_model.fit_kmeans(int(n_clusters))

print("writing model files for Isomap and kmeans\n")
config = ConfigParser()
config.read(config_file)
config = topicexplorer.config.read(config_file)
corpus_filename = config.get("main", "corpus_file")
filename = corpus_filename.split('.')[0] + '-cluster.csv'

Expand All @@ -101,8 +100,7 @@ def main(args):
cluster(args.cluster, args.config_file)
return

config = ConfigParser({"sentences": "False"})
config.read(args.config_file)
config = topicexplorer.config.read(args.config_file)
corpus_filename = config.get("main", "corpus_file")
model_path = config.get("main", "path")

Expand Down Expand Up @@ -243,7 +241,7 @@ def main(args):
config.remove_option("main", "cluster")
try:
os.remove(cluster_path)
except IOError:
except (OSError, IOError):
# fail silently if the cluster file cannot be removed
pass

Expand Down

0 comments on commit a35c615

Please sign in to comment.