This repository has been archived by the owner on Mar 17, 2023. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial commit. Working engine, web app running, but the two are not …
…yet connected.
- Loading branch information
0 parents
commit 2cb4030
Showing
2,231 changed files
with
466,391 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
.idea/* | ||
*~ | ||
*.pyc | ||
scan.db | ||
test.db | ||
celerybeat-schedule | ||
scan/private.py | ||
.vagrant | ||
.coverage | ||
data/models/* | ||
!.vc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
Scan | ||
----------------------------------------- | ||
|
||
Scan is a lightweight server that allows for automated scoring of essays. | ||
|
||
Installation | ||
----------------------------------------- | ||
|
||
# Vagrant | ||
|
||
|
||
|
||
# Manual | ||
|
||
Linux is currently the best supported platform, but it is also possible to install on Windows. | ||
|
||
## Ubuntu | ||
|
||
``` | ||
xargs -a apt-packages.txt sudo apt-get install -y | ||
pip install -r pre-requirements.txt | ||
pip install -r requirements.txt | ||
``` | ||
|
||
## Windows | ||
|
||
|
||
1. Install the scipy stack from [here](http://www.lfd.uci.edu/~gohlke/pythonlibs/#scipy-stack). | ||
2. Install scikit-learn from the [same place](http://www.lfd.uci.edu/~gohlke/pythonlibs/#scikit-learn). | ||
|
||
|
||
|
||
Please see install instructions here: | ||
|
||
http://scikit-learn.org/0.9/install.html | ||
|
||
|
||
Usage | ||
------------------------------------------ | ||
|
||
``` | ||
nosetests --with-coverage --cover-package="core" --logging-level="INFO" | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# A generic, single database configuration. | ||
|
||
[alembic] | ||
# path to migration scripts | ||
script_location = alembic | ||
|
||
# template used to generate migration files | ||
# file_template = %%(rev)s_%%(slug)s | ||
|
||
# max length of characters to apply to the | ||
# "slug" field | ||
#truncate_slug_length = 40 | ||
|
||
# set to 'true' to run the environment during | ||
# the 'revision' command, regardless of autogenerate | ||
# revision_environment = false | ||
|
||
sqlalchemy.url = sqlite:///scan.db | ||
|
||
|
||
# Logging configuration | ||
[loggers] | ||
keys = root,sqlalchemy,alembic | ||
|
||
[handlers] | ||
keys = console | ||
|
||
[formatters] | ||
keys = generic | ||
|
||
[logger_root] | ||
level = WARN | ||
handlers = console | ||
qualname = | ||
|
||
[logger_sqlalchemy] | ||
level = WARN | ||
handlers = | ||
qualname = sqlalchemy.engine | ||
|
||
[logger_alembic] | ||
level = INFO | ||
handlers = | ||
qualname = alembic | ||
|
||
[handler_console] | ||
class = StreamHandler | ||
args = (sys.stderr,) | ||
level = NOTSET | ||
formatter = generic | ||
|
||
[formatter_generic] | ||
format = %(levelname)-5.5s [%(name)s] %(message)s | ||
datefmt = %H:%M:%S |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Generic single-database configuration. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
from __future__ import with_statement | ||
from alembic import context | ||
from sqlalchemy import engine_from_config, pool | ||
from logging.config import fileConfig | ||
from app import db | ||
|
||
# Alembic's Config object: gives access to the values of the
# .ini file that is currently in use.
config = context.config

# Set up the Python loggers from that same .ini file.
fileConfig(config.config_file_name)

# MetaData object used for 'autogenerate' support.  The generic
# pattern is:
#   from myapp import mymodel
#   target_metadata = mymodel.Base.metadata
# Here it is taken from the application's ``db`` object.
target_metadata = db.Model.metadata

# Any other option that env.py needs can be read from the config, e.g.:
#   my_important_option = config.get_main_option("my_important_option")
|
||
def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The context is configured with nothing but the ``sqlalchemy.url``
    value from the config.  No Engine is created, so a DBAPI does not
    have to be available.  Calls to context.execute() emit the given
    string to the script output.
    """
    context.configure(url=config.get_main_option("sqlalchemy.url"))

    with context.begin_transaction():
        context.run_migrations()
|
||
def run_migrations_online():
    """Run migrations in 'online' mode.

    An Engine is built from the ``sqlalchemy.``-prefixed options of the
    active config section (using a NullPool), a connection is opened and
    associated with the migration context together with
    ``target_metadata``, and the migrations are run inside a transaction.
    The connection is closed afterwards whether or not an error occurred.
    """
    engine = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix='sqlalchemy.',
        poolclass=pool.NullPool)

    connection = engine.connect()
    try:
        # configure() lives inside the try block so that the connection
        # is released even when configuring the context itself fails.
        context.configure(
            connection=connection,
            target_metadata=target_metadata
        )

        with context.begin_transaction():
            context.run_migrations()
    finally:
        connection.close()
|
||
# Pick the migration mode reported by the Alembic context.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
"""${message} | ||
|
||
Revision ID: ${up_revision} | ||
Revises: ${down_revision} | ||
Create Date: ${create_date} | ||
|
||
""" | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = ${repr(up_revision)} | ||
down_revision = ${repr(down_revision)} | ||
|
||
from alembic import op | ||
import sqlalchemy as sa | ||
${imports if imports else ""} | ||
|
||
def upgrade(): | ||
${upgrades if upgrades else "pass"} | ||
|
||
|
||
def downgrade(): | ||
${downgrades if downgrades else "pass"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from flask import Flask | ||
from flask.ext.security import Security, SQLAlchemyUserDatastore | ||
from flask.ext.babel import Babel | ||
from scan import settings | ||
from celery import Celery | ||
from core.database.models import db, User, Role | ||
from flask.ext.cache import Cache | ||
from core.web.main_views import main_views | ||
|
||
def make_celery(app):
    """Build a Celery instance whose tasks run inside ``app``'s context.

    The broker is read from ``app.config['BROKER_URL']`` and the whole
    Flask config is copied into the Celery configuration.  Returns the
    configured Celery object.
    """
    celery_app = Celery(app.import_name, broker=app.config['BROKER_URL'])
    celery_app.conf.update(app.config)
    base_task = celery_app.Task

    # Task base class that wraps every call in the Flask application context.
    class ContextTask(base_task):
        abstract = True

        def __call__(self, *args, **kwargs):
            with app.app_context():
                return base_task.__call__(self, *args, **kwargs)

    celery_app.Task = ContextTask
    return celery_app
|
||
def create_app():
    """Create the Flask application configured from ``scan.settings``.

    The shared ``db`` object is pointed at the new application and
    initialised with it before the application is returned.
    """
    flask_app = Flask(__name__, template_folder='templates')
    flask_app.config.from_object('scan.settings')

    # Bind the database object to this application.
    db.app = flask_app
    db.init_app(flask_app)
    return flask_app
|
||
def create_test_app():
    """Create the application with ``scan.test_settings`` layered on top.

    Starts from ``create_app()``, applies the test settings over the
    regular ones and binds/initialises ``db`` with the application again.
    """
    test_app = create_app()
    test_app.config.from_object('scan.test_settings')

    # Bind the database object again now that the test settings are loaded.
    db.app = test_app
    db.init_app(test_app)
    return test_app
|
||
# Module-level application object and the extensions attached to it.
app = create_app()
app.register_blueprint(main_views)
cache = Cache(app)

babel = Babel(app)
celery = make_celery(app)

if __name__ == '__main__':
    # Script entry point: create the tables, set up Flask-Security and
    # start the server.
    # NOTE(review): host="0.0.0.0" listens on every interface; if
    # settings.DEBUG is enabled this also exposes the debugger -- confirm
    # this entry point is only used for local development.
    db.create_all(app=app)
    user_datastore = SQLAlchemyUserDatastore(db, User, Role)
    security = Security(app, user_datastore)
    app.run(debug=settings.DEBUG, host="0.0.0.0")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
python-pip | ||
libfreetype6 | ||
libfreetype6-dev | ||
zlib1g-dev | ||
openssl | ||
memcached | ||
python-dev | ||
libssl-dev | ||
python-setuptools | ||
libatlas-dev | ||
libatlas3-base | ||
redis-server |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__author__ = 'vik' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__author__ = 'vik' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
from core.preprocessors.grammar import GrammarCorrector | ||
from core.preprocessors.spelling import SpellCorrector | ||
from stemming.porter2 import stem | ||
from core.algo.vectorizer import Vectorizer | ||
import re | ||
import numpy as np | ||
|
||
class FeatureGenerator(object):
    """Combine vectorizer features and meta features for a piece of text.

    Two vectorizers are used: one for the text as given and one for the
    cleaned, stemmed text produced by the meta feature generator.
    """

    def __init__(self, normal_vectorizer=None, clean_vectorizer=None):
        """Set up the generator.

        normal_vectorizer -- vectorizer for the raw text; a new
            ``Vectorizer`` is created when it is omitted.
        clean_vectorizer -- vectorizer for the cleaned, stemmed text; a new
            ``Vectorizer`` is created when it is omitted.

        ``fit_complete`` starts out True only when both vectorizers were
        supplied by the caller.
        """
        self.mf_generator = MetaFeatureGenerator()

        # Compare against None explicitly so that a supplied vectorizer is
        # never discarded just because it happens to evaluate as false.
        have_normal = normal_vectorizer is not None
        have_clean = clean_vectorizer is not None
        self.fit_complete = have_normal and have_clean

        self.normal_vectorizer = normal_vectorizer if have_normal else Vectorizer()
        self.clean_vectorizer = clean_vectorizer if have_clean else Vectorizer()

    def fit(self, input_text, input_scores):
        """Fit both vectorizers.

        input_text -- sequence of raw text strings.
        input_scores -- scores passed through to each vectorizer's ``fit``.

        The clean vectorizer is fitted on the cleaned, stemmed version of
        every text.  ``fit_complete`` is set to True once both are fitted.
        """
        self.normal_vectorizer.fit(input_text, input_scores)
        clean_text = [self.mf_generator.generate_clean_stem_text(t) for t in input_text]
        self.clean_vectorizer.fit(clean_text, input_scores)
        self.fit_complete = True

    def get_features(self, text):
        """Return the feature row for ``text``.

        The row is the vectorizer features followed by the meta features
        ordered by sorted key.  The matching column names are stored in
        ``self.colnames``.
        """
        vec_feats = self.generate_vectorizer_features(text)
        vec_keys = self.normal_vectorizer.vocab + self.clean_vectorizer.vocab

        meta_feats = self.generate_meta_features(text)
        # sorted() returns a list on both Python 2 and Python 3, whereas
        # dict.keys() is a view without .sort() on Python 3.
        meta_keys = sorted(meta_feats)
        meta_feat_arr = np.matrix([meta_feats[k] for k in meta_keys])

        self.colnames = vec_keys + meta_keys

        return np.hstack([vec_feats, meta_feat_arr])

    def generate_meta_features(self, text):
        """Return the dict of meta features for ``text``."""
        return self.mf_generator.generate_meta_features(text)

    def generate_vectorizer_features(self, text):
        """Return raw-text and clean-text vectorizer features side by side."""
        clean_text = self.mf_generator.generate_clean_stem_text(text)
        feats = self.normal_vectorizer.get_features([text])
        clean_feats = self.clean_vectorizer.get_features([clean_text])
        return np.hstack([feats, clean_feats])
|
||
class MetaFeatureGenerator(object):
    """Compute grammar, spelling and length based features for text."""

    # Part-of-speech tags grouped under the feature name they are counted as.
    speech_parts = dict(
        nouns=["NN", "NNP", "NNPS", "NNS"],
        verbs=["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"],
        adjectives=["JJ", "JJR", "JJS"],
        adverbs=["RB", "RBR", "RBS"]
    )

    # Patterns used when cleaning text, compiled once as raw strings.
    _TAG_RE = re.compile(r"<[^>]+>")
    _DISALLOWED_RE = re.compile(r"""[^A-Za-z0-9 .,'":;]""")
    _WHITESPACE_RE = re.compile(r"\s+")

    def __init__(self):
        """Create the grammar and spelling correctors and bind the stemmer."""
        self.grammar = GrammarCorrector()
        self.spelling = SpellCorrector()
        self.stem = stem

    def generate_grammar_features(self, raw_grammar):
        """Return the share of tokens falling in each ``speech_parts`` group.

        raw_grammar -- sequence of items whose element ``[1]`` is the
            part-of-speech tag.

        Returns a dict mapping every key of ``speech_parts`` to a float
        proportion.  All proportions are 0.0 for empty input.
        """
        grammar_counts = dict.fromkeys(self.speech_parts, 0)

        for token in raw_grammar:
            tag = token[1]
            for group, tags in self.speech_parts.items():
                if tag in tags:
                    grammar_counts[group] += 1

        # Divide as floats: integer division would truncate every
        # proportion below 1 to 0 on Python 2.  Empty input yields zeros
        # instead of a ZeroDivisionError.
        total = float(len(raw_grammar))
        for group in grammar_counts:
            grammar_counts[group] = grammar_counts[group] / total if total else 0.0
        return grammar_counts

    def clean_spell_corrected_tags(self, spelling_markup):
        """Return ``spelling_markup`` with every ``<...>`` tag removed."""
        return self._TAG_RE.sub('', spelling_markup)

    def generate_text_features(self, text):
        """Return length based features of ``text``.

        ``length`` is the character count, ``word_length`` the number of
        whitespace separated words and ``sentence_length`` the number of
        '.'-separated pieces.  The three ratios use a denominator
        incremented by one and are computed with float division.
        """
        feats = {}
        feats['length'] = len(text)
        feats['word_length'] = len(text.split())
        feats['sentence_length'] = len(text.split("."))

        # float() keeps the ratios from being truncated on Python 2.
        chars = float(feats['length'])
        words = float(feats['word_length'])
        feats['chars_per_sentence'] = chars / (feats['sentence_length'] + 1)
        feats['words_per_sentence'] = words / (feats['sentence_length'] + 1)
        feats['chars_per_word'] = chars / (feats['word_length'] + 1)
        return feats

    def generate_clean_stem_text(self, text):
        """Return a spell-corrected, lower-cased, stemmed version of ``text``.

        The markup returned by the spelling corrector is stripped of tags,
        lower-cased, characters other than letters, digits, space and
        ``. , ' " : ;`` are replaced by a space, whitespace runs are
        collapsed and every space separated token is stemmed.
        """
        spelling_errors, spelling_markup, raw_spelling = self.spelling.correct_string(text)
        clean_text = self.clean_spell_corrected_tags(spelling_markup)
        clean_text = self._DISALLOWED_RE.sub(" ", clean_text.lower())
        clean_text = self._WHITESPACE_RE.sub(" ", clean_text)
        return ' '.join([self.stem(t) for t in clean_text.split(' ')])

    def generate_meta_features(self, text):
        """Return all meta features of ``text`` in one dict.

        Contains ``grammar_errors``, the part-of-speech proportions,
        ``spelling_errors`` and the length based text features.
        """
        features = {}

        grammar_errors, grammar_markup, raw_grammar = self.grammar.correct_string(text)
        features['grammar_errors'] = grammar_errors
        features.update(self.generate_grammar_features(raw_grammar))

        spelling_errors, spelling_markup, raw_spelling = self.spelling.correct_string(text)
        features['spelling_errors'] = spelling_errors

        features.update(self.generate_text_features(text))

        return features
Oops, something went wrong.