Skip to content
This repository has been archived by the owner on Mar 17, 2023. It is now read-only.

Commit

Permalink
Initial commit. Working engine, web app running, but the two are not …
Browse files Browse the repository at this point in the history
…yet connected.
  • Loading branch information
VikParuchuri committed Mar 3, 2014
0 parents commit 2cb4030
Show file tree
Hide file tree
Showing 2,231 changed files with 466,391 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .gitignore
@@ -0,0 +1,11 @@
.idea/*
*~
*.pyc
scan.db
test.db
celerybeat-schedule
scan/private.py
.vagrant
.coverage
data/models/*
!.vc
43 changes: 43 additions & 0 deletions README.md
@@ -0,0 +1,43 @@
Scan
-----------------------------------------

Scan is a lightweight server that allows for automated scoring of essays.

Installation
-----------------------------------------

# Vagrant



# Manual

Linux is currently the best-supported platform, but it is also possible to install on Windows.

## Ubuntu

```
xargs -a apt-packages.txt sudo apt-get install -y
pip install -r pre-requirements.txt
pip install -r requirements.txt
```

## Windows


1. Install the scipy stack from [here](http://www.lfd.uci.edu/~gohlke/pythonlibs/#scipy-stack).
2. Install scikit-learn from the [same place](http://www.lfd.uci.edu/~gohlke/pythonlibs/#scikit-learn).



Please see install instructions here:

http://scikit-learn.org/0.9/install.html


Usage
------------------------------------------

```
nosetests --with-coverage --cover-package="core" --logging-level="INFO"
```
54 changes: 54 additions & 0 deletions alembic.ini
@@ -0,0 +1,54 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
script_location = alembic

# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s

# max length of characters to apply to the
# "slug" field
#truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

sqlalchemy.url = sqlite:///scan.db


# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
1 change: 1 addition & 0 deletions alembic/README
@@ -0,0 +1 @@
Generic single-database configuration.
72 changes: 72 additions & 0 deletions alembic/env.py
@@ -0,0 +1,72 @@
from __future__ import with_statement
from alembic import context
from sqlalchemy import engine_from_config, pool
from logging.config import fileConfig
from app import db

# The Alembic Config object provides access to the values within the
# alembic.ini file currently in use.
config = context.config

# Configure Python logging from the [loggers]/[handlers]/[formatters]
# sections of the ini file.
fileConfig(config.config_file_name)

# Target metadata for 'autogenerate' support: taken from the app's
# Flask-SQLAlchemy `db` object so migrations are diffed against the
# application's models.
target_metadata = db.Model.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.

def run_migrations_offline():
    """Run migrations in 'offline' mode.

    The context is configured with only a database URL rather than an
    Engine, so no DBAPI needs to be importable.  context.execute()
    calls emit the generated SQL to the script output instead of a
    live connection.
    """
    context.configure(url=config.get_main_option("sqlalchemy.url"))

    with context.begin_transaction():
        context.run_migrations()

def run_migrations_online():
    """Run migrations in 'online' mode.

    Builds an Engine from the ini-file configuration, opens a real
    connection, and runs the migrations over it.
    """
    engine = engine_from_config(
        config.get_section(config.config_ini_section),
        prefix='sqlalchemy.',
        poolclass=pool.NullPool)

    connection = engine.connect()
    # BUGFIX: context.configure() previously ran outside the try block,
    # so a failure during configuration leaked the open connection.
    try:
        context.configure(
            connection=connection,
            target_metadata=target_metadata
        )
        with context.begin_transaction():
            context.run_migrations()
    finally:
        connection.close()

# Entry point: Alembic decides offline vs. online mode from the command
# line (e.g. `alembic upgrade --sql` runs offline); dispatch accordingly.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()

22 changes: 22 additions & 0 deletions alembic/script.py.mako
@@ -0,0 +1,22 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision}
Create Date: ${create_date}

"""

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}

from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

def upgrade():
${upgrades if upgrades else "pass"}


def downgrade():
${downgrades if downgrades else "pass"}
47 changes: 47 additions & 0 deletions app.py
@@ -0,0 +1,47 @@
from flask import Flask
from flask.ext.security import Security, SQLAlchemyUserDatastore
from flask.ext.babel import Babel
from scan import settings
from celery import Celery
from core.database.models import db, User, Role
from flask.ext.cache import Cache
from core.web.main_views import main_views

def make_celery(app):
    """Create a Celery instance bound to the given Flask *app*.

    The Celery object shares the Flask configuration (including
    BROKER_URL) and its base Task class is replaced so every task body
    executes inside app.app_context(), letting tasks use app-bound
    extensions.
    """
    celery = Celery(app.import_name, broker=app.config['BROKER_URL'])
    celery.conf.update(app.config)

    base_task = celery.Task

    class ContextTask(base_task):
        abstract = True

        def __call__(self, *args, **kwargs):
            # Run the task under the Flask application context.
            with app.app_context():
                return base_task.__call__(self, *args, **kwargs)

    celery.Task = ContextTask
    return celery

def create_app():
    """Build and return the Flask application configured from
    scan.settings, with the shared SQLAlchemy `db` bound to it."""
    flask_app = Flask(__name__, template_folder='templates')
    flask_app.config.from_object('scan.settings')
    db.app = flask_app
    db.init_app(flask_app)
    return flask_app

def create_test_app():
    """Return an application like create_app()'s, but with the test
    configuration (scan.test_settings) layered on top and the
    SQLAlchemy `db` re-bound afterwards."""
    test_app = create_app()
    test_app.config.from_object('scan.test_settings')
    db.app = test_app
    db.init_app(test_app)
    return test_app

# Module-level application setup: build the app, attach the web
# blueprint, then wire up caching, i18n, and the Celery instance.
app = create_app()
app.register_blueprint(main_views)
cache = Cache(app)

babel = Babel(app)
celery = make_celery(app)

if __name__ == '__main__':
    # Dev-server entry point: create all tables, enable
    # Flask-Security's user/role handling, and serve on all interfaces.
    db.create_all(app=app)
    user_datastore = SQLAlchemyUserDatastore(db, User, Role)
    security = Security(app, user_datastore)
    app.run(debug=settings.DEBUG, host="0.0.0.0")
12 changes: 12 additions & 0 deletions apt-packages.txt
@@ -0,0 +1,12 @@
python-pip
libfreetype6
libfreetype6-dev
zlib1g-dev
openssl
memcached
python-dev
libssl-dev
python-setuptools
libatlas-dev
libatlas3-base
redis-server
1 change: 1 addition & 0 deletions core/__init__.py
@@ -0,0 +1 @@
__author__ = 'vik'
1 change: 1 addition & 0 deletions core/algo/__init__.py
@@ -0,0 +1 @@
__author__ = 'vik'
113 changes: 113 additions & 0 deletions core/algo/features.py
@@ -0,0 +1,113 @@
from core.preprocessors.grammar import GrammarCorrector
from core.preprocessors.spelling import SpellCorrector
from stemming.porter2 import stem
from core.algo.vectorizer import Vectorizer
import re
import numpy as np

class FeatureGenerator(object):
    """Builds a combined feature row for a text: bag-of-words features
    from two vectorizers (raw text, and spell-corrected stemmed text)
    plus scalar "meta" features (lengths, error counts, POS ratios).
    """

    def __init__(self, normal_vectorizer=None, clean_vectorizer=None):
        """Optionally accept pre-fitted vectorizers; when both are
        supplied the generator is considered already fitted."""
        self.mf_generator = MetaFeatureGenerator()
        self.fit_complete = False
        if normal_vectorizer and clean_vectorizer:
            self.fit_complete = True

        if normal_vectorizer:
            self.normal_vectorizer = normal_vectorizer
        else:
            self.normal_vectorizer = Vectorizer()

        if clean_vectorizer:
            self.clean_vectorizer = clean_vectorizer
        else:
            self.clean_vectorizer = Vectorizer()

    def fit(self, input_text, input_scores):
        """Fit both vectorizers: the normal one on the raw corpus, the
        clean one on spell-corrected, stemmed versions of it."""
        self.normal_vectorizer.fit(input_text, input_scores)
        clean_text = [self.mf_generator.generate_clean_stem_text(t) for t in input_text]
        self.clean_vectorizer.fit(clean_text, input_scores)

    def get_features(self, text):
        """Return a 1 x n feature matrix for *text*; the matching
        column names are recorded on self.colnames as a side effect."""
        vec_feats = self.generate_vectorizer_features(text)
        vec_keys = self.normal_vectorizer.vocab + self.clean_vectorizer.vocab

        meta_feats = self.generate_meta_features(text)
        # BUGFIX: dict.keys() is a view in Python 3 with no .sort();
        # sorted() gives a deterministic column order on any version.
        meta_keys = sorted(meta_feats)
        meta_feat_arr = np.matrix([meta_feats[k] for k in meta_keys])

        self.colnames = vec_keys + meta_keys

        return np.hstack([vec_feats, meta_feat_arr])

    def generate_meta_features(self, text):
        """Return the dict of scalar meta features for *text*."""
        return self.mf_generator.generate_meta_features(text)

    def generate_vectorizer_features(self, text):
        """Return the horizontally stacked raw and cleaned
        bag-of-words feature rows for *text*."""
        clean_text = self.mf_generator.generate_clean_stem_text(text)
        feats = self.normal_vectorizer.get_features([text])
        clean_feats = self.clean_vectorizer.get_features([clean_text])
        return np.hstack([feats, clean_feats])

class MetaFeatureGenerator(object):
    """Computes scalar "meta" features for a text — grammar/spelling
    error counts, part-of-speech ratios, and simple length statistics
    — and produces a cleaned, stemmed version of the text for
    vectorization."""

    # Penn Treebank tags grouped by coarse part of speech.
    speech_parts = dict(
        nouns=["NN", "NNP", "NNPS", "NNS"],
        verbs=["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"],
        adjectives=["JJ", "JJR", "JJS"],
        adverbs=["RB", "RBR", "RBS"]
    )

    def __init__(self):
        self.grammar = GrammarCorrector()
        self.spelling = SpellCorrector()
        self.stem = stem

    def generate_grammar_features(self, raw_grammar):
        """Return {part_of_speech: fraction of tokens} for the tagged
        token sequence *raw_grammar* (pairs of (token, tag))."""
        grammar_counts = {}
        for k in self.speech_parts:
            grammar_counts[k] = 0

        for t in raw_grammar:
            for k in self.speech_parts:
                if t[1] in self.speech_parts[k]:
                    grammar_counts[k] += 1
        # BUGFIX: guard against empty input, which previously raised
        # ZeroDivisionError; float() keeps the ratios fractional even
        # under Python 2 integer division.
        total = float(len(raw_grammar))
        if total:
            for k in grammar_counts:
                grammar_counts[k] /= total
        return grammar_counts

    def clean_spell_corrected_tags(self, spelling_markup):
        """Strip any <...> markup tags left by the spell corrector."""
        return re.sub(r"<[^>]+>", '', spelling_markup)

    def generate_text_features(self, text):
        """Return simple length statistics for *text*.  The +1 in each
        denominator smooths against division by zero."""
        feats = {}
        feats['length'] = len(text)
        feats['word_length'] = len(text.split())
        feats['sentence_length'] = len(text.split("."))
        feats['chars_per_sentence'] = feats['length'] / (feats['sentence_length'] + 1)
        feats['words_per_sentence'] = feats['word_length'] / (feats['sentence_length'] + 1)
        feats['chars_per_word'] = feats['length'] / (feats['word_length'] + 1)
        return feats

    def generate_clean_stem_text(self, text):
        """Return *text* spell-corrected, lowercased, stripped of
        punctuation noise, whitespace-normalized, and stemmed."""
        spelling_errors, spelling_markup, raw_spelling = self.spelling.correct_string(text)
        clean_text = self.clean_spell_corrected_tags(spelling_markup)
        clean_text = re.sub(r"[^A-Za-z0-9 \.,\'\":;]", " ", clean_text.lower())
        clean_text = re.sub(r"\s+", " ", clean_text)
        clean_text = ' '.join([self.stem(t) for t in clean_text.split(' ')])
        return clean_text

    def generate_meta_features(self, text):
        """Return the full meta-feature dict: grammar error count, POS
        ratios, spelling error count, and length statistics."""
        features = {}
        grammar_errors, grammar_markup, raw_grammar = self.grammar.correct_string(text)
        features['grammar_errors'] = grammar_errors
        features.update(self.generate_grammar_features(raw_grammar))

        spelling_errors, spelling_markup, raw_spelling = self.spelling.correct_string(text)
        features['spelling_errors'] = spelling_errors

        features.update(self.generate_text_features(text))

        return features

0 comments on commit 2cb4030

Please sign in to comment.