Permalink
Browse files

live changes

  • Loading branch information...
1 parent 9616ce0 commit c3f1eb5d5cd316e30b79d29bd399a9e832bfb8b7 Simon Roe committed Dec 2, 2010
View
@@ -1,32 +1,41 @@
-import sys
+import sys
import site
import os
-vepath = '/var/www/stage.farmsubsidy.org/lib/python2.6/site-packages'
+# The absolute path to the app
+root_path = '/var/www/stage.farmsubsidy.org/'
+# python path to the django settings
+django_settings_module = 'web.settings'
+# Python version
+python_version = "2.6"
+
+
+
+# Everything below this line should be left alone.
+# ================================================
+vepath = root_path + 'lib/python%s/site-packages' % python_version
prev_sys_path = list(sys.path)
-# add the site-packages of our virtualenv as a site dir
+
+# add the site-packages of our virtualenv as a site dir
site.addsitedir(vepath)
+
# add the app's directory to the PYTHONPATH
-sys.path.append('/var/www/stage.farmsubsidy.org/')
-sys.path.append('/var/www/stage.farmsubsidy.org/web/')
+sys.path.append(root_path)
+sys.path.append(root_path + 'web')
# reorder sys.path so new directories from the addsitedir show up first
-# new_sys_path = [p for p in sys.path if p not in prev_sys_path]
-# for item in new_sys_path:
-# sys.path.remove(item)
-# sys.path[:0] = new_sys_path
+new_sys_path = [p for p in sys.path if p not in prev_sys_path]
+for item in new_sys_path:
+ sys.path.remove(item)
+sys.path[:0] = new_sys_path
sys.stdout = sys.stderr
+print >> sys.stderr, sys.path
# import from down here to pull in possible virtualenv django install
from django.core.handlers.wsgi import WSGIHandler
-from web import settings
-os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
-# print dir(settings)
-# print settings.DATABASE_PORT
-os.environ['PYTHON_EGG_CACHE'] = '/var/www/stage.farmsubsidy.org/eggs'
-
-
+os.environ['DJANGO_SETTINGS_MODULE'] = django_settings_module
application = WSGIHandler()
+
View
No changes.
View
@@ -1,91 +0,0 @@
-"""
-Uses Redis to remove duplicated rows from a CSV file.
-
-Duplicates are identified by a selected field in the row tuple, rather than all
-fields.
-
-In cases where more than one field needs to be checked for duplicates, the file
-will need to be run through the script n times
-
-
-Usage: By default the first field in the csv file is used. If you want to
-change this, pass --field (or -f) followed by an int.
-
-The other two (required) options are --in and --out, and should be self
-explanatory!
-
-A Redis server and the python redis client is required.
-
-"""
-
-import sys
-from optparse import OptionParser
-import csv
-import StringIO
-
-import redis
-
-parser = OptionParser()
-parser.add_option("-f", "--field",
- dest="field", default=0, type="int",
- help="(int) Duplicate field offset")
-parser.add_option("-i", "--in",
- dest="IN_FILE",
- help="Path to the input file.")
-parser.add_option("-o", "--out",
- dest="OUT_FILE",
- help="Path to the output file.")
-parser.add_option("-s", "--same-as",
- dest="SAME_AS", type="int",
- help="""(int) Only add lines where [duplicate field] is the same as
- this.""")
-
-
-
-(options, args) = parser.parse_args()
-
-r = redis.Redis()
-ID_FIELD = options.field
-SAME_AS = options.SAME_AS
-
-try:
- in_file = csv.reader(open(options.IN_FILE), delimiter=';')
-except:
- raise Exception('Input file not found')
-
-if options.OUT_FILE:
- out_file = open(options.OUT_FILE, 'w')
-else:
- out_file = open("%s.out" % options.IN_FILE, 'w')
-
-key_prefix = "csv-clean-%s" % options.IN_FILE.replace('/', '-')
-r.delete(key_prefix)
-deleted = 0
-
-def process_line(line):
- s = StringIO.StringIO()
- w = csv.writer(s, delimiter=';')
- w.writerow(line)
- return r.hset(key_prefix, line[ID_FIELD], s.getvalue())
- # return r.hdel(key_prefix, line[ID_FIELD])
-
-
-print "Removing Duplicates"
-for line in in_file:
- if SAME_AS:
- if line[SAME_AS] == line[ID_FIELD]:
- deleted += process_line(line)
- else:
- deleted += process_line(line)
-
-
-print "Deleted %s rows, writing %s lines to out file" % (deleted,
- r.hlen(key_prefix))
-
-for row in r.hvals(key_prefix):
- out_file.write(row)
-
-
-r.delete(key_prefix)
-
-
View
@@ -1,40 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-"""
-connection.py
-
-Created by Sym on 2009-08-02.
-
-Manages connections to the farmsubsidy database, for use by the indexing scripts.
-
-"""
-import psycopg2
-from config import *
-import ConfigParser
-config = ConfigParser.ConfigParser()
-config.read('pgloader.conf')
-
-HOST = config.get('pgsql', 'host')
-PORT = config.get('pgsql', 'port')
-DBNAME = config.get('pgsql', 'base')
-USER = config.get('pgsql', 'user')
-PASSWORD = config.get('pgsql', 'pass')
-
-def connect():
- conn = psycopg2.connect(
- "dbname='%(dbname)s' user='%(user)s' host='%(host)s' port=%(PORT)s password='%(password)s'"
- % {
- 'dbname' : DBNAME,
- 'user' : USER,
- 'host' : HOST,
- 'password' : PASSWORD,
- 'PORT' : PORT,
- })
- cur = conn.cursor()
- return conn, cur
-
-
-if __name__ == "__main__":
- conn, cur = connect()
- print conn, cur
View
@@ -1,109 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-This script takes a file name and a table name along with other database
-connection settings.
-
-It then uses Postgresql's server side COPY command to import the CSV file.
-
-Note, that the postgres user use to connect must be a superuser.
-
-"""
-
-import sys
-import os
-sys.path.append('..')
-
-from optparse import OptionParser
-import os.path
-from django.db import connection, backend, models
-
-import psycopg2
-
-parser = OptionParser()
-parser.add_option("-c", "--country", dest="country",
- help="country to COPY",
- metavar="COUNTRY")
-
-parser.add_option("-t", "--data_type", dest="data_type",
- help="Type of data to index, scheme, recpient, payment",
- metavar="TABLE")
-
-parser.add_option("-r", "--reindex", dest="reindex",
- action="store_true",
- help="Reindex all tables",
- metavar="INDEX")
-
-(options, args) = parser.parse_args()
-
-os.environ['DJANGO_SETTINGS_MODULE'] = 'web.settings'
-
-import django
-from django.conf import settings
-
-
-class Copier():
- def __init__(self, options):
-
- cursor = connection.cursor()
-
- self.country = options.country
- print self.country
-
- self.indexes = {
- 'data_payment' : [
- ('data_payment_amounteuro', 'amounteuro'),
- ('data_payment_countrypayment', 'countrypayment'),
- ('data_payment_globalrecipientidx', 'globalrecipientidx'),
- ('data_payment_globalschemeid', 'globalschemeid'),
- ('data_payment_year', 'year'),
- ],
- 'data_recipient' : [
- ('data_recipient_countrypayment', 'countrypayment'),
- ('data_recipient_countryrecipient', 'countryrecipient'),
- ('data_recipient_total', 'total'),
- ]
- }
-
- def file_paths(self):
- return ""
-
- def drop_indexes(self):
- if self.table in self.indexes:
-
- for index in self.indexes[self.table]:
- print "dropping %s" % index
- sql = "DROP INDEX %s CASCADE;" % index
- try:
- self.cur.execute(sql)
- except:
- self.connect()
- pass
-
- def create_indexes(self):
- if self.table in self.indexes:
-
- for index in self.indexes[self.table]:
- print "CREATING %s" % index
- sql = "CREATE INDEX %s;" % index
-
- self.cur.execute(sql)
-
- def copy_file(self):
-
- sql = """
- COPY %(table)s
- FROM '%(filename)s'
- DELIMITERS ';'
- CSV;
- COMMIT;
- """ % {
- 'filename' : self.filename,
- 'table' : self.table,
- }
- print "COPYING"
- #self.cur.execute(sql)
- self.create_indexes()
-
-
-c = Copier(options)
-c.copy_file()
View
@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-import os, sys, string, commands, traceback, xapian
-sys.path.append('..')
-import fsconf
-from optparse import OptionParser
-from lib import progressbar
-import countryCodes
-import pprint
-import cPickle
-import collections
-import re
-import psycopg2
-import psycopg2.extras
-
-
-import connection
-
-
-def index(country):
- conn, c = connection.connect()
- c = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
- database = xapian.WritableDatabase(fsconf.xapianDbPath, xapian.DB_CREATE_OR_OPEN)
- indexer = xapian.TermGenerator()
- stemmer = xapian.Stem("english")
- indexer.set_stemmer(stemmer)
- indexer.set_database(database)
- indexer.set_flags(indexer.FLAG_SPELLING)
-
-
- sql = """
- SELECT t.global_id, t.countrypayment, r.name, r.address1, r.address2, r.town, r.geo1, r.geo2, r.geo3, r.geo4, t.amount_euro
- FROM data_recipients r
- JOIN data_totals t
- ON t.global_id=r.globalrecipientidx
- WHERE t.year=0 AND r.name IS NOT NULL AND r.countrypayment='%(country)s'
- """ % locals()
-
-
- c.execute(sql)
- if c.rowcount == 0:
- raise ValueError, "No records matched for %s" % country
- pbar = progressbar.ProgressBar(maxval=c.rowcount).start()
- row = c.fetchone()
- while row:
- # print "\rAdding Document %s (%s)" % (i, row['global_id']),
- doc = xapian.Document()
- indexer.set_document(doc)
-
-
- doc.add_value(1, xapian.sortable_serialise(float(row['amount_euro'])) )
-
- geo_fields = ['address1','address2','town','geo1','geo2','geo3','geo4', 'countrypayment',]
- location_text = ""
- for field in geo_fields:
- if row[field]:
- location_text += " " + row[field]
- # Index terms with prefixes
- indexer.index_text(location_text.lower(),1,'XLOC:')
- indexer.index_text(row['name'].lower(),1000,'XNAME:')
- doc.add_term("XCOUNTRY:%s" % row['countrypayment'].lower())
-
- # Index the same terms without prefixes
- indexer.index_text(location_text.lower(), 1)
- indexer.index_text(row['name'].lower(), 1000)
-
-
- docid = "XDOCID:%s" % row['global_id'].lower()
- doc.add_term(docid)
- doc.set_data(cPickle.dumps(dict(row)))
- database.replace_document(docid,doc)
-
-
- # for term in doc.termlist():
- # print term.term
- # sys.exit()
- pbar.update(c.rownumber)
- row = c.fetchone()
- pbar.finish()
-
-if __name__ == "__main__":
- index('LU')
-
-
-
-
Oops, something went wrong.

0 comments on commit c3f1eb5

Please sign in to comment.