Skip to content

Commit

Permalink
Merge pull request #223 from rgao/dev
Browse files Browse the repository at this point in the history
PostgreSQL -> MySQL
  • Loading branch information
ryanmswan committed Feb 1, 2020
2 parents 5b69eeb + 0540eb8 commit 089f7af
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 36 deletions.
50 changes: 25 additions & 25 deletions server/src/services/databaseOrm.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,40 +43,40 @@ class Ingest(Base):
policeprecinct = Column(String)


insertFields = {'srnumber': String,
insertFields = {'srnumber': String(50),
'createddate': DateTime,
'updateddate': DateTime,
'actiontaken': String,
'owner': String,
'requesttype': String,
'status': String,
'requestsource': String,
'createdbyuserorganization': String,
'mobileos': String,
'anonymous': String,
'assignto': String,
'servicedate': String,
'closeddate': String,
'addressverified': String,
'approximateaddress': String,
'address': String,
'housenumber': String,
'direction': String,
'streetname': String,
'suffix': String,
'actiontaken': String(30),
'owner': String(10),
'requesttype': String(30),
'status': String(20),
'requestsource': String(30),
'createdbyuserorganization': String(16),
'mobileos': String(10),
'anonymous': String(10),
'assignto': String(20),
'servicedate': String(30),
'closeddate': String(30),
'addressverified': String(16),
'approximateaddress': String(20),
'address': String(100),
'housenumber': String(10),
'direction': String(10),
'streetname': String(30),
'suffix': String(6),
'zipcode': Integer,
'latitude': Float,
'longitude': Float,
'location': String,
'location': String(100),
'tbmpage': Integer,
'tbmcolumn': String,
'tbmcolumn': String(10),
'tbmrow': Float,
'apc': String,
'apc': String(30),
'cd': Float,
'cdmember': String,
'cdmember': String(30),
'nc': Float,
'ncname': String,
'policeprecinct': String}
'ncname': String(100),
'policeprecinct': String(30)}


readFields = {'SRNumber': str,
Expand Down
25 changes: 15 additions & 10 deletions server/src/services/sqlIngest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np
from sodapy import Socrata
import time
from . import databaseOrm # Contains database specs and field definitions
import databaseOrm # Contains database specs and field definitions


class DataHandler:
Expand All @@ -20,6 +20,7 @@ def __init__(self, config=None, configFilePath=None, separator=','):
self.fields = databaseOrm.tableFields
self.insertParams = databaseOrm.insertFields
self.readParams = databaseOrm.readFields
self.dialect = None

def loadConfig(self, configFilePath):
'''Load and parse config data'''
Expand All @@ -33,6 +34,7 @@ def loadConfig(self, configFilePath):
config.read(configFilePath)
self.config = config
self.dbString = config['Database']['DB_CONNECTION_STRING']
self.dialect = self.dbString.split(':')[0]
self.token = None if config['Socrata']['TOKEN'] == 'None' \
else config['Socrata']['TOKEN']

Expand Down Expand Up @@ -84,19 +86,22 @@ def cleanData(self):

def ingestData(self, ingestMethod='replace'):
'''Set up connection to database'''
print('Inserting data into Postgres instance...')
asdf = 'Inserting data into ' + self.dialect + ' instance...'
print(asdf)
ingestTimer = time.time()
data = self.data.copy() # shard deepcopy for other endpoint operations
engine = db.create_engine(self.dbString)
newColumns = [column.replace(' ', '_').lower() for column in data]
data.columns = newColumns
# Ingest data
# In MySQL, "schema" is a synonym for "database", so the schema passed
# below must match the database name used in the connection string
data.to_sql("ingest_staging_table",
engine,
if_exists=ingestMethod,
schema='public',
index=False,
chunksize=10000,
chunksize=10,
dtype=self.insertParams)
print('\tIngest Complete: %.1f minutes' %
self.elapsedTimer(ingestTimer))
Expand Down Expand Up @@ -171,7 +176,7 @@ def populateFullDatabase(self, yearRange=range(2015, 2021)):
Default operation is to fetch data from 2015-2020
!!! Be aware that each fresh import will wipe the
existing staging table'''
print('Performing fresh Postgres population from Socrata data sources')
print('Performing fresh ' + self.dialect + ' population from Socrata data sources')
tableInit = False
globalTimer = time.time()
for y in yearRange:
Expand Down Expand Up @@ -238,11 +243,11 @@ def fix_nan_vals(resultDict):
'''Class DataHandler workflow from initial load to SQL population'''
loader = DataHandler()
loader.loadConfig(configFilePath='../settings.cfg')
loader.fetchSocrataFull(limit=10000)
loader.fetchSocrataFull()
loader.cleanData()
loader.ingestData()
loader.saveCsvFile('testfile.csv')
loader.dumpFilteredCsvFile(dataset="",
startDate='2018-05-01',
requestType='Bulky Items',
councilName='VOICES OF 90037')
# loader.saveCsvFile('testfile.csv')
# loader.dumpFilteredCsvFile(dataset="",
# startDate='2018-05-01',
# requestType='Bulky Items',
# councilName='VOICES OF 90037')
2 changes: 1 addition & 1 deletion server/src/settings.example.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ HOST = 0.0.0.0
PORT = 5000

[Database]
DB_CONNECTION_STRING = postgres://REDACTED:REDACTED@localhost:5432/postgres
DB_CONNECTION_STRING = mysql://REDACTED:REDACTED@localhost:5432/public
DATA_DIRECTORY = static

[Api]
Expand Down

0 comments on commit 089f7af

Please sign in to comment.