v3.0.1
answerquest committed Oct 1, 2018
1 parent 3f29b4e commit b44fcb5
Showing 50 changed files with 16,893 additions and 15,854 deletions.
4 changes: 4 additions & 0 deletions GTFSManager.py
@@ -39,6 +39,7 @@
sequenceDBfile = 'db/sequence.json'
passwordFile = 'pw/rsa_key.bin'
chunkRulesFile = 'chunkRules.json'
configFile = 'config.json'
thisURL = ''

debugMode = False # using this flag at various places to do or not do things based on whether we're in development or production
@@ -48,8 +49,11 @@
# for checking imported ZIP against
# to do: don't make this a HARD requirement. Simply logmessage about it.

# load parameters from config folder
with open(configFolder + chunkRulesFile) as f:
    chunkRules = json.load(f)
with open(configFolder + configFile) as f:
    configRules = json.load(f)

# create folders if they don't exist
for folder in [uploadFolder, xmlFolder, logFolder, configFolder, dbFolder, exportFolder]:
82 changes: 55 additions & 27 deletions GTFSserverfunctions.py
@@ -28,7 +28,7 @@ def exportGTFS (folder):
        tablename = h5File[:-3] # remove last 3 chars, .h5

        try:
            df = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
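Throughout this commit the 'df' key argument is dropped from the pd.read_hdf calls, while to_hdf keeps writing under the 'df' key. pandas can infer the key when an HDF5 store contains exactly one object, so the keyless read returns the same table. A minimal sketch of that equivalence, with an illustrative file name rather than anything from this repository:

    import pandas as pd

    # write a single table under the 'df' key, as GTFSserverfunctions does
    pd.DataFrame({'stop_id': ['A', 'B']}).to_hdf('demo.h5', key='df', format='table', mode='w')

    # with only one object in the store, the key can be inferred on read
    a = pd.read_hdf('demo.h5')          # key inferred
    b = pd.read_hdf('demo.h5', 'df')    # explicit key, equivalent here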
@@ -58,7 +58,7 @@ def exportGTFS (folder):
    columnsList = set()
    for count,h5File in enumerate(filenames):
        try:
            df = pd.read_hdf(dbFolder + h5File,'df',stop=0)
            df = pd.read_hdf(dbFolder + h5File,stop=0)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
@@ -76,7 +76,7 @@ def exportGTFS (folder):
    for count,h5File in enumerate(filenames):
        logmessage('Writing {} to csv'.format(h5File))
        try:
            df1 = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
            df1 = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
        except KeyError as e:
            df1 = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
@@ -416,7 +416,7 @@ def readTableDB(tablename, key=None, value=None):
            continue

        try:
            df = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
            # typecasting as str, keeping NA values blank ''
        except KeyError as e:
            df = pd.DataFrame()
@@ -478,15 +478,15 @@ def replaceTableDB(tablename, data, key=None, value=None):
    elif ((key is not None) and (value is not None) ):
        # remove entries matching the key and value
        try:
            df = pd.read_hdf(h5File,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder+h5File).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
        oldLen = len( df[ df[key] == str(value)])
        df.query(key + ' != "' + str(value) + '"', inplace=True)

        df3 = pd.concat([df,xdf], ignore_index=True)
        df3.to_hdf(h5File, 'df', format='table', mode='w', complevel=1)
        df3.to_hdf(dbFolder+h5File, 'df', format='table', mode='w', complevel=1)

        logmessage('Replaced {} entries for {}={} with {} new entries in {}.'\
            .format(oldLen,key,str(value),str(len(xdf)),tablename ) )
@@ -1084,7 +1084,7 @@ def replaceTableCell(h5File,column,valueFrom,valueTo):
        return False

    try:
        df = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
        df = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
    except KeyError as e:
        df = pd.DataFrame()
        logmessage('Note: {} does not have any data.'.format(h5File))
@@ -1216,7 +1216,7 @@ def replaceChunkyTableDB(xdf, value, tablename='stop_times'):
    if chunkFile:
        logmessage('Editing ' + chunkFile)
        try:
            df = pd.read_hdf(dbFolder + chunkFile,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + chunkFile).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(chunkFile))
@@ -1232,23 +1232,23 @@
        else:
            logmessage('{} older entries for id {} removed.'.format( str( initLen - reducedLen ),value ))

        newdf = pd.concat([df,xdf],ignore_index=True)
        logmessage('{} new entries for id {} added. Now writing to {}.'.format( str( len(xdf) ),value, chunkFile ))
        newdf.to_hdf(dbFolder+chunkFile, 'df', format='table', mode='w', complevel=1)


    else:
        # if the trip wasn't previously existing, take the smallest chunk and add in there.
        chunkFile = smallestChunk(tablename)
        try:
            df = pd.read_hdf(dbFolder + chunkFile,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + chunkFile).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(chunkFile))

        newdf = pd.concat([df,xdf],ignore_index=True)
        logmessage('{} new entries for id {} added. Now writing to {}.'.format( str( len(xdf) ),value, chunkFile ))

        newdf.to_hdf(dbFolder+chunkFile, 'df', format='table', mode='w', complevel=1)
        except FileNotFoundError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not exist yet, so we will likely create it.'.format(chunkFile))

        # next 3 lines to be done in either case
        newdf = pd.concat([df,xdf],ignore_index=True)
        logmessage('{} new entries for id {} added. Now writing to {}.'.format( str( len(xdf) ),value, chunkFile ))
        newdf.to_hdf(dbFolder+chunkFile, 'df', format='table', mode='w', complevel=1)


    # add entry for new trip in stop_times_lookup.json
@@ -1277,19 +1277,43 @@ def findChunk(value, tablename="stop_times"):
    lookupJSONFile = chunkRules[tablename]['lookup']
    print('lookup for {}: {}'.format(tablename,lookupJSONFile))

    with open(dbFolder + lookupJSONFile) as f:
        table_lookup = json.load(f)
    try:
        with open(dbFolder + lookupJSONFile) as f:
            table_lookup = json.load(f)
    except FileNotFoundError:
        logmessage(dbFolder + lookupJSONFile,'not found so creating it as empty json.')
        with open(dbFolder + lookupJSONFile, 'a') as f:
            f.write('{}')
        table_lookup = {}

    chunkFile = table_lookup.get(value,None)
    print('Found chunk for id {}: {}'.format(value,chunkFile) )
    return chunkFile
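findChunk() resolves an id to the one chunk file that holds its rows, via the lookup JSON named in chunkRules (stop_times_lookup.json for the stop_times table), and with this commit it also creates that file as an empty JSON object if it is missing. A minimal sketch of the lookup's shape; the ids and chunk names here are illustrative, not taken from the repository:

    # contents of db/stop_times_lookup.json, loaded as table_lookup
    table_lookup = {
        "861_1_down": "stop_times_1.h5",
        "861_2_up": "stop_times_2.h5"
    }
    table_lookup.get("861_1_down", None)   # -> "stop_times_1.h5"
    table_lookup.get("999_new", None)      # -> None, so the caller falls back to smallestChunk()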


def smallestChunk(prefix):
    filenames = [f for f in os.listdir(dbFolder) if f.lower().endswith('.h5') and ( f.lower().startswith(prefix) ) and os.path.isfile(os.path.join(dbFolder, f))]
def smallestChunk(prefix, maxSizeMB=configRules.get('MAX_CHUNK_SIZE',1) ):
    '''
    Find the smallest chunk of a chunked table, by size, as the place to put a new set of records in.
    This helps to balance the sizes out over time.
    In case ALL the chunks are too heavy (set some limit), then christen the next chunk.
    '''
    # filenames = [f for f in os.listdir(dbFolder) if f.lower().endswith('.h5') and ( f.lower().startswith(prefix) ) and os.path.isfile(os.path.join(dbFolder, f))]
    filenames = findFiles(dbFolder, prefix=prefix, chunk='y')

    chunkFile = sorted(filenames, key=lambda filename: os.path.getsize(os.path.join(dbFolder, filename)))[0]
    if not len(filenames):
        # no chunks present, return tablename_1
        return prefix + '_1.h5'

    # chunkFile = sorted(filenames, key=lambda filename: os.path.getsize(os.path.join(dbFolder, filename)))[0]
    # sort the list of files by size and pick first one. From https://stackoverflow.com/a/44215088/4355695

    sizeList = [ os.path.getsize(os.path.join(dbFolder, filename))/(2**20) for filename in filenames ]
    # get sizes in MB
    if min(sizeList) < maxSizeMB:
        chunkFile = filenames[ sizeList.index(min(sizeList)) ]
    else:
        nextChunkNum = len(filenames) + 1
        chunkFile = '{}_{}.h5'.format(prefix, nextChunkNum)
        logmessage('smallestChunk: All chunks for {} too big, lets create a new one, {}'.format(prefix,chunkFile))
    return chunkFile
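A worked example of the selection rule above, with illustrative sizes for three existing stop_times chunks and the default maxSizeMB of 1:

    sizeList = [1.4, 0.3, 1.1]   # MB, for stop_times_1.h5 .. stop_times_3.h5
    min(sizeList) < 1            # True: reuse the lightest chunk, stop_times_2.h5

    sizeList = [1.4, 1.2, 1.1]
    min(sizeList) < 1            # False: every chunk is over the limit, christen stop_times_4.h5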


@@ -1373,8 +1397,7 @@ def readChunkTableDB(tablename, key, value):
    chunksHaving = []
    for i,h5File in enumerate( findFiles(dbFolder, ext='.h5', prefix=tablename, chunk='y') ):
        try:
            df = pd.read_hdf(dbFolder+h5File,'df')\
                .fillna('').astype(str)\
            df = pd.read_hdf(dbFolder+h5File).fillna('').astype(str)\
                .query( '{}=="{}"'.format(key,value) )
        except KeyError as e:
            df = pd.DataFrame()
@@ -1459,7 +1482,7 @@ def deleteInTable(tablename, key, value, action="delete"):
    returnMessage = ''
    for h5File in h5Files:
        try:
            df = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
@@ -1487,6 +1510,11 @@ def deleteInTable(tablename, key, value, action="delete"):
    logmessage(returnMessage)
    return returnMessage

##########################
# Redo the delete functions to accommodate multiple values.
# For pandas it doesn't make any difference whether its one value or multiple

##########################

def sequenceDel(column,value):
    content = []
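On the note above about accepting multiple values: with pandas the only change is swapping the equality test for a membership test, so the delete path can keep the same shape. A rough sketch with placeholder names (df, key, value and valueList stand in for whatever the reworked functions would receive; they are not parameters in this commit):

    # keep rows that don't match a single value
    df = df[ df[key] != str(value) ]

    # keep rows that don't match any of several values
    df = df[ ~df[key].isin([str(v) for v in valueList]) ]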
2 changes: 1 addition & 1 deletion README.md
@@ -5,7 +5,7 @@

A browser-based user interface for creating, editing, and exporting static GTFS (General Transit Feed Specification) feeds for a public transit authority.

**Development Status** : V 3.0.0
**Development Status** : V 3.0.1
~And Windows binary is available too now.~ Download from [Releases page](https://github.com/WRI-Cities/static-GTFS-manager/releases/).

## Intro
3 changes: 3 additions & 0 deletions config/config.json
@@ -0,0 +1,3 @@
{
    "MAX_CHUNK_SIZE":1
}
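This new file backs the configRules dict loaded in GTFSManager.py; MAX_CHUNK_SIZE is consumed by smallestChunk() as a size threshold in MB (byte sizes are divided by 2**20 before the comparison). A deployment that wants heavier chunks before a new one is christened can simply raise the value, and if the key is missing the code falls back to 1:

    maxSizeMB = configRules.get('MAX_CHUNK_SIZE', 1)   # e.g. returns 5 if config.json holds {"MAX_CHUNK_SIZE": 5}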
8 changes: 7 additions & 1 deletion js/hyd-config.json → config/hmrl-config.json
@@ -35,6 +35,12 @@

"fares": [10,15,25,30,35,40,45,50,55,60],

"transfers": [["AME2","AME4"]]
"transfers": [["AME2","AME4"]],

"feed_info": {
"feed_publisher_name": "Telangana Open Data Portal",
"feed_publisher_url": "http://www.data.telangana.gov.in/",
"feed_lang": "en"
}

}
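The added feed_info block maps onto the three required columns of GTFS feed_info.txt (note that the commit also adds db/feed_info.h5 below). Assuming the exporter writes these values through unchanged, the exported feed_info.txt would look roughly like:

    feed_publisher_name,feed_publisher_url,feed_lang
    Telangana Open Data Portal,http://www.data.telangana.gov.in/,en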
2 changes: 1 addition & 1 deletion config/settings.js
@@ -1,6 +1,6 @@
// from commonfuncs.js

const VERSION = 'v3.0.0';
const VERSION = 'v3.0.1';
const APIpath = 'API/';
const CURRENCY = 'INR';
const route_type_options = {0:"0-Tram, Streetcar, Light rail", 1:"1-Subway, Metro", 2:"2-Rail", 3:"3-Bus",4:"4-Ferry" };
Binary file modified db/agency.h5
Binary file modified db/calendar.h5
Binary file removed db/calendar_dates.h5
Binary file added db/feed_info.h5
Binary file modified db/routes.h5
129 changes: 128 additions & 1 deletion db/sequence.json
@@ -1 +1,128 @@
{}
{
"_default": {
"1": {
"0": [
"LINGA1",
"CHAND1",
"GANGA1",
"HUDAC1",
"DEEPT1",
"MYTHR1",
"ALLWY2",
"ALLWY3",
"MIYAP1",
"MIYAP2",
"MIYAP3",
"HYDER1",
"HYDER2",
"NIZAM1",
"JNTU01",
"KPHBV1",
"KPHBM3",
"VIVEK3",
"SUMIT2",
"KUKAT8",
"SANGE2",
"KUKAT9",
"MOOSA3",
"BHARA3",
"PREMN2",
"ERRAG6",
"ERRAG5",
"RYTHU1",
"ESIBU3",
"SRNAG3",
"MAITH1",
"AMEER4",
"PANJA2",
"NIMS01",
"ERRAM1",
"KHAIR1",
"KHAIR2",
"CHINT2",
"LAKDI1",
"ASSEM1",
"NIZAM2",
"ABIDS1",
"ABIDS2",
"BANKS1",
"SHANK1",
"KOTIW1",
"CHADE1",
"NALGO1",
"MALAK1",
"SUPER1",
"MOOSA4",
"DILSU1",
"DILSU2"
],
"1": [
"DILSU2",
"DILSU3",
"MOOSA5",
"SUPER2",
"MALAK2",
"YASHO1",
"NALGO2",
"CHADE2",
"KOTIW2",
"SHANK2",
"KOTIB1",
"KOTIB2",
"KOTI01",
"MOZAM1",
"GPOBU1",
"ABIDS2",
"NAMPA1",
"NAMPA2",
"NAMPA3",
"PUBLI1",
"NAMPA4",
"ASSEM2",
"LAKDI2",
"CHINT2",
"CHINT3",
"KHAIR3",
"EENAD1",
"ERRAM2",
"NIMSB1",
"PANJA3",
"AMEER5",
"MAITR1",
"SRNAG4",
"ESIBU4",
"RYTHU2",
"ERRAG7",
"ERRAG2",
"ALLWY1",
"ERRAG8",
"PREMN1",
"BHARA4",
"MOOSA6",
"SANGE1",
"KUKAT10",
"KUKAT11",
"SUMIT3",
"VIVEK4",
"FOODW1",
"KPHBM4",
"JNTUB1",
"NIZAM3",
"VASAN1",
"HYDER3",
"HYDER4",
"MIYAP4",
"ALLWY4",
"MADIN1",
"DEEPT2",
"HUDAC2",
"GANGA2",
"CHAND2",
"LINGA2"
],
"route_id": "218D",
"shape0": "218D_1",
"shape1": "218D_0"
}
}
}
Binary file modified db/shapes_1.h5
Binary file removed db/shapes_2.h5
Binary file removed db/shapes_3.h5
