v3.0.1
answerquest committed Oct 1, 2018
1 parent 3f29b4e commit b44fcb5
Showing 50 changed files with 16,893 additions and 15,854 deletions.
4 changes: 4 additions & 0 deletions GTFSManager.py
@@ -39,6 +39,7 @@
sequenceDBfile = 'db/sequence.json'
passwordFile = 'pw/rsa_key.bin'
chunkRulesFile = 'chunkRules.json'
configFile = 'config.json'
thisURL = ''

debugMode = False # using this flag at various places to do or not do things based on whether we're in development or production
@@ -48,8 +49,11 @@
# for checking imported ZIP against
# to do: don't make this a HARD requirement. Simply logmessage about it.

# load parameters from config folder
with open(configFolder + chunkRulesFile) as f:
    chunkRules = json.load(f)
with open(configFolder + configFile) as f:
    configRules = json.load(f)

# create folders if they don't exist
for folder in [uploadFolder, xmlFolder, logFolder, configFolder, dbFolder, exportFolder]:
82 changes: 55 additions & 27 deletions GTFSserverfunctions.py
@@ -28,7 +28,7 @@ def exportGTFS (folder):
        tablename = h5File[:-3] # remove last 3 chars, .h5

        try:
            df = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
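Throughout this commit the 'df' key argument is dropped from the pd.read_hdf calls, while to_hdf keeps writing under the 'df' key. pandas can infer the key when an HDF5 store contains exactly one object, so the keyless read returns the same table. A minimal sketch of that equivalence, with an illustrative file name rather than anything from this repository:

    import pandas as pd

    # write a single table under the 'df' key, as GTFSserverfunctions does
    pd.DataFrame({'stop_id': ['A', 'B']}).to_hdf('demo.h5', key='df', format='table', mode='w')

    # with only one object in the store, the key can be inferred on read
    a = pd.read_hdf('demo.h5')          # key inferred
    b = pd.read_hdf('demo.h5', 'df')    # explicit key, equivalent here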
@@ -58,7 +58,7 @@ def exportGTFS (folder):
    columnsList = set()
    for count,h5File in enumerate(filenames):
        try:
            df = pd.read_hdf(dbFolder + h5File,'df',stop=0)
            df = pd.read_hdf(dbFolder + h5File,stop=0)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
@@ -76,7 +76,7 @@ def exportGTFS (folder):
    for count,h5File in enumerate(filenames):
        logmessage('Writing {} to csv'.format(h5File))
        try:
            df1 = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
            df1 = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
        except KeyError as e:
            df1 = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
@@ -416,7 +416,7 @@ def readTableDB(tablename, key=None, value=None):
            continue

        try:
            df = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
            # typecasting as str, keeping NA values blank ''
        except KeyError as e:
            df = pd.DataFrame()
@@ -478,15 +478,15 @@ def replaceTableDB(tablename, data, key=None, value=None):
    elif ((key is not None) and (value is not None) ):
        # remove entries matching the key and value
        try:
            df = pd.read_hdf(h5File,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder+h5File).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
        oldLen = len( df[ df[key] == str(value)])
        df.query(key + ' != "' + str(value) + '"', inplace=True)

        df3 = pd.concat([df,xdf], ignore_index=True)
        df3.to_hdf(h5File, 'df', format='table', mode='w', complevel=1)
        df3.to_hdf(dbFolder+h5File, 'df', format='table', mode='w', complevel=1)

        logmessage('Replaced {} entries for {}={} with {} new entries in {}.'\
            .format(oldLen,key,str(value),str(len(xdf)),tablename ) )
@@ -1084,7 +1084,7 @@ def replaceTableCell(h5File,column,valueFrom,valueTo):
        return False

    try:
        df = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
        df = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
    except KeyError as e:
        df = pd.DataFrame()
        logmessage('Note: {} does not have any data.'.format(h5File))
@@ -1216,7 +1216,7 @@ def replaceChunkyTableDB(xdf, value, tablename='stop_times'):
    if chunkFile:
        logmessage('Editing ' + chunkFile)
        try:
            df = pd.read_hdf(dbFolder + chunkFile,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + chunkFile).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(chunkFile))
@@ -1232,23 +1232,23 @@
        else:
            logmessage('{} older entries for id {} removed.'.format( str( initLen - reducedLen ),value ))

        newdf = pd.concat([df,xdf],ignore_index=True)
        logmessage('{} new entries for id {} added. Now writing to {}.'.format( str( len(xdf) ),value, chunkFile ))
        newdf.to_hdf(dbFolder+chunkFile, 'df', format='table', mode='w', complevel=1)


    else:
        # if the trip wasn't previously existing, take the smallest chunk and add in there.
        chunkFile = smallestChunk(tablename)
        try:
            df = pd.read_hdf(dbFolder + chunkFile,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + chunkFile).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(chunkFile))

        newdf = pd.concat([df,xdf],ignore_index=True)
        logmessage('{} new entries for id {} added. Now writing to {}.'.format( str( len(xdf) ),value, chunkFile ))

        newdf.to_hdf(dbFolder+chunkFile, 'df', format='table', mode='w', complevel=1)
        except FileNotFoundError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not exist yet, so we will likely create it.'.format(chunkFile))

        # next 3 lines to be done in either case
        newdf = pd.concat([df,xdf],ignore_index=True)
        logmessage('{} new entries for id {} added. Now writing to {}.'.format( str( len(xdf) ),value, chunkFile ))
        newdf.to_hdf(dbFolder+chunkFile, 'df', format='table', mode='w', complevel=1)


    # add entry for new trip in stop_times_lookup.json
@@ -1277,19 +1277,43 @@ def findChunk(value, tablename="stop_times"):
    lookupJSONFile = chunkRules[tablename]['lookup']
    print('lookup for {}: {}'.format(tablename,lookupJSONFile))

    with open(dbFolder + lookupJSONFile) as f:
        table_lookup = json.load(f)
    try:
        with open(dbFolder + lookupJSONFile) as f:
            table_lookup = json.load(f)
    except FileNotFoundError:
        logmessage(dbFolder + lookupJSONFile,'not found so creating it as empty json.')
        with open(dbFolder + lookupJSONFile, 'a') as f:
            f.write('{}')
        table_lookup = {}

    chunkFile = table_lookup.get(value,None)
    print('Found chunk for id {}: {}'.format(value,chunkFile) )
    return chunkFile
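findChunk() resolves an id to the one chunk file that holds its rows, via the lookup JSON named in chunkRules (stop_times_lookup.json for the stop_times table), and with this commit it also creates that file as an empty JSON object if it is missing. A minimal sketch of the lookup's shape; the ids and chunk names here are illustrative, not taken from the repository:

    # contents of db/stop_times_lookup.json, loaded as table_lookup
    table_lookup = {
        "861_1_down": "stop_times_1.h5",
        "861_2_up": "stop_times_2.h5"
    }
    table_lookup.get("861_1_down", None)   # -> "stop_times_1.h5"
    table_lookup.get("999_new", None)      # -> None, so the caller falls back to smallestChunk()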


def smallestChunk(prefix):
    filenames = [f for f in os.listdir(dbFolder) if f.lower().endswith('.h5') and ( f.lower().startswith(prefix) ) and os.path.isfile(os.path.join(dbFolder, f))]
def smallestChunk(prefix, maxSizeMB=configRules.get('MAX_CHUNK_SIZE',1) ):
    '''
    Find the smallest chunk of a chunked table, by size, as the place to put a new set of records in.
    This helps to balance the sizes out over time.
    In case ALL the chunks are too heavy (set some limit), then christen the next chunk.
    '''
    # filenames = [f for f in os.listdir(dbFolder) if f.lower().endswith('.h5') and ( f.lower().startswith(prefix) ) and os.path.isfile(os.path.join(dbFolder, f))]
    filenames = findFiles(dbFolder, prefix=prefix, chunk='y')

    chunkFile = sorted(filenames, key=lambda filename: os.path.getsize(os.path.join(dbFolder, filename)))[0]
    if not len(filenames):
        # no chunks present, return tablename_1
        return prefix + '_1.h5'

    # chunkFile = sorted(filenames, key=lambda filename: os.path.getsize(os.path.join(dbFolder, filename)))[0]
    # sort the list of files by size and pick first one. From https://stackoverflow.com/a/44215088/4355695

    sizeList = [ os.path.getsize(os.path.join(dbFolder, filename))/(2**20) for filename in filenames ]
    # get sizes in MB
    if min(sizeList) < maxSizeMB:
        chunkFile = filenames[ sizeList.index(min(sizeList)) ]
    else:
        nextChunkNum = len(filenames) + 1
        chunkFile = '{}_{}.h5'.format(prefix, nextChunkNum)
        logmessage('smallestChunk: All chunks for {} too big, lets create a new one, {}'.format(prefix,chunkFile))
    return chunkFile
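A worked example of the selection rule above, with illustrative sizes for three existing stop_times chunks and the default maxSizeMB of 1:

    sizeList = [1.4, 0.3, 1.1]   # MB, for stop_times_1.h5 .. stop_times_3.h5
    min(sizeList) < 1            # True: reuse the lightest chunk, stop_times_2.h5

    sizeList = [1.4, 1.2, 1.1]
    min(sizeList) < 1            # False: every chunk is over the limit, christen stop_times_4.h5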


@@ -1373,8 +1397,7 @@ def readChunkTableDB(tablename, key, value):
    chunksHaving = []
    for i,h5File in enumerate( findFiles(dbFolder, ext='.h5', prefix=tablename, chunk='y') ):
        try:
            df = pd.read_hdf(dbFolder+h5File,'df')\
                .fillna('').astype(str)\
            df = pd.read_hdf(dbFolder+h5File).fillna('').astype(str)\
                .query( '{}=="{}"'.format(key,value) )
        except KeyError as e:
            df = pd.DataFrame()
@@ -1459,7 +1482,7 @@ def deleteInTable(tablename, key, value, action="delete"):
    returnMessage = ''
    for h5File in h5Files:
        try:
            df = pd.read_hdf(dbFolder + h5File,'df').fillna('').astype(str)
            df = pd.read_hdf(dbFolder + h5File).fillna('').astype(str)
        except KeyError as e:
            df = pd.DataFrame()
            logmessage('Note: {} does not have any data.'.format(h5File))
@@ -1487,6 +1510,11 @@ def deleteInTable(tablename, key, value, action="delete"):
    logmessage(returnMessage)
    return returnMessage

##########################
# Redo the delete functions to accommodate multiple values.
# For pandas it doesn't make any difference whether its one value or multiple

##########################

def sequenceDel(column,value):
    content = []
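On the note above about accepting multiple values: with pandas the only change is swapping the equality test for a membership test, so the delete path can keep the same shape. A rough sketch with placeholder names (df, key, value and valueList stand in for whatever the reworked functions would receive; they are not parameters in this commit):

    # keep rows that don't match a single value
    df = df[ df[key] != str(value) ]

    # keep rows that don't match any of several values
    df = df[ ~df[key].isin([str(v) for v in valueList]) ]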
2 changes: 1 addition & 1 deletion README.md
@@ -5,7 +5,7 @@

A browser-based user interface for creating, editing, and exporting static GTFS (General Transit Feed Specification) feeds for a public transit authority.

**Development Status** : V 3.0.0
**Development Status** : V 3.0.1
~And Windows binary is available too now.~ Download from [Releases page](https://github.com/WRI-Cities/static-GTFS-manager/releases/).

## Intro
3 changes: 3 additions & 0 deletions config/config.json
@@ -0,0 +1,3 @@
{
    "MAX_CHUNK_SIZE":1
}
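This new file backs the configRules dict loaded in GTFSManager.py; MAX_CHUNK_SIZE is consumed by smallestChunk() as a size threshold in MB (byte sizes are divided by 2**20 before the comparison). A deployment that wants heavier chunks before a new one is christened can simply raise the value, and if the key is missing the code falls back to 1:

    maxSizeMB = configRules.get('MAX_CHUNK_SIZE', 1)   # e.g. returns 5 if config.json holds {"MAX_CHUNK_SIZE": 5}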
8 changes: 7 additions & 1 deletion js/hyd-config.json → config/hmrl-config.json
@@ -35,6 +35,12 @@

"fares": [10,15,25,30,35,40,45,50,55,60],

"transfers": [["AME2","AME4"]]
"transfers": [["AME2","AME4"]],

"feed_info": {
"feed_publisher_name": "Telangana Open Data Portal",
"feed_publisher_url": "http://www.data.telangana.gov.in/",
"feed_lang": "en"
}

}
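The added feed_info block maps onto the three required columns of GTFS feed_info.txt (note that the commit also adds db/feed_info.h5 below). Assuming the exporter writes these values through unchanged, the exported feed_info.txt would look roughly like:

    feed_publisher_name,feed_publisher_url,feed_lang
    Telangana Open Data Portal,http://www.data.telangana.gov.in/,en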
2 changes: 1 addition & 1 deletion config/settings.js
@@ -1,6 +1,6 @@
// from commonfuncs.js

const VERSION = 'v3.0.0';
const VERSION = 'v3.0.1';
const APIpath = 'API/';
const CURRENCY = 'INR';
const route_type_options = {0:"0-Tram, Streetcar, Light rail", 1:"1-Subway, Metro", 2:"2-Rail", 3:"3-Bus",4:"4-Ferry" };
Binary file modified db/agency.h5
Binary file modified db/calendar.h5
Binary file removed db/calendar_dates.h5
Binary file added db/feed_info.h5
Binary file modified db/routes.h5
129 changes: 128 additions & 1 deletion db/sequence.json
@@ -1 +1,128 @@
{}
{
"_default": {
"1": {
"0": [
"LINGA1",
"CHAND1",
"GANGA1",
"HUDAC1",
"DEEPT1",
"MYTHR1",
"ALLWY2",
"ALLWY3",
"MIYAP1",
"MIYAP2",
"MIYAP3",
"HYDER1",
"HYDER2",
"NIZAM1",
"JNTU01",
"KPHBV1",
"KPHBM3",
"VIVEK3",
"SUMIT2",
"KUKAT8",
"SANGE2",
"KUKAT9",
"MOOSA3",
"BHARA3",
"PREMN2",
"ERRAG6",
"ERRAG5",
"RYTHU1",
"ESIBU3",
"SRNAG3",
"MAITH1",
"AMEER4",
"PANJA2",
"NIMS01",
"ERRAM1",
"KHAIR1",
"KHAIR2",
"CHINT2",
"LAKDI1",
"ASSEM1",
"NIZAM2",
"ABIDS1",
"ABIDS2",
"BANKS1",
"SHANK1",
"KOTIW1",
"CHADE1",
"NALGO1",
"MALAK1",
"SUPER1",
"MOOSA4",
"DILSU1",
"DILSU2"
],
"1": [
"DILSU2",
"DILSU3",
"MOOSA5",
"SUPER2",
"MALAK2",
"YASHO1",
"NALGO2",
"CHADE2",
"KOTIW2",
"SHANK2",
"KOTIB1",
"KOTIB2",
"KOTI01",
"MOZAM1",
"GPOBU1",
"ABIDS2",
"NAMPA1",
"NAMPA2",
"NAMPA3",
"PUBLI1",
"NAMPA4",
"ASSEM2",
"LAKDI2",
"CHINT2",
"CHINT3",
"KHAIR3",
"EENAD1",
"ERRAM2",
"NIMSB1",
"PANJA3",
"AMEER5",
"MAITR1",
"SRNAG4",
"ESIBU4",
"RYTHU2",
"ERRAG7",
"ERRAG2",
"ALLWY1",
"ERRAG8",
"PREMN1",
"BHARA4",
"MOOSA6",
"SANGE1",
"KUKAT10",
"KUKAT11",
"SUMIT3",
"VIVEK4",
"FOODW1",
"KPHBM4",
"JNTUB1",
"NIZAM3",
"VASAN1",
"HYDER3",
"HYDER4",
"MIYAP4",
"ALLWY4",
"MADIN1",
"DEEPT2",
"HUDAC2",
"GANGA2",
"CHAND2",
"LINGA2"
],
"route_id": "218D",
"shape0": "218D_1",
"shape1": "218D_0"
}
}
}
Binary file modified db/shapes_1.h5
Binary file removed db/shapes_2.h5
Binary file removed db/shapes_3.h5
