changing dataset so that it computes dependencies

UDST · Jun 2, 2014 · fc17e0c · fc17e0c
1 parent ef93bbf
commit fc17e0c
Show file tree

Hide file tree

Showing 3 changed files with 84 additions and 13 deletions.
diff --git a/urbansim/urbanchoice/mnl.py b/urbansim/urbanchoice/mnl.py
@@ -22,6 +22,8 @@
 def mnl_probs(data, beta, numalts):
     clamp = data.typ == 'numpy'
     utilities = beta.multiply(data)
+    if numalts == 0:
+        raise Exception("Number of alternatives is zero")
     utilities.reshape(numalts, utilities.size() / numalts)
 
     exponentiated_utility = utilities.exp(inplace=True)

diff --git a/urbansim/utils/dataset.py b/urbansim/utils/dataset.py
@@ -1,13 +1,10 @@
-import copy
-import os
-import time
 import warnings
 
 import numpy as np
 import pandas as pd
-import simplejson
 
 from urbansim.utils import misc
+reindex = misc.reindex
 
 warnings.filterwarnings('ignore', category=pd.io.pytables.PerformanceWarning)
 
@@ -23,6 +20,14 @@ def __init__(self, filename, scenario="baseline"):
         # not to load multiple times form disk
         self.d = {}
         self.scenario = scenario
+        self.clear_views()
+        self.debug = False
+
+    def view(self, name):
+        return self.views[name]
+
+    def clear_views(self):
+        self.views = {}
 
     def list_tbls(self):
         return list(set([x[1:] for x in self.store.keys()] + self.d.keys()))
@@ -69,12 +74,6 @@ def __getattr__(self, name):
             raise Exception()
         return self.fetch(name)
 
-    def compute_range(self, attr, dist, agg=np.sum):
-        travel_data = self.fetch('travel_data').reset_index(level=1)
-        travel_data = travel_data[travel_data.travel_time < dist]
-        travel_data["attr"] = attr[travel_data.to_zone_id].values
-        return travel_data.groupby(level=0).attr.apply(agg)
-
     def add_xy(self, df):
 
         assert 'building_id' in df
@@ -89,9 +88,10 @@ def add_xy(self, df):
         return df
 
 
-class CustomDataFrame:
-    def __init__(self):
-        pass
+class CustomDataFrame(object):
+    def __init__(self, dset, name):
+        self.dset = dset
+        self.name = name
 
     def build_df(obj, flds=None):
         if flds is None:
@@ -100,3 +100,45 @@ def build_df(obj, flds=None):
         df = pd.concat(columns, axis=1)
         df.columns = flds
         return df
+
+    def __getattr__(self, name):
+        try:
+            return super(CustomDataFrame, "__getattr__")(name)
+        except:
+            df = self.dset.fetch(self.name)
+            attr = getattr(df, name)
+            if self.dset.debug is True:
+                print "Returning primary attribute: %s of %s" % (name, self.name)
+            return attr
+
+
+def variable(func):
+    @property
+    def _decorator(self):
+        if hasattr(self, "_property_cache") and func in self._property_cache:
+            val = self._property_cache[func]
+            if self.dset.debug is True:
+                print "Returning from cache: %s of %s" % \
+                      (func.__name__, self.name)
+            return val
+
+        s = func(self)
+
+        if self.dset.debug is True:
+            print "Computing: %s of %s as" % (func.__name__, self.name)
+            print "    %s" % s
+        try:
+            r = eval(s, globals(), self.dset.views)
+        except Exception as e:
+            print "Variable computation failed!!"
+            print s
+            print e, "\n\n\n"
+
+        r[np.isinf(r)] = np.nan
+
+        if not hasattr(self, "_property_cache"):
+            self._property_cache = {}
+        self._property_cache[func] = r
+
+        return r
+    return _decorator
diff --git a/urbansim/utils/misc.py b/urbansim/utils/misc.py
@@ -89,6 +89,33 @@ def get_run_number():
     return num
 
 
+def compute_range(travel_data, attr, travel_time_attr, dist, agg=np.sum):
+    """
+    Compute a zone-based accessibility query using the urbansim format
+    travel data dataframe.
+
+    Parameters
+    ----------
+    travel_data : dataframe
+        The dataframe of urbansim format travel data.  Has from_zone_id as
+        first index, to_zone_id as second index, and different impedances
+        between zones as columns.
+    attr : series
+        The attr to aggregate.  Should be indexed by zone_id and the values
+        will be aggregated.
+    travel_time_attr : string
+        The column name in travel_data to use as the impedance.
+    dist : float
+        The max distance to aggregate up to
+    agg : function, optional, np.sum by default
+        The numpy function to use for aggregation
+    """
+    travel_data = travel_data.reset_index(level=1)
+    travel_data = travel_data[travel_data[travel_time_attr] < dist]
+    travel_data["attr"] = attr[travel_data.to_zone_id].values
+    return travel_data.groupby(level=0).attr.apply(agg)
+
+
 def reindex(series1, series2):
     """
     This reindexes the first series by the second series.  This is an extremely