alteryx · thehomebrewnerd · Jun 30, 2022 · Jun 23, 2022 · Jun 23, 2022 · Jun 23, 2022
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -12,14 +12,15 @@ Future Release
         * Allow dfs kwargs to be passed into ``get_valid_primitives`` (:pr:`2157`)
     * Fixes
     * Changes
+        * Improve serialization and deserialization to reduce storage of duplicate primitive information (:pr:`2136`, :pr:`2127`, :pr:`2142`, :pr:`2144`)
         * Sort core requirements and test requirements in setup cfg (:pr:`2152`)
     * Documentation Changes
     * Testing Changes
         * Fix pandas warning and reduce dask .apply warnings (:pr:`2145`)
         * Pin graphviz version used in windows tests (:pr:`2159`)
 
     Thanks to the following people for contributing to this release:
-    :user:`gsheni`, :user:`ozzieD`, :user:`rwedge`, :user:`sbadithe`, :user:`tamargrey`
+    :user:`gsheni`, :user:`ozzieD`, :user:`rwedge`, :user:`sbadithe`, :user:`tamargrey`, :user:`thehomebrewnerd`
 
 v1.10.0 June 23, 2022
 =====================

diff --git a/featuretools/feature_base/feature_base.py b/featuretools/feature_base/feature_base.py
@@ -10,7 +10,6 @@
     PrimitiveBase,
     TransformPrimitive,
 )
-from featuretools.primitives.utils import serialize_primitive
 from featuretools.utils.wrangle import _check_time_against_column, _check_timedelta
 
 _ES_REF = {}
@@ -70,13 +69,12 @@ def __getitem__(self, key):
         return FeatureOutputSlice(self, key)
 
     @classmethod
-    def from_dictionary(
-        cls, arguments, entityset, dependencies, primitives_deserializer
-    ):
+    def from_dictionary(cls, arguments, entityset, dependencies, primitive):
         raise NotImplementedError("Must define from_dictionary on FeatureBase subclass")
 
     def rename(self, name):
-        """Rename Feature, returns copy"""
+        """Rename Feature, returns copy. Will reset any custom feature column names
+        to their default value."""
         feature_copy = self.copy()
         feature_copy._name = name
         feature_copy._names = None
@@ -103,6 +101,33 @@ def get_feature_names(self):
                     ]
         return self._names
 
+    def set_feature_names(self, names):
+        """Set new values for the feature column names, overriding the default values.
+        Number of names provided must match the number of output columns defined for
+        the feature. Only works for features that have more than one output column. Use
+        ``Feature.rename`` to change the column name for single output features.
+
+        Args:
+            names (list[str]): List of names to use for the output feature columns. Provided
+                names must be unique.
+        """
+        if self.number_output_features == 1:
+            raise ValueError(
+                "The set_feature_names can only be used on features that have more than one output column."
+            )
+
+        num_new_names = len(names)
+        if self.number_output_features != num_new_names:
+            raise ValueError(
+                "Number of names provided must match the number of output features:"
+                f" {num_new_names} name(s) provided, {self.number_output_features} expected."
+            )
+
+        if len(set(names)) != num_new_names:
+            raise ValueError("Provided output feature names must be unique.")
+
+        self._names = names
+
     def get_function(self, **kwargs):
         return self.primitive.get_function(**kwargs)
 
@@ -423,9 +448,7 @@ def __init__(self, column, name=None):
         )
 
     @classmethod
-    def from_dictionary(
-        cls, arguments, entityset, dependencies, primitives_deserializer
-    ):
+    def from_dictionary(cls, arguments, entityset, dependencies, primitive):
         dataframe_name = arguments["dataframe_name"]
         column_name = arguments["column_name"]
         column = entityset[dataframe_name].ww[column_name]
@@ -516,9 +539,7 @@ def _handle_relationship(self, entityset, child_dataframe_name, relationship):
         return relationship
 
     @classmethod
-    def from_dictionary(
-        cls, arguments, entityset, dependencies, primitives_deserializer
-    ):
+    def from_dictionary(cls, arguments, entityset, dependencies, primitive):
         base_feature = dependencies[arguments["base_feature"]]
         relationship = Relationship.from_dictionary(
             arguments["relationship"], entityset
@@ -683,9 +704,7 @@ def _handle_relationship_path(
         return relationship_path, path_is_unique
 
     @classmethod
-    def from_dictionary(
-        cls, arguments, entityset, dependencies, primitives_deserializer
-    ):
+    def from_dictionary(cls, arguments, entityset, dependencies, primitive):
         base_features = [dependencies[name] for name in arguments["base_features"]]
         relationship_path = [
             Relationship.from_dictionary(r, entityset)
@@ -694,10 +713,6 @@ def from_dictionary(
         parent_dataframe_name = relationship_path[0].parent_dataframe.ww.name
         relationship_path = RelationshipPath([(False, r) for r in relationship_path])
 
-        primitive = primitives_deserializer.deserialize_primitive(
-            arguments["primitive"]
-        )
-
         use_previous_data = arguments["use_previous"]
         use_previous = use_previous_data and Timedelta.from_dictionary(
             use_previous_data
@@ -706,7 +721,7 @@ def from_dictionary(
         where_name = arguments["where"]
         where = where_name and dependencies[where_name]
 
-        return cls(
+        feat = cls(
             base_features=base_features,
             parent_dataframe_name=parent_dataframe_name,
             primitive=primitive,
@@ -715,6 +730,8 @@ def from_dictionary(
             where=where,
             name=arguments["name"],
         )
+        feat._names = arguments.get("feature_names")
+        return feat
 
     def copy(self):
         return AggregationFeature(
@@ -759,14 +776,17 @@ def generate_names(self):
         )
 
     def get_arguments(self):
-        return {
+        arg_dict = {
             "name": self._name,
             "base_features": [feat.unique_name() for feat in self.base_features],
             "relationship_path": [r.to_dictionary() for _, r in self.relationship_path],
-            "primitive": serialize_primitive(self.primitive),
+            "primitive": self.primitive,
             "where": self.where and self.where.unique_name(),
             "use_previous": self.use_previous and self.use_previous.get_arguments(),
         }
+        if self._names:
+            arg_dict["feature_names"] = self._names
+        return arg_dict
 
     def relationship_path_name(self):
         if self._path_is_unique:
@@ -792,16 +812,13 @@ def __init__(self, base_features, primitive, name=None):
         )
 
     @classmethod
-    def from_dictionary(
-        cls, arguments, entityset, dependencies, primitives_deserializer
-    ):
+    def from_dictionary(cls, arguments, entityset, dependencies, primitive):
         base_features = [dependencies[name] for name in arguments["base_features"]]
-        primitive = primitives_deserializer.deserialize_primitive(
-            arguments["primitive"]
-        )
-        return cls(
+        feat = cls(
             base_features=base_features, primitive=primitive, name=arguments["name"]
         )
+        feat._names = arguments.get("feature_names")
+        return feat
 
     def copy(self):
         return TransformFeature(self.base_features, self.primitive)
@@ -817,11 +834,14 @@ def generate_names(self):
         )
 
     def get_arguments(self):
-        return {
+        arg_dict = {
             "name": self._name,
             "base_features": [feat.unique_name() for feat in self.base_features],
-            "primitive": serialize_primitive(self.primitive),
+            "primitive": self.primitive,
         }
+        if self._names:
+            arg_dict["feature_names"] = self._names
+        return arg_dict
 
 
 class GroupByTransformFeature(TransformFeature):
@@ -841,20 +861,17 @@ def __init__(self, base_features, primitive, groupby, name=None):
         )
 
     @classmethod
-    def from_dictionary(
-        cls, arguments, entityset, dependencies, primitives_deserializer
-    ):
+    def from_dictionary(cls, arguments, entityset, dependencies, primitive):
         base_features = [dependencies[name] for name in arguments["base_features"]]
-        primitive = primitives_deserializer.deserialize_primitive(
-            arguments["primitive"]
-        )
         groupby = dependencies[arguments["groupby"]]
-        return cls(
+        feat = cls(
             base_features=base_features,
             primitive=primitive,
             groupby=groupby,
             name=arguments["name"],
         )
+        feat._names = arguments.get("feature_names")
+        return feat
 
     def copy(self):
         # the groupby feature is appended to base_features in the __init__
@@ -883,12 +900,15 @@ def get_arguments(self):
             for feat in self.base_features
             if feat.unique_name() != self.groupby.unique_name()
         ]
-        return {
+        arg_dict = {
             "name": self._name,
             "base_features": feature_names,
-            "primitive": serialize_primitive(self.primitive),
+            "primitive": self.primitive,
             "groupby": self.groupby.unique_name(),
         }
+        if self._names:
+            arg_dict["feature_names"] = self._names
+        return arg_dict
 
 
 class Feature(object):
@@ -981,9 +1001,7 @@ def get_arguments(self):
         }
 
     @classmethod
-    def from_dictionary(
-        cls, arguments, entityset, dependencies, primitives_deserializer
-    ):
+    def from_dictionary(cls, arguments, entityset, dependencies, primitive):
         base_feature_name = arguments["base_feature"]
         base_feature = dependencies[base_feature_name]
         n = arguments["n"]

diff --git a/featuretools/feature_base/features_deserializer.py b/featuretools/feature_base/features_deserializer.py
@@ -46,14 +46,21 @@ def load_features(features, profile_name=None):
 
         .. code-block:: python
 
+            # Option 1
             filepath = os.path.join('/Home/features/', 'list.json')
-            ft.load_features(filepath)
+            features = ft.load_features(filepath)
 
-            f = open(filepath, 'r')
-            ft.load_features(f)
+            # Option 2
+            filepath = os.path.join('/Home/features/', 'list.json')
+            with open(filepath, 'r') as f:
+                features = ft.load_features(f)
+
+            # Option 3
+            filepath = os.path.join('/Home/features/', 'list.json')
+            with open(filepath, 'r') as f:
+                feature_str = f.read()
+            features = ft.load_features(feature_str)
 
-            feature_str = f.read()
-            ft.load_features(feature_str)
 
     .. seealso::
         :func:`.save_features`
@@ -78,7 +85,12 @@ def __init__(self, features_dict):
         self._check_schema_version()
         self.entityset = deserialize_es(features_dict["entityset"])
         self._deserialized_features = {}  # name -> feature
-        self._primitives_deserializer = PrimitivesDeserializer()
+        primitive_deserializer = PrimitivesDeserializer()
+        primitive_definitions = features_dict["primitive_definitions"]
+        self._deserialized_primitives = {
+            k: primitive_deserializer.deserialize_primitive(v)
+            for k, v in primitive_definitions.items()
+        }
 
     @classmethod
     def load(cls, features, profile_name):
@@ -109,6 +121,10 @@ def _deserialize_feature(self, feature_name):
 
         feature_dict = self.features_dict["feature_definitions"][feature_name]
         dependencies_list = feature_dict["dependencies"]
+        primitive = None
+        primitive_id = feature_dict["arguments"].get("primitive")
+        if primitive_id is not None:
+            primitive = self._deserialized_primitives[primitive_id]
 
         # Collect dependencies into a dictionary of name -> feature.
         dependencies = {
@@ -122,9 +138,7 @@ def _deserialize_feature(self, feature_name):
             raise RuntimeError('Unrecognized feature type "%s"' % type)
 
         args = feature_dict["arguments"]
-        feature = cls.from_dictionary(
-            args, self.entityset, dependencies, self._primitives_deserializer
-        )
+        feature = cls.from_dictionary(args, self.entityset, dependencies, primitive)
 
         self._deserialized_features[feature_name] = feature
         return feature