From 1e5b80119b5a1e2a81654d170bd3d410f6017ca7 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Fri, 1 Aug 2014 00:55:36 -0700 Subject: [PATCH] improve cache of classes --- python/pyspark/sql.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index 1d35853c7841c..d72782ff08214 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -674,10 +674,15 @@ def _restore_object(dataType, obj): # use id(dataType) as key to speed up lookup in dict # Because of batched pickling, dataType will be the # same object in mose cases. - cls = _cached_cls.get(id(dataType)) + k = id(dataType) + cls = _cached_cls.get(k) if cls is None: - cls = _create_cls(dataType) - _cached_cls[id(dataType)] = cls + # use dataType as key to avoid create multiple class + cls = _cached_cls.get(dataType) + if cls is None: + cls = _create_cls(dataType) + _cached_cls[dataType] = cls + _cached_cls[k] = cls return cls(obj)