# Python Object Serialization


## Pickle 

Pickle is a powerful standard-library module for serializing and deserializing Python objects.

Use Cases :

* Save state of program to disk
* Transfer data over network connection
* Store python objects in database



In [32]:
import pickle
import pandas as ps


Note: in Python 3, `pickle` automatically uses its C-accelerated implementation (the successor to Python 2's `cPickle`) when it is available.


In [33]:
# Helper functions
def savePickle(object, filename, protocol = pickle.HIGHEST_PROTOCOL):
    """Serialize a Python object to a file with pickle.

    Parameters
    ----------
    object
        Any picklable Python object.
    filename
        Path of the file to write; it is opened in binary write mode, so an
        existing file is overwritten.
    protocol
        Pickle protocol number passed to ``pickle.dump``. Defaults to
        ``pickle.HIGHEST_PROTOCOL``.

    Returns
    -------
    None
    """
    # The context manager closes (and therefore flushes) the file even when
    # pickle.dump raises, instead of leaving that to garbage collection.
    with open(filename, "wb") as fh:
        pickle.dump(object, fh, protocol)

def loadPickle(filename):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    filename
        Path of the pickle file to read; it is opened in binary read mode.

    Returns
    -------
    The object reconstructed by ``pickle.load``.
    """
    # SECURITY: unpickling can execute arbitrary code, so only load files
    # that come from a trusted source.
    # The context manager guarantees the handle is closed once loading ends.
    with open(filename, "rb") as fh:
        return pickle.load(fh)

## Dictionary Sample

In [34]:
# Two-level nested dictionary of lists, used as the sample object to pickle.
myDict = {
    'Mammal': {
        'Feline': ['Cat', 'Tiger'],
        'Canine': ['Dog', 'Wolf'],
    },
    'Reptile': {
        'Snakes': ['Rattler', 'King Cobra', 'Mamba'],
        'Lizard': ['Monitor', 'Gila Monster'],
    },
}

# Show the whole structure, then only the 'Mammal' branch, one per line.
print(myDict, myDict["Mammal"], sep="\n")


{'Mammal': {'Feline': ['Cat', 'Tiger'], 'Canine': ['Dog', 'Wolf']}, 'Reptile': {'Snakes': ['Rattler', 'King Cobra', 'Mamba'], 'Lizard': ['Monitor', 'Gila Monster']}}
{'Feline': ['Cat', 'Tiger'], 'Canine': ['Dog', 'Wolf']}


In [35]:
# Persist the sample dictionary to "MyDictionary.p" in the working directory.
savePickle(myDict,"MyDictionary.p")


# List the working directory to confirm that the pickle file was written.
%ls
# NOTE(review): the recorded output reports "Line magic function `%cat` not found",
# so this line fails in the environment the notebook was run in. The file is
# written in binary mode by pickle, so its raw contents are not readable text anyway.
%cat "MyDictionary.p"

ERROR:root:Line magic function `%cat` not found.


 Volume in drive G is New Volume
 Volume Serial Number is 22C9-4C20

 Directory of G:\dev\python\PySerialization

11/16/2017  08:17 PM    <DIR>          .
11/16/2017  08:17 PM    <DIR>          ..
11/12/2017  06:23 PM             1,258 .gitignore
11/12/2017  06:26 PM    <DIR>          .ipynb_checkpoints
11/16/2017  08:17 PM           551,417 DecissionTree.p
11/12/2017  06:23 PM            11,558 LICENSE
11/16/2017  08:18 PM               178 MyDictionary.p
11/16/2017  08:16 PM            11,144 Python Serialization.ipynb
11/12/2017  06:23 PM                69 README.md
               6 File(s)        575,624 bytes
               3 Dir(s)  688,629,551,104 bytes free


In [36]:
# Unbind the in-memory dictionary so that the copy restored later can only
# have come from the pickle file on disk.
del myDict
# Uncommenting the next line would raise NameError because the name is gone:
# print(myDict)

In [37]:
# Rebuild the dictionary from the pickle file written earlier.
newDict = loadPickle("MyDictionary.p")

# Print the restored structure and its 'Mammal' branch, one per line, to
# confirm that the round trip preserved the data.
print(newDict, newDict["Mammal"], sep="\n")


{'Mammal': {'Feline': ['Cat', 'Tiger'], 'Canine': ['Dog', 'Wolf']}, 'Reptile': {'Snakes': ['Rattler', 'King Cobra', 'Mamba'], 'Lizard': ['Monitor', 'Gila Monster']}}
{'Feline': ['Cat', 'Tiger'], 'Canine': ['Dog', 'Wolf']}


## Save the Model

In [38]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; the same helpers are provided by `sklearn.model_selection`.
from sklearn import datasets
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# Load the bundled diabetes dataset and separate features from the target.
diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

# Hold out 20% of the samples; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

print("* Training sample size : ", len(X_train))
print("* Validation sample size : ", len(X_test))

# NOTE(review): the diabetes target is presumably a continuous quantity, so a
# regressor may be the better model here. The classifier is kept so that the
# pickled object stays the same kind of estimator.
dt = tree.DecisionTreeClassifier(criterion='gini',min_samples_split=5, random_state=1024)
dt.fit(X_train, y_train)

print(dt)
# The file name spelling is kept as-is because the loading cell reads the same name.
savePickle(dt,"DecissionTree.p")


* Training sample size :  353
* Validation sample size :  89
DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=5,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1024,
            splitter='best')


## Load the Model 

In [39]:
# Restore the fitted decision tree from the pickle file written when the model was saved.
# NOTE(review): pickled estimators are presumably only safe to load with the same
# scikit-learn version that wrote them — confirm against the model-persistence guide.
newDT = loadPickle("DecissionTree.p")

# Printing the estimator shows its hyper-parameters, which should match the saved model.
print(newDT)


DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=5,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1024,
            splitter='best')


## References


 * https://docs.python.org/3/library/pickle.html
 * [Scikit Learn Model persistence](http://scikit-learn.org/stable/modules/model_persistence.html)
 * [Understanding Python pickling and how to use it securely](https://www.synopsys.com/blogs/software-security/python-pickling/)
 
### Other options
 
  * [MessagePack](https://msgpack.org/)
  * [JSON Serializer](https://docs.python.org/3/library/json.html)
  * [CloudPickle](https://github.com/cloudpipe/cloudpickle)
 