### Lists

In [2]:
import pickle 
# Sample data for the pickle round trip: a plain list of strings.
student_names = [
    'Alice',
    'Bob',
    'Elena',
    'Jane',
    'Kyle',
]

In [3]:
# pickle.dump() writes bytes, so the target is opened in binary write mode.
with open('student_file.pkl', 'wb') as f:  # 'wb' = write + binary (not a text file)
    pickle.dump(student_names, f) # serialize the list into the open file

In [4]:
# The `with` statement that opened `f` already closed it on exit, so an
# explicit close() is redundant; check that the handle is closed instead.
assert f.closed

In [5]:
# Read the pickle back in binary mode and rebuild the original list.
with open('student_file.pkl', 'rb') as pkl_file:
    student_names_loaded = pickle.load(pkl_file)  # load() is the inverse of dump()

# The file handle is no longer needed once the object has been rebuilt.
print(student_names_loaded)

['Alice', 'Bob', 'Elena', 'Jane', 'Kyle']


In [6]:
# Inspect the type of the object that pickle.load() returned.
type(student_names_loaded)

list

### NumPy Arrays

In [7]:
import numpy as np
# 10x10 array filled with ones, used as the object to pickle.
numpy_array = np.ones(shape=(10, 10))

In [8]:
# Serialize the array to disk; binary mode is required for pickle output.
with open('my_array.pkl', 'wb') as array_file:
    pickle.dump(numpy_array, array_file)

In [9]:
# Rebuild the array from disk, then report what came back.
with open('my_array.pkl', 'rb') as array_file:
    unpickled_array = pickle.load(array_file)

print(f'Array shape: {unpickled_array.shape}')
print(f'Data type: {type(unpickled_array)}')

Array shape: (10, 10)
Data type: <class 'numpy.ndarray'>


### pandas DataFrame

In [10]:
import pandas as pd
import numpy as np

# Seed NumPy's global generator so the synthetic data is reproducible.
np.random.seed(123)

# Single source of truth for the row count, so the three columns cannot
# drift out of sync when the size is changed.
N_ROWS = 100000

# The columns are drawn in this order from the seeded generator; reordering
# them would change the generated values.
data = {'Column1': np.random.randint(0, 10, size=N_ROWS),            # integers 0..9
        'Column2': np.random.choice(['A', 'B', 'C'], size=N_ROWS),   # one of three labels
        'Column3': np.random.rand(N_ROWS)}                           # floats in [0, 1)

# Create Pandas dataframe
df = pd.DataFrame(data)

In [14]:
import time

# Time how long it takes to write the DataFrame as CSV.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted mid-measurement.
start = time.perf_counter()

df.to_csv('my_pandas_dataframe.csv')

end = time.perf_counter()
print(end - start)  # elapsed seconds

0.31750035285949707


In [12]:
# Time how long it takes to write the DataFrame as a pickle file.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted mid-measurement.
start = time.perf_counter()

df.to_pickle("my_pandas_dataframe.pkl")

end = time.perf_counter()
print(end - start)  # elapsed seconds

0.01307988166809082


In [15]:
# Reading the csv file into Pandas:
# The CSV was written with its index, so the first column is read back as the
# index; without index_col=0 it would appear as a spurious "Unnamed: 0" column.
# perf_counter() is used because it is a monotonic, high-resolution clock
# suited to short timings.

start1 = time.perf_counter()
df_csv = pd.read_csv("my_pandas_dataframe.csv", index_col=0)
end1 = time.perf_counter()
print(f"Time taken to read the csv file: {end1 - start1}\n")

# Reading the Pickle file into Pandas:

start2 = time.perf_counter()
df_pkl = pd.read_pickle("my_pandas_dataframe.pkl")
end2 = time.perf_counter()
print(f"Time taken to read the Pickle file: {end2 - start2}")

Time taken to read the csv file: 0.05666756629943848

Time taken to read the Pickle file: 0.0042002201080322266


### Dictionaries

In [16]:
# Nested mapping: one record (name, age, grade) per student label.
students = {
    'Student 1': {'Name': "Alice", 'Age': 10, 'Grade': 4},
    'Student 2': {'Name': 'Bob', 'Age': 11, 'Grade': 5},
    'Student 3': {'Name': 'Elena', 'Age': 14, 'Grade': 8},
}

In [17]:
# Round trip, step 1: write the nested dictionary to disk as a pickle.
with open("student_dict.pkl", "wb") as dict_file:
    pickle.dump(students, dict_file)

# Round trip, step 2: read it back and show the rebuilt object.
with open("student_dict.pkl", "rb") as dict_file:
    deserialized_dict = pickle.load(dict_file)

print(deserialized_dict)

{'Student 1': {'Name': 'Alice', 'Age': 10, 'Grade': 4}, 'Student 2': {'Name': 'Bob', 'Age': 11, 'Grade': 5}, 'Student 3': {'Name': 'Elena', 'Age': 14, 'Grade': 8}}


In [18]:
# Inspect the type of the object that pickle.load() returned.
type(deserialized_dict)

dict

In [19]:
# Nested lookups work on the rebuilt dictionary exactly as on the original.
first_student = deserialized_dict["Student 1"]
print(
    f"The first student's name is {first_student['Name']}"
    f" and she is {first_student['Age']} years old."
)

The first student's name is Alice and she is 10 years old.


### Serializing Machine Learning Models with Pickle

In [20]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression

# Build a reproducible synthetic regression problem (fixed random_state).
X, y = make_regression(
    n_samples=100,
    n_features=3,
    noise=0.1,
    random_state=1,
)

# Fit a LinearRegression model on the generated data.
linear_model = LinearRegression()
linear_model.fit(X, y)

In [21]:
# Summarise the fitted model: intercept, coefficients, and score on (X, y).
model_summary = [
    ('Model intercept :', linear_model.intercept_),
    ('Model coefficients : ', linear_model.coef_),
    ('Model score : ', linear_model.score(X, y)),
]
for label, value in model_summary:
    print(label, value)

Model intercept : -0.010109549594691458
Model coefficients :  [44.18793068 98.97389468 58.17121618]
Model score :  0.9999993081899219


In [22]:
# Persist the fitted model object to disk in pickle's binary format.
with open("linear_regression.pkl", "wb") as model_file:
    pickle.dump(linear_model, model_file)

In [23]:
# Rebuild the model object from the pickle file written earlier.
with open("linear_regression.pkl", "rb") as model_file:
    unpickled_linear_model = pickle.load(model_file)

In [24]:
# Summarise the model rebuilt from the pickle: intercept, coefficients,
# and score on the same (X, y).
unpickled_model_summary = [
    ('Model intercept :', unpickled_linear_model.intercept_),
    ('Model coefficients : ', unpickled_linear_model.coef_),
    ('Model score : ', unpickled_linear_model.score(X, y)),
]
for label, value in unpickled_model_summary:
    print(label, value)

Model intercept : -0.010109549594691458
Model coefficients :  [44.18793068 98.97389468 58.17121618]
Model score :  0.9999993081899219
