## JSON

In [None]:
import json
# Sample payload for the JSON round trip below: one boolean and one integer field
value = dict(a=True, b=3)

In [None]:
# Serialize the dict to a JSON string, then echo the string as the cell output
encoded = json.dumps(obj=value)
encoded

In [None]:
# Parse the JSON string back into a Python object and display it
decoded = json.loads(s=encoded)
decoded

In [None]:
# Compare the original object with the result of the dumps -> loads round trip
value == decoded

## NumPy
* Written in C, but exposed through Python
* Extremely fast and memory efficient for numeric processing
* Primary feature is the ndarray - a machine-typed n-dimensional array

In [None]:
import numpy as np
# One-dimensional ndarray built from four integers; the bare name displays it
x = np.array((1, 2, 3, 4))
x

In [None]:
# Reductions over the whole array, printed side by side
total = x.sum()
average = x.mean()
print(total, average)

In [None]:
# Arithmetic on ndarrays is element-wise ...
doubled = x * 2
shifted = x + 2
# ... and so are boolean expressions, which yield a boolean array
above_two = x > 2
print(doubled, shifted, above_two)

In [None]:
# Advanced indexing with integer or boolean lists / arrays
by_positions = x[[1, 2]]                       # list of integer positions
by_bool_list = x[[False, True, True, False]]   # one boolean flag per element
by_bool_array = x[x > 2]                       # boolean array from a comparison
print(by_positions, by_bool_list, by_bool_array)

In [None]:
# Multidimensional array: two rows of five consecutive integers (0-4 and 5-9)
mx = np.array([range(0, 5), range(5, 10)])
mx.shape

In [None]:
# Array slicing works for each dimension: one index or slice per axis,
# separated by commas. Each result is printed on its own line.
print(
    mx[0, 0],    # single element at row 0, column 0
    mx[:, 0],    # every row, column 0
    mx[:, 1:3],  # every row, columns 1 and 2
    mx[:, 1:],   # every row, column 1 onward
    mx[:, :3],   # every row, columns 0 through 2
    sep='\n',
)

In [None]:
# np.arange() works like the Python builtin range();
# the ten values are then laid out as 2 rows x 5 columns
np.arange(10).reshape(2, 5)

In [None]:
# 2 x 5 array filled with zeros
np.zeros(shape=(2, 5))

In [None]:
# 2 x 5 array filled with ones
np.ones(shape=(2, 5))

## Pandas
* Also written in C and exposed in Python
* Builds upon NumPy and extends it in a lot of great ways
* Biggest features are data frames / series, transformations, and file handling

In [None]:
import pandas as pd

# Valid URL schemes include http, ftp, s3, gs, and file.
# NOTE(review): this is an absolute file:// location; presumably it matches the
# environment the notebook ships with. Confirm before running elsewhere.
penguins_csv_url = 'file://localhost/home/jovyan/work/penguins.csv'
df = pd.read_csv(penguins_csv_url)
df

In [None]:
# Columns can be accessed as series. Series behave like NumPy arrays.
# Here: all rows of the single column 'bill_length_mm'.
df.loc[:, 'bill_length_mm']

In [None]:
# Summary statistics for the bill-length column
bill_lengths = df['bill_length_mm']
bill_lengths.describe()

In [None]:
# A boolean mask on 'species' keeps only the Adelie rows; summarise their bill lengths
df.loc[df['species'] == 'Adelie', 'bill_length_mm'].describe()

In [None]:
# Derived column: element-wise difference between bill length and bill depth.
# The column is added to `df` itself, so later cells see it too.
bill_length = df['bill_length_mm']
bill_depth = df['bill_depth_mm']
df['bill_diff'] = bill_length - bill_depth
df

In [None]:
# Mean bill length and bill depth for each species
df.groupby('species')[['bill_length_mm', 'bill_depth_mm']].mean()

## matplotlib

* Python's dominant 2D and 3D visualization platform
* Flexible
* Fairly accessible to new users, extremely powerful for pro users

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt

# Histogram of bill length across the whole data frame
plt.hist(x=df['bill_length_mm'])
plt.show()

In [None]:
# Categorical accessor for the species column; it exposes the distinct
# category labels (.categories) and the per-row integer codes (.codes).
species = df['species'].astype('category').cat

# One bill-length histogram per species, each titled with the species name
for s in species.categories:
    plt.hist(df.loc[df['species'] == s, 'bill_length_mm'])
    plt.title(s)
    plt.show()

In [None]:
# Bill length (x axis) against bill depth (y axis), one point per row
plt.scatter(x=df['bill_length_mm'], y=df['bill_depth_mm'])
plt.show()

In [None]:
# Labelled scatter plot of bill length against bill depth, coloured by species.
plt.title('Bill Shape')
plt.xlabel('Bill length (mm)')
plt.ylabel('Bill depth (mm)')

# Colour each point by the integer category code of its species.
scatter = plt.scatter(
    df['bill_length_mm'],
    df['bill_depth_mm'],
    c=species.codes
)

# Add one empty line per category purely to carry a legend entry. Its colour is
# looked up through the scatter's own norm and colormap, so it matches the points
# drawn for that category code.
for i, label in enumerate(species.categories):
    plt.plot([], color=scatter.cmap(scatter.norm(i)), label=label)

plt.legend()
# Pass the on/off flag positionally. The `b=` keyword was renamed to `visible=`
# in Matplotlib 3.5 and later removed, so `b=True` fails on current releases,
# while the positional form works on old and new versions alike.
plt.grid(True, which='both', color='grey', linestyle='--')
plt.axis('equal')

plt.show()
plt.close()