## Video 3.2 - Essential Operations with Data Frames

In [None]:
import numpy as np
import pandas as pd

### Loading data from files

In [None]:
# Show the raw contents of data.csv before parsing it with pandas.
%cat data.csv

In [None]:
# Parse the CSV file into a DataFrame and display it.
data = pd.read_csv('data.csv')
data

In [None]:
# Show the raw contents of movie.json before parsing it with pandas.
%cat movie.json

In [None]:
# Parse the JSON document into a DataFrame and display it.
data = pd.read_json('movie.json')
data

In [None]:
# Show the raw contents of movies-90s.jsonl before parsing it with pandas.
%cat movies-90s.jsonl

In [None]:
# lines=True tells read_json to treat every line of the file as its own
# JSON object (the JSON Lines layout) instead of one single document.
data = pd.read_json('movies-90s.jsonl', lines=True)
data

### Reindexing

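Reindexing is the process of creating a new object with the data conformed to a new index. Labels in the new index that were not present in the original are filled with missing values (`NaN`).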

In [None]:
# Label -> value pairs; the labels are deliberately out of alphabetical
# order and 'c' is left out.
data = pd.Series({'b': 3, 'a': 1, 'd': 2})
data

In [None]:
# Conform the data to the labels a, b, c, d. The label 'c' has no value in
# `data`, so it shows up as a missing value (NaN) in the result.
new_data = data.reindex(['a', 'b', 'c', 'd'])
new_data

### Applying a function



In [None]:
# Small frame of perfect squares, written column by column:
# rows are 'Red' and 'Blue', columns are 'A', 'B' and 'C'.
data = pd.DataFrame(
    {
        'A': [4, 9],
        'B': [36, 25],
        'C': [1, 16],
    },
    index=['Red', 'Blue'],
)
data

In [None]:
# NumPy universal functions work element-wise on a DataFrame and keep its
# row and column labels. (numpy is imported with pandas in the first cell.)
np.sqrt(data)

In [None]:
def double_up(x):
    """Return ``x`` multiplied by two."""
    doubled = x * 2
    return doubled

# Apply double_up to every single element of the frame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
# name now raises a deprecation warning, so use whichever this pandas offers.
apply_elementwise = data.map if hasattr(pd.DataFrame, 'map') else data.applymap
apply_elementwise(double_up)

In [None]:
# The element-wise call above returned a new frame; `data` still holds
# the original values.
data

In [None]:
def difference(x):
    """Return the spread of ``x``: its largest value minus its smallest.

    ``x`` must provide ``max()`` and ``min()`` methods, as a pandas
    Series (one row or one column of a DataFrame) does.
    """
    largest = x.max()
    smallest = x.min()
    return largest - smallest

# With the default axis, `difference` receives one column at a time,
# so the result has one value per column label.
data.apply(difference)

In [None]:
# axis=1 passes one row at a time instead, so the result has one value
# per row label.
data.apply(difference, axis=1)

### Sorting

In [None]:
# Display the frame as a reference point before sorting it.
data

In [None]:
# The sorted frame is a new object; `data` itself is not reordered.
data.sort_index()  # sort by row labels, ascending

In [None]:
# Same method, this time applied to the column labels in reverse order.
data.sort_index(axis=1,           # sort by column labels
                ascending=False)  # descending

In [None]:
# Neither sort_index call changed `data`; rows and columns keep their
# original order.
data

In [None]:
# Reorder the rows by the values found in column 'B'.
data.sort_values(by='B')

In [None]:
# With axis=1, `by` names a row label: reorder the columns by the values
# found in row 'Blue'.
data.sort_values(by='Blue', axis=1)

### Handling missing data

In [None]:
# np.nan marks the missing entries, here at positions 2 and 4.
data = pd.Series([1, 2, np.nan, 3, np.nan])

data

In [None]:
# Deliberate counter-example: NaN never compares equal to None, so this is
# False for every element and cannot be used to find missing values.
data == None

In [None]:
# Boolean mask that is True where a value is missing.
data.isnull()

In [None]:
# The opposite mask: True where a value is present.
data.notnull()

#### Filtering out missing data

In [None]:
# Return a copy without the missing entries; the remaining values keep
# their original index labels.
data.dropna()

In [None]:
# Same filtering, spelled out as boolean indexing with the notnull() mask.
data[data.notnull()]

#### Filling in missing data

In [None]:
# Replace every missing value with the same constant.
data.fillna(0)

In [None]:
# Replace missing values with the mean of the values that are present
# (mean() skips NaN by default).
data.fillna(data.mean())

In [None]:
# A dict maps index label -> fill value, so each gap can get its own
# replacement: label 2 becomes 100 and label 4 becomes 500.
data.fillna({2: 100, 4: 500})

#### Notice

All the functions discussed here return a *new* pandas object; the original object is left unchanged.

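To keep a result, assign it to a variable (e.g. `data = data.fillna(0)`). Some of these methods (`sort_index`, `sort_values`, `dropna`, `fillna`) also accept `inplace=True` to change the object in place; others, such as `reindex`, `apply` and `applymap`, do not.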