# Intro to Pandas

## Reindex

In [17]:
# Basic Imports
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

#### Small review about Index Objects

In [8]:
# Create a Series with proper index
my_ser = Series([1,2,3,4],index=['A','B','C','D'])

# Get the index
my_index = my_ser.index

In [9]:
# Show
my_index

Index(['A', 'B', 'C', 'D'], dtype='object')

In [10]:
# Grab an index value
my_index[1]

'B'

In [11]:
# Slice by the index
my_index[1:]

Index(['B', 'C', 'D'], dtype='object')

### What happen if we try to change the index value?

In [12]:
my_index[1] = 'M'

TypeError: Index does not support mutable operations

### Important!

As you can see the Series's Index is not mutable by using the **"[ ]"** index method. We will learn ho to reindex a Data Frame in the following code. 

## Reindex

### The .reindex() method

In [13]:
#Lets create a new series
ser1 = Series([1,2,3,4],index=['A','B','C','D'])

In [14]:
#Show
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [15]:
#Call reindex to rearrange the data to a new index

ser2 = ser1.reindex(data= ['A','B','C','D','E','F'])

In [16]:
#Show
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

**Note:** As you can see, pandas manage new index values by inserting 'NaN to every new index added.

#### Reindexing and filling new values

In [39]:
# We can alos fill in values for new indexes
ser2.reindex(index = ['A','B','C','D','E','F','G'], fill_value = 0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

In [41]:
#Using a particular method for filling values
ser3 = Series(data= ['Spain','Germany','France'], index=[0,5,10])

#Show
ser3

0       Spain
5     Germany
10     France
dtype: object

In [50]:
#Can use a forward fill for interploating values vetween indices 

ser3.reindex(index = range(15)) #, method='ffill')

0       Spain
1         NaN
2         NaN
3         NaN
4         NaN
5     Germany
6         NaN
7         NaN
8         NaN
9         NaN
10     France
11        NaN
12        NaN
13        NaN
14        NaN
dtype: object

### Reindexing rows, columns or both

In [58]:
# Import numpy random.randn
from numpy.random import randn
randn_data = randn(16).reshape((4,4))

#Lets make a datafram ewith some random values
d_frame1 = DataFrame(data = randn_data, index=['A','B','D','E'], columns=['col1','col2','col3','col4'])

#Show
d_frame1

Unnamed: 0,col1,col2,col3,col4
A,0.007688,-1.822659,-0.493726,2.865824
B,0.185577,-0.086747,-0.379661,-1.557917
D,1.10534,0.514553,0.518896,-0.295623
E,0.994789,0.401426,-0.735166,0.497225


#### Adding an index value that doesn't exist

In [55]:
#Notice we forgot 'C' , lets reindex it into dframe

d_frame2 = dframe.reindex(['A','B','C','D','E'])
d_frame2

Unnamed: 0,col1,col2,col3,col4
A,1.564413,-0.610172,-0.822442,-0.657845
B,1.662627,-0.082555,1.106353,0.566515
C,,,,
D,0.144643,-0.569737,-1.025016,0.180731
E,1.149377,0.69016,0.099953,1.594594


#### Reindex column

Adding a column that doesn't exist

In [52]:
#Can also explicitly reindex columns
new_columns = ['col1','col2','col3','col4','col5']

d_frame2.reindex(columns = new_columns)

Unnamed: 0,col1,col2,col3,col4,col5
A,1.564413,-0.610172,-0.822442,-0.657845,
B,1.662627,-0.082555,1.106353,0.566515,
C,,,,,
D,0.144643,-0.569737,-1.025016,0.180731,
E,1.149377,0.69016,0.099953,1.594594,


**Note:** Here we keep all the columns name (from 1 to 4) in the same string formatt!

#### What happen if we rename all columns?

In [61]:
# Here we change the string case 
upper_columns = ['COL1','COL2','COL3','COL4','COL5']

d_frame2.reindex(columns = upper_columns)

Unnamed: 0,COL1,COL2,COL3,COL4,COL5
A,,,,,
B,,,,,
C,,,,,
D,,,,,
E,,,,,


In [59]:
#Reindex quickly using the label-indexing with iloc (we'll see this more in the future)

#Show original
d_frame1

Unnamed: 0,col1,col2,col3,col4
A,0.007688,-1.822659,-0.493726,2.865824
B,0.185577,-0.086747,-0.379661,-1.557917
D,1.10534,0.514553,0.518896,-0.295623
E,0.994789,0.401426,-0.735166,0.497225


In [62]:
new_columns = ['col1','col2','col3','col4','col5']

d_frame1.reindex(index= ['A','B','D','E'], columns = new_columns)

Unnamed: 0,col1,col2,col3,col4,col5
A,0.007688,-1.822659,-0.493726,2.865824,
B,0.185577,-0.086747,-0.379661,-1.557917,
D,1.10534,0.514553,0.518896,-0.295623,
E,0.994789,0.401426,-0.735166,0.497225,


### How to rename columns?

#### The .rename() method

In [64]:
# show the original DF
d_frame1

Unnamed: 0,col1,col2,col3,col4
A,0.007688,-1.822659,-0.493726,2.865824
B,0.185577,-0.086747,-0.379661,-1.557917
D,1.10534,0.514553,0.518896,-0.295623
E,0.994789,0.401426,-0.735166,0.497225


In [63]:
# The following method is NOT in-place

d_frame1.rename(columns= {'col1':'A1', 'col2':'A2'})

Unnamed: 0,A1,A2,col3,col4
A,0.007688,-1.822659,-0.493726,2.865824
B,0.185577,-0.086747,-0.379661,-1.557917
D,1.10534,0.514553,0.518896,-0.295623
E,0.994789,0.401426,-0.735166,0.497225


In [65]:
d_frame1

Unnamed: 0,col1,col2,col3,col4
A,0.007688,-1.822659,-0.493726,2.865824
B,0.185577,-0.086747,-0.379661,-1.557917
D,1.10534,0.514553,0.518896,-0.295623
E,0.994789,0.401426,-0.735166,0.497225


# Let's do some exercise!