### Lecture 16 Index Object

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

import webbrowser as wb

In [3]:
# Build a small labelled Series: the values 1-4 keyed by the letters A-D.
my_ser = Series(
    data=[1, 2, 3, 4],
    index=["A", "B", "C", "D"],
)
my_ser

A    1
B    2
C    3
D    4
dtype: int64

In [4]:
# The Index object: the row labels of a Series

In [5]:
# Pull the labels out of the Series; .index hands back a pandas Index object.
my_ind = my_ser.index
my_ind

Index(['A', 'B', 'C', 'D'], dtype='object')

In [6]:
# Positional lookup on an Index returns the single label at that position.
my_ind[2]

'C'

In [7]:
# Slicing an Index yields another Index (labels from position 2 onward).
my_ind[2:]

Index(['C', 'D'], dtype='object')

In [8]:
# Slice holding the first two labels.
my_ind[:2]

Index(['A', 'B'], dtype='object')

In [9]:
# Try to change a label in place.
# An Index is immutable, so item assignment raises TypeError. The error is
# caught and printed so the demonstration is still visible without aborting
# a "Restart & Run All" of the notebook.
try:
    my_ind[0] = 'Z'
except TypeError as err:
    print("TypeError:", err)

TypeError: Index does not support mutable operations

### Lecture 17 - Reindexing, i.e. conforming the data to a new set of index labels

In [10]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# import random normal distribution
from numpy.random import randn

In [12]:
# Base Series for the reindexing examples: integers 1-4 labelled A-D.
ser1 = Series(
    data=[1, 2, 3, 4],
    index=["A", "B", "C", "D"],
)
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [13]:
# Reindex onto six labels; "E" and "F" are not in ser1, so they come back as NaN.
ser2 = ser1.reindex(index=["A", "B", "C", "D", "E", "F"])

In [14]:
# Show the reindexed Series: the new labels hold NaN and the dtype is float64.
ser2

A     1
B     2
C     3
D     4
E   NaN
F   NaN
dtype: float64

In [15]:
# "AA" is not a label of ser1 either, so it is NaN too, and the original
# "A" entry is left out because it is not requested.
ser3 = ser1.reindex(index=["AA", "B", "C", "D", "E", "F"])

In [16]:
# Show the result: "AA", "E" and "F" are NaN; "B"-"D" keep their values.
ser3

AA   NaN
B      2
C      3
D      4
E    NaN
F    NaN
dtype: float64

In [17]:
# Give labels that are missing from ser1 a default value instead of NaN.
ser3 = ser1.reindex(
    index=["AA", "B", "C", "D", "E", "F"],
    fill_value=0,
)
ser3

AA    0
B     2
C     3
D     4
E     0
F     0
dtype: int64

In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# import random normal distribution
from numpy.random import randn

In [3]:
# Recreate the base Series (integers 1-4 labelled A-D).
ser1 = Series(
    data=[1, 2, 3, 4],
    index=["A", "B", "C", "D"],
)
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [4]:
# Give labels that are missing from ser1 a default value instead of NaN.
ser3 = ser1.reindex(
    index=["AA", "B", "C", "D", "E", "F"],
    fill_value=0,
)
ser3

AA    0
B     2
C     3
D     4
E     0
F     0
dtype: int64

In [5]:
# Check whether reindex affected ser1 -- it did not: the result was bound to
# ser3 and ser1 still shows its original four entries.
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [6]:
# Fill methods: start from a Series whose integer labels (0, 5, 10) leave gaps.
ser2 = Series(
    data=["USA", "China", "France"],
    index=[0, 5, 10],
)
ser2

0        USA
5      China
10    France
dtype: object

In [11]:
# Target labels 0..14 as a real list.
# (Python 3's range is lazy; in Python 2, range(15) was already a list.)
ranger = [label for label in range(15)]
ranger

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

In [13]:
# Use the forward-fill method: every new label takes the value of the
# closest preceding original label (0 -> USA, 5 -> China, 10 -> France).
ser2.reindex(index=ranger, method='ffill')


0        USA
1        USA
2        USA
3        USA
4        USA
5      China
6      China
7      China
8      China
9      China
10    France
11    France
12    France
13    France
14    France
dtype: object

In [14]:
# The same 0..14 target labels, this time as a NumPy array.
ranger2 = np.arange(0, 15)
ranger2

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [15]:
# reindex also accepts a NumPy array of labels -- same result as with the list.
ser2.reindex(index=ranger2, method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5      China
6      China
7      China
8      China
9      China
10    France
11    France
12    France
13    France
14    France
dtype: object

In [17]:
# Several ways to construct a DataFrame:
#   1. pd.read_clipboard()
#   2. DataFrame(dframe, columns=[])   -- add columns
#   3. DataFrame(dictionary)
#   4. DataFrame(2-dim array, index, columns)
# Here: a 5x5 frame of standard-normal draws with default integer labels.
# randn is not seeded in this cell, so the values change from run to run.
dframe = DataFrame(randn(5, 5))
dframe

Unnamed: 0,0,1,2,3,4
0,-0.160711,0.549386,-0.37576,-0.16867,-0.636109
1,-2.021025,0.43425,-1.186656,-0.426919,1.151511
2,1.433093,-0.859887,0.799327,0.394835,-0.966597
3,0.873041,-0.574617,0.673871,0.899021,0.682228
4,0.787318,-1.263933,-0.6329,1.689204,-1.181865


In [18]:
# Same kind of random 5x5 frame, now with explicit row and column labels.
# Row label 'C' is deliberately absent from the index.
dframe = DataFrame(
    randn(5, 5),
    index=['A', 'B', 'D', 'E', 'F'],
    columns=["col1", "col2", "col3", "col4", "col5"],
)
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,0.383501,-0.726242,1.63131,-0.849568,-1.472689
B,-0.111999,2.845942,0.626573,0.191707,-0.393648
D,1.447825,-0.628054,-1.792582,-0.716669,0.921618
E,0.591149,0.172461,1.241516,1.828207,0.269823
F,0.797148,-0.289446,-1.243458,-1.881539,-0.126401


In [19]:
# Reindex the rows of a DataFrame.
# "C" is not an existing row label, so an all-NaN row is inserted for it.
dframe2 = dframe.reindex(index=["A", "B", "C", "D", "E", "F"])
dframe2

Unnamed: 0,col1,col2,col3,col4,col5
A,0.383501,-0.726242,1.63131,-0.849568,-1.472689
B,-0.111999,2.845942,0.626573,0.191707,-0.393648
C,,,,,
D,1.447825,-0.628054,-1.792582,-0.716669,0.921618
E,0.591149,0.172461,1.241516,1.828207,0.269823
F,0.797148,-0.289446,-1.243458,-1.881539,-0.126401


In [20]:
# Reindexing columns requires the `columns` keyword.
# "col6" does not exist in dframe, so it is created and filled with fill_value.
new_columns = [
    "col1",
    "col2",
    "col3",
    "col4",
    "col5",
    "col6",
]
dframe2 = dframe.reindex(columns=new_columns, fill_value=1)
dframe2

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,0.383501,-0.726242,1.63131,-0.849568,-1.472689,1
B,-0.111999,2.845942,0.626573,0.191707,-0.393648,1
D,1.447825,-0.628054,-1.792582,-0.716669,0.921618,1
E,0.591149,0.172461,1.241516,1.828207,0.269823,1
F,0.797148,-0.289446,-1.243458,-1.881539,-0.126401,1


In [21]:
# Reindex rows and columns in a single call.
# This cell originally used DataFrame.ix, which was deprecated in pandas 0.20
# and removed in pandas 1.0. .loc is not a drop-in replacement here because it
# raises KeyError for labels that do not exist ("C", "col6"); reindex inserts
# them as NaN instead.
dframe.reindex(index=['A', 'B', 'C', 'D', 'E', 'F'], columns=new_columns)

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,0.383501,-0.726242,1.63131,-0.849568,-1.472689,
B,-0.111999,2.845942,0.626573,0.191707,-0.393648,
C,,,,,,
D,1.447825,-0.628054,-1.792582,-0.716669,0.921618,
E,0.591149,0.172461,1.241516,1.828207,0.269823,
F,0.797148,-0.289446,-1.243458,-1.881539,-0.126401,


### Lecture 18 - Drop Entry

In [22]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# import random normal distribution
from numpy.random import randn

In [23]:
# Small Series for the drop examples: 0, 1, 2 labelled a, b, c.
ser1 = Series(
    data=np.arange(3),
    index=['a', 'b', 'c'],
)
ser1

a    0
b    1
c    2
dtype: int32

In [24]:
# Remove the entry labelled 'b'; the result is displayed, not stored.
ser1.drop(labels='b')

a    0
c    2
dtype: int32

In [25]:
# 3x3 frame of the integers 0-8: one row per city, one column per attribute.
dframe1 = DataFrame(
    np.arange(9).reshape((3, 3)),
    index=['SF', 'LA', 'NYC'],
    columns=["pop", "size", "year"],
)
dframe1

Unnamed: 0,pop,size,year
SF,0,1,2
LA,3,4,5
NYC,6,7,8


In [26]:
# Drop a row from the DataFrame. The result goes into dframe2; dframe1 is
# displayed afterwards to show that it still contains the 'LA' row.
dframe2 = dframe1.drop(labels='LA')
dframe1

Unnamed: 0,pop,size,year
SF,0,1,2
LA,3,4,5
NYC,6,7,8


In [27]:
# The frame returned by drop: 'LA' is gone, 'SF' and 'NYC' remain.
dframe2

Unnamed: 0,pop,size,year
SF,0,1,2
NYC,6,7,8


In [28]:
# Drop a column -- the axis has to be specified (axis=1 selects columns).
# The result is only displayed here, not stored.
dframe1.drop(labels='year', axis=1)

Unnamed: 0,pop,size
SF,0,1
LA,3,4
NYC,6,7


In [29]:
# Make the column drop stick.
# drop does not modify dframe1 in place, so this is not a test of mutability:
# the name dframe1 is simply rebound to the new frame that drop returns.
# errors='ignore' keeps the cell safe to re-run: once 'year' is gone, a second
# plain drop('year', axis=1) would raise because the label no longer exists.
dframe1 = dframe1.drop('year', axis=1, errors='ignore')
dframe1

Unnamed: 0,pop,size
SF,0,1
LA,3,4
NYC,6,7
