In [8]:
import numpy as np
import pandas as pd
# Index objects
# An Index holds the labels of an axis (and the name of that axis).

ser = pd.Series(
    data=[0, 5, 3, 8, 4],
    index=["red", "blue", "yellow", "white", "green"],
)
ser.index  # last expression of the cell, so the Index is displayed

Index(['red', 'blue', 'yellow', 'white', 'green'], dtype='object')

In [9]:
# Index objects are immutable: once created, their labels cannot be modified
# in place, which makes it safe to share an Index between data structures.

In [10]:
# Methods related to the Index
label_of_min = ser.idxmin()  # label whose value is the smallest
print(label_of_min)
ser.idxmax()  # label whose value is the largest

red


'white'

In [11]:
# An Index may contain duplicate labels.
duplicated_labels = ["white", "white", "blue", "green", "green", "yellow"]
serd = pd.Series(range(6), index=duplicated_labels)
print(serd, "\n")
# Selecting a duplicated label returns a Series holding every matching entry.
print(serd["white"])

white     0
white     1
blue      2
green     3
green     4
yellow    5
dtype: int64 

white    0
white    1
dtype: int64


In [12]:
# is_unique tells whether an Index is free of repeated labels.
print(serd.index.is_unique)

frame4 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=["red", "blue", "yellow", "white"],
    columns=["ball", "pen", "pencil", "paper"],
)
print(frame4.index.is_unique)

False
True


In [13]:
# Other functionality on the Index: reindexing
#
# An Index cannot be modified once it has been assigned; reindex() works
# around this by building a new object that follows a new sequence of labels.
ser = pd.Series(data=[1, 2, 3, 4], index=["one", "two", "three", "four"])
print(ser)

# reindex() returns a new Series with the existing values rearranged to match
# the requested labels. One call can re-order labels, add new ones ("five" has
# no value, so it becomes NaN) and drop existing ones ("two" is left out).
new_labels = ["three", "four", "five", "one"]
ser.reindex(new_labels)

one      1
two      2
three    3
four     4
dtype: int64


three    3.0
four     4.0
five     NaN
one      1.0
dtype: float64

In [14]:
# A common task is to fill in values for labels missing from a sequence.
ser3 = pd.Series(data=[1, 5, 6, 3], index=[0, 3, 5, 6])
print(ser3)

# Reindexing over range(6) with method="ffill" adds the labels that were not
# present (1, 2 and 4) and gives each of them the value of the closest
# preceding label that was present (forward fill).
target_labels = range(6)
ser3.reindex(target_labels, method="ffill")

0    1
3    5
5    6
6    3
dtype: int64


0    1
1    1
2    1
3    5
4    5
5    6
dtype: int64

In [15]:
# Same fill-in task, this time filling backwards.
ser3 = pd.Series(data=[1, 5, 6, 3, 2], index=[0, 3, 5, 6, 9])
# method="bfill" gives every added label the value of the closest following
# label that was present (backward fill).
target_labels = range(9)
ser3.reindex(target_labels, method="bfill")

0    1
1    5
2    5
3    5
4    6
5    6
6    3
7    2
8    2
dtype: int64

In [18]:
# The same thing can be done for DataFrames.

# Create a dictionary of equal-length columns ...
data = {'color': ['blue','green',"yellow","red","white"],
       "object": ["ball",'pen',"pencil","paper","mug"],
       "price":[1.2,1.0,0.6,0.9,1.7]}
frame = pd.DataFrame(data)  # ... and pass the dictionary to a DataFrame
print(frame)

# In DataFrames, reindex() can rearrange rows, columns, or both in one call.
# Here the row labels are kept as they are and the columns are re-ordered,
# with a new column "new" added. "new" has no data, so it is filled with NaN.
#
# No `method=` is passed: every requested row label already exists, and a
# fill method is applied along the columns as well, which would populate
# "new" from a neighbouring column instead of leaving it NaN.
frame_reindexed = frame.reindex(
    index=range(5),
    columns=["color", "price", "new", "object"],
)
frame_reindexed

    color  object  price
0    blue    ball    1.2
1   green     pen    1.0
2  yellow  pencil    0.6
3     red   paper    0.9
4   white     mug    1.7


Unnamed: 0,color,price,new,object
0,blue,1.2,,ball
1,green,1.0,,pen
2,yellow,0.6,,pencil
3,red,0.9,,paper
4,white,1.7,,mug


In [26]:
# Dropping index labels
# drop() returns a new Series without the given label(s); ser itself is
# left untouched, which is why "yellow" is still there in the last print.
ser = pd.Series(
    np.arange(4, dtype=float),
    index=["red", "blue", "yellow", "white"],
)
print(ser, "\n")

without_yellow = ser.drop("yellow")  # a single label
print(without_yellow, "\n")

without_blue_and_white = ser.drop(["blue", "white"])  # a list of labels
print(without_blue_and_white)


red       0.0
blue      1.0
yellow    2.0
white     3.0
dtype: float64 

red      0.0
blue     1.0
white    3.0
dtype: float64 

red       0.0
yellow    2.0
dtype: float64


In [28]:
# Dropping index labels (rows) from a DataFrame
frame = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=["red", "blue", "yellow", "white"],
    columns=["ball", "pen", "pencil", "paper"],
)
print(frame, "\n")

# Several labels can be dropped at once by passing them as a list.
rows_to_drop = ["blue", "yellow"]
frame.drop(rows_to_drop)

        ball  pen  pencil  paper
red        0    1       2      3
blue       4    5       6      7
yellow     8    9      10     11
white     12   13      14     15 



Unnamed: 0,ball,pen,pencil,paper
red,0,1,2,3
white,12,13,14,15


In [30]:
# Deleting columns from the frame: name them through columns=
# (equivalent to passing the labels together with axis=1).
frame.drop(columns=["pen", "pencil"])

Unnamed: 0,ball,paper
red,0,3
blue,4,7
yellow,8,11
white,12,15


In [31]:
# Arithmetic and data alignment
# pandas aligns the indexes of different data structures before operating
# on them; labels present in only one operand give NaN in the result.

s1 = pd.Series([3, 2, 5, 1], index=["white", "yellow", "green", "blue"])
s2 = pd.Series(
    [1, 4, 7, 2, 1],
    index=["white", "yellow", "black", "blue", "brown"],
)

s1.add(s2)

black     NaN
blue      3.0
brown     NaN
green     NaN
white     4.0
yellow    6.0
dtype: float64

In [34]:
frame1 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=["red", "blue", "yellow", "white"],
    columns=["ball", "pen", "pencil", "paper"],
)
frame2 = pd.DataFrame(
    data=np.arange(12).reshape(4, 3),
    index=["blue", "green", "white", "yellow"],
    columns=["mug", "pen", "ball"],
)
print(frame1, "\n")
print(frame2, "\n")

# Addition matches both index labels and column names. Only cells whose
# (index, column) pair exists in both frames are summed; every other cell
# of the aligned result is NaN.
frame1.add(frame2)

        ball  pen  pencil  paper
red        0    1       2      3
blue       4    5       6      7
yellow     8    9      10     11
white     12   13      14     15 

        mug  pen  ball
blue      0    1     2
green     3    4     5
white     6    7     8
yellow    9   10    11 



Unnamed: 0,ball,mug,paper,pen,pencil
blue,6.0,,,6.0,
green,,,,,
red,,,,,
white,20.0,,,20.0,
yellow,19.0,,,19.0,
