# Pandas

In [1]:
import numpy as np

In [2]:
# Seed the generator so the 5x4 sample matrix is identical on every
# Restart & Run All; an unseeded np.random.rand() gives new values each run.
SEED = 101
rng = np.random.default_rng(SEED)
narr = rng.random((5, 4))  # uniform floats in [0, 1), 5 rows x 4 columns

In [3]:
narr

array([[0.25958032, 0.34748582, 0.86983714, 0.22604212],
       [0.5832231 , 0.71345308, 0.86861694, 0.03489178],
       [0.81080577, 0.94396327, 0.80004208, 0.84172727],
       [0.84022353, 0.85870278, 0.46947311, 0.88936809],
       [0.32951002, 0.07032083, 0.99302295, 0.02606537]])

In [4]:
# Axis labels for the demo frame: one single-letter label per row / column.
row_list = list('ABCDE')
col_list = list('WXYZ')

In [5]:
row_list

['A', 'B', 'C', 'D', 'E']

In [6]:
col_list

['W', 'X', 'Y', 'Z']

In [7]:
import pandas as pd

In [8]:
# Wrap the array in a labelled frame: row_list supplies the row labels,
# col_list supplies the column labels.
df = pd.DataFrame(
    narr,
    index=row_list,
    columns=col_list,
)

In [9]:
df

Unnamed: 0,W,X,Y,Z
A,0.25958,0.347486,0.869837,0.226042
B,0.583223,0.713453,0.868617,0.034892
C,0.810806,0.943963,0.800042,0.841727
D,0.840224,0.858703,0.469473,0.889368
E,0.32951,0.070321,0.993023,0.026065


In [10]:
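#SERIES
#-> selecting a single row or a single column of a DataFrame returns a Series
#-> a Series is always displayed vertically (label on the left, value on the right), even when it holds a row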

In [11]:
df['W'] #Series

A    0.259580
B    0.583223
C    0.810806
D    0.840224
E    0.329510
Name: W, dtype: float64

In [12]:
type(df['W'])

pandas.core.series.Series

In [13]:
df[['W','X']] #dataframe

Unnamed: 0,W,X
A,0.25958,0.347486
B,0.583223,0.713453
C,0.810806,0.943963
D,0.840224,0.858703
E,0.32951,0.070321


In [14]:
df.loc['A'] #to fetch single row value -> series

W    0.259580
X    0.347486
Y    0.869837
Z    0.226042
Name: A, dtype: float64

In [15]:
type(df.loc['A'] )

pandas.core.series.Series

In [16]:
df.loc[['A','B']] #dataframe

Unnamed: 0,W,X,Y,Z
A,0.25958,0.347486,0.869837,0.226042
B,0.583223,0.713453,0.868617,0.034892


In [17]:
#select multiple rows and columns

In [18]:
# Two-step selection: .loc[['A','B']] first returns a DataFrame holding those
# two rows, then [['W','X']] picks the two columns from that intermediate frame.
df.loc[['A','B']][['W','X']]

Unnamed: 0,W,X
A,0.25958,0.347486
B,0.583223,0.713453


In [19]:
#select multiple rows and columns using df.loc[]
df.loc[['A','B'],['W','X']]

Unnamed: 0,W,X
A,0.25958,0.347486
B,0.583223,0.713453


In [20]:
#select multiple rows and columns by integer position using df.iloc[]

In [21]:
df.iloc[:3,:3]

Unnamed: 0,W,X,Y
A,0.25958,0.347486,0.869837
B,0.583223,0.713453,0.868617
C,0.810806,0.943963,0.800042


In [22]:
#add a new column to dataframe

In [23]:
# Assigning to a column label that does not exist yet appends that column;
# the values 1..5 are matched to the five rows in order.
df['new'] = list(range(1, 6))

In [24]:
df

Unnamed: 0,W,X,Y,Z,new
A,0.25958,0.347486,0.869837,0.226042,1
B,0.583223,0.713453,0.868617,0.034892,2
C,0.810806,0.943963,0.800042,0.841727,3
D,0.840224,0.858703,0.469473,0.889368,4
E,0.32951,0.070321,0.993023,0.026065,5


In [25]:
#drop a column from dataframe

In [26]:
# drop() looks the label up among the ROW labels by default (axis=0).
# 'new' is a column, so the lookup fails; the error is caught and printed
# so the notebook still runs top to bottom.
try:
    df.drop('new')
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['new'] not found in axis"

In [27]:
# columns='new' is the keyword spelling of ('new', axis=1).
# The call returns a new frame without the column; df itself is not modified.
df.drop(columns='new')

Unnamed: 0,W,X,Y,Z
A,0.25958,0.347486,0.869837,0.226042
B,0.583223,0.713453,0.868617,0.034892
C,0.810806,0.943963,0.800042,0.841727
D,0.840224,0.858703,0.469473,0.889368
E,0.32951,0.070321,0.993023,0.026065


In [28]:
df

Unnamed: 0,W,X,Y,Z,new
A,0.25958,0.347486,0.869837,0.226042,1
B,0.583223,0.713453,0.868617,0.034892,2
C,0.810806,0.943963,0.800042,0.841727,3
D,0.840224,0.858703,0.469473,0.889368,4
E,0.32951,0.070321,0.993023,0.026065,5


In [29]:
# Make the removal stick by rebinding df to the returned frame
# (preferred over inplace=True, which returns None and cannot be chained).
df = df.drop(columns='new')

In [30]:
df

Unnamed: 0,W,X,Y,Z
A,0.25958,0.347486,0.869837,0.226042
B,0.583223,0.713453,0.868617,0.034892
C,0.810806,0.943963,0.800042,0.841727
D,0.840224,0.858703,0.469473,0.889368
E,0.32951,0.070321,0.993023,0.026065


In [31]:
#RESET INDEX

In [32]:
df.reset_index()

Unnamed: 0,index,W,X,Y,Z
0,A,0.25958,0.347486,0.869837,0.226042
1,B,0.583223,0.713453,0.868617,0.034892
2,C,0.810806,0.943963,0.800042,0.841727
3,D,0.840224,0.858703,0.469473,0.889368
4,E,0.32951,0.070321,0.993023,0.026065


In [33]:
#permanently reset the index

In [34]:
# Keep the reset: the old row labels move into a regular column named 'index'
# and the rows are relabelled 0..n-1. Rebinding df replaces inplace=True.
df = df.reset_index()

In [35]:
df

Unnamed: 0,index,W,X,Y,Z
0,A,0.25958,0.347486,0.869837,0.226042
1,B,0.583223,0.713453,0.868617,0.034892
2,C,0.810806,0.943963,0.800042,0.841727
3,D,0.840224,0.858703,0.469473,0.889368
4,E,0.32951,0.070321,0.993023,0.026065


In [36]:
# Resetting a second time: a column named 'index' already exists, so pandas
# stores the outgoing 0..n-1 index under the fallback column name 'level_0'.
df = df.reset_index()

In [37]:
df

Unnamed: 0,level_0,index,W,X,Y,Z
0,0,A,0.25958,0.347486,0.869837,0.226042
1,1,B,0.583223,0.713453,0.868617,0.034892
2,2,C,0.810806,0.943963,0.800042,0.841727
3,3,D,0.840224,0.858703,0.469473,0.889368
4,4,E,0.32951,0.070321,0.993023,0.026065


In [38]:
# A third plain reset fails, but not because of a limit on how many times the
# index may be reset: each reset inserts the outgoing index as a column
# ('index' first, then 'level_0'), and this time the name 'level_0' is already
# taken, so pandas raises ValueError. reset_index(drop=True) discards the old
# index instead of inserting it and can be repeated any number of times.
try:
    df = df.reset_index()
except ValueError as err:
    print(f"ValueError: {err}")
df

ValueError: cannot insert level_0, already exists

In [39]:
#SET INDEX

In [40]:
# Promote column 'Z' to the row index: its values become the row labels and
# 'Z' is removed from the columns. Rebinding df replaces inplace=True.
df = df.set_index('Z')

In [41]:
df

Unnamed: 0_level_0,level_0,index,W,X,Y
Z,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0.226042,0,A,0.25958,0.347486,0.869837
0.034892,1,B,0.583223,0.713453,0.868617
0.841727,2,C,0.810806,0.943963,0.800042
0.889368,3,D,0.840224,0.858703,0.469473
0.026065,4,E,0.32951,0.070321,0.993023
