In [2]:
import numpy as np
import pandas as pd

In [3]:
from numpy.random import randn
# Seed NumPy's global random number generator so that the randn() values drawn
# later in the notebook are the same on every run (101 is an arbitrary choice).
np.random.seed(101)

In [4]:
# Build a 5x4 frame of standard-normal draws: rows are labelled A-E and
# columns W-Z. Keyword arguments make it explicit which list is which.
df = pd.DataFrame(
    data=randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


### Here W, X, Y and Z are the columns, and A, B, C, D and E are the rows (index labels)
### Each of the columns W, X, Y and Z is a pandas Series, and they all share the same index: A, B, C, D and E

## Pandas Indexing

In [5]:
df['W']  # Selecting a single column with bracket notation returns a pandas Series

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [6]:
type(df['W'])  # The built-in type() returns the object's class; a single column is a pandas Series

pandas.core.series.Series

In [7]:
type(df)  # The frame as a whole is a pandas DataFrame

pandas.core.frame.DataFrame

In [8]:
df.W  # Attribute-style access also works, but bracket notation is preferred: a column name can be confused with (or clash with) a DataFrame method or attribute name

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [9]:
df[['W','Z']]  # Passing a list of column names returns a DataFrame containing just those columns

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [10]:
df['new'] = df['W'] + df['Y']  # Assigning to a new column label adds that column; here it is the element-wise sum of W and Y

In [11]:
df  # Display the frame: the 'new' column is now present

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [12]:
df.drop('new',axis=1)  # drop() returns a new DataFrame without the 'new' column; df itself is left unchanged

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [13]:
df.drop('new',axis=1)  # axis=1 tells drop() that 'new' is a column label; without inplace=True the result is a copy and df is not modified

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [14]:
df  # 'new' is still present: the drop() calls above returned new frames and did not modify df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [15]:
df.drop('new',axis=1,inplace=True)  # inplace=True modifies df itself and returns None, so the column is removed for good (df = df.drop('new', axis=1) is the reassignment alternative)

In [16]:
df  # The 'new' column is gone after the in-place drop

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [17]:
df.drop('E',axis=0)  # axis=0 (the default) refers to row labels, so this returns a copy of df without row 'E'

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [18]:
df.shape  # shape is a (rows, columns) tuple: still 5 rows and 4 columns, because the row drop above was not in place

(5, 4)

## Selecting the rows

In [19]:
df  # Show the full frame again as a reference for the row selections below

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [20]:
df.loc['A']  # .loc selects by label; a single row comes back as a Series indexed by the column names

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [21]:
df.iloc[2]  # .iloc selects by zero-based integer position (not by label), so 2 is the third row, 'C'

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [22]:
df.loc['C']  # Same row as df.iloc[2], this time selected by its label

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [23]:
df.loc['B','Y']  # .loc[row_label, column_label] returns the single value at that intersection

-0.8480769834036315

In [24]:
df.loc[['A','B'],['W','Y']]  # Lists of row labels and column labels return the corresponding sub-DataFrame

Unnamed: 0,W,Y
A,2.70685,0.907969
B,0.651118,-0.848077


### Operations

In [25]:
# Small mixed-type frame (two integer columns and one string column).
# Note: this rebinds `df`, replacing the random-number frame used above.
df = pd.DataFrame(
    data={
        'col1': [1, 2, 3, 4],
        'col2': [444, 555, 666, 444],
        'col3': ['abc', 'def', 'dhi', 'xyz'],
    }
)