In [3]:
import numpy as np
import pandas as pd
from numpy.random import randn
np.random.seed(101) ## Fix the seed so that everyone gets the same random numbers and the outputs match the course data

In [4]:
## Build a 5x4 frame of random draws: rows are labelled A-E and columns W-Z
df = pd.DataFrame(
    randn(5, 4),
    index=list("ABCDE"),
    columns=list("WXYZ"),
)

In [7]:
df
## A DataFrame is a bunch of Series (covered earlier) sharing one index: columns W, X, Y, Z all use A, B, C, D, E as keys

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [8]:
df["W"] ## Note how a single column is printed as a Series.

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [9]:
df["Z"] ## Same bracket selection for another column; the result is again a Series

A    0.503826
B    0.605965
C   -0.589001
D    0.955057
E    0.683509
Name: Z, dtype: float64

In [10]:
## Checking the type: a single selected column is a pandas Series
type(df["Z"])

pandas.core.series.Series

In [11]:
type(df) ## The full table itself is a DataFrame

pandas.core.frame.DataFrame

In [12]:
df[["W", "Z"]] ## Passing a list of column names returns a DataFrame containing just those columns

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [13]:
## Creating a new column: assigning to a key that does not exist yet adds it; here it is the element-wise sum of W and Z
df["new_col"] = df["W"] + df["Z"]
df

Unnamed: 0,W,X,Y,Z,new_col
A,2.70685,0.628133,0.907969,0.503826,3.210676
B,0.651118,-0.319318,-0.848077,0.605965,1.257083
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169
D,0.188695,-0.758872,-0.933237,0.955057,1.143752
E,0.190794,1.978757,2.605967,0.683509,0.874303


In [14]:
df["dropable_col"] = df["W"] * 100  ## Add a throwaway column (W scaled by 100) just to practise deleting a column
df

Unnamed: 0,W,X,Y,Z,new_col,dropable_col
A,2.70685,0.628133,0.907969,0.503826,3.210676,270.684984
B,0.651118,-0.319318,-0.848077,0.605965,1.257083,65.111795
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169,-201.816824
D,0.188695,-0.758872,-0.933237,0.955057,1.143752,18.869531
E,0.190794,1.978757,2.605967,0.683509,0.874303,19.079432


In [15]:
## Dropping a column
# df.drop("dropable_col")  ## on its own this raises KeyError: "['dropable_col'] not found in axis"
## By default axis is 0, which means rows, so columns have to be requested explicitly with axis=1.

df.drop("dropable_col", axis=1)



Unnamed: 0,W,X,Y,Z,new_col
A,2.70685,0.628133,0.907969,0.503826,3.210676
B,0.651118,-0.319318,-0.848077,0.605965,1.257083
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169
D,0.188695,-0.758872,-0.933237,0.955057,1.143752
E,0.190794,1.978757,2.605967,0.683509,0.874303


In [16]:
df ## The original table still has dropable_col: drop() returned a new frame and left df untouched

Unnamed: 0,W,X,Y,Z,new_col,dropable_col
A,2.70685,0.628133,0.907969,0.503826,3.210676,270.684984
B,0.651118,-0.319318,-0.848077,0.605965,1.257083,65.111795
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169,-201.816824
D,0.188695,-0.758872,-0.933237,0.955057,1.143752,18.869531
E,0.190794,1.978757,2.605967,0.683509,0.874303,19.079432


In [17]:
## drop() only modifies df itself when inplace=True is given. This is a safety measure in pandas so that we don't accidentally lose our data.
df.drop("dropable_col", axis =1, inplace=True)

In [18]:
df ## dropable_col is now gone from df itself

Unnamed: 0,W,X,Y,Z,new_col
A,2.70685,0.628133,0.907969,0.503826,3.210676
B,0.651118,-0.319318,-0.848077,0.605965,1.257083
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169
D,0.188695,-0.758872,-0.933237,0.955057,1.143752
E,0.190794,1.978757,2.605967,0.683509,0.874303


In [19]:
## Removing a row (axis=0 targets row labels); without inplace=True this returns a new frame and df keeps row E
df.drop("E", axis=0)

Unnamed: 0,W,X,Y,Z,new_col
A,2.70685,0.628133,0.907969,0.503826,3.210676
B,0.651118,-0.319318,-0.848077,0.605965,1.257083
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169
D,0.188695,-0.758872,-0.933237,0.955057,1.143752


In [20]:
## ROWS: df still has all five rows, because the row drop above was not done in place
df


Unnamed: 0,W,X,Y,Z,new_col
A,2.70685,0.628133,0.907969,0.503826,3.210676
B,0.651118,-0.319318,-0.848077,0.605965,1.257083
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169
D,0.188695,-0.758872,-0.933237,0.955057,1.143752
E,0.190794,1.978757,2.605967,0.683509,0.874303


In [21]:
## SELECTING A ROW IN PANDAS (label-based index)
df.loc["A"]  ## The result is also a Series, so both rows and columns come back as Series.

W          2.706850
X          0.628133
Y          0.907969
Z          0.503826
new_col    3.210676
Name: A, dtype: float64

In [22]:
## Selecting rows by integer position (0, 1, 2, 3, ...) instead of by label
df.iloc[2]

W         -2.018168
X          0.740122
Y          0.528813
Z         -0.589001
new_col   -2.607169
Name: C, dtype: float64

In [23]:
## Accessing a single cell: row label first, then column label
df.loc['B', "Y"]

-0.8480769834036315

In [24]:
## Getting a subset of a DataFrame: a list of row labels and a list of column labels
df.loc[['A', 'C'], ['Y', 'Z']]

Unnamed: 0,Y,Z
A,0.907969,0.503826
C,0.528813,-0.589001


In [30]:
df.loc["A"] ## Row "A" looked up by its index label

W          2.706850
X          0.628133
Y          0.907969
Z          0.503826
new_col    3.210676
Name: A, dtype: float64

In [7]:
## .loc with a single key looks up ROW labels, so a column name such as "W" is not found.
## The KeyError is the point of this cell, so catch it and print it rather than
## letting it abort a "Restart & Run All". (Use df["W"] or df.loc[:, "W"] to select the column.)
try:
    df.loc["W"]
except KeyError as err:
    print(f"KeyError: {err}")


KeyError: 'W'