In [2]:
import pandas as pd
import numpy as np
from numpy.random import randn

In [3]:
np.random.seed(101) # fix NumPy's global random seed so the random values below are the same on every run

In [9]:

# Build a 5x4 frame of random draws: rows labelled A-E, columns labelled W-Z.
row_labels = list('ABCDE')
col_labels = list('WXYZ')
values = randn(len(row_labels), len(col_labels))
df = pd.DataFrame(values, index=row_labels, columns=col_labels)
df
# Each column of a DataFrame is a pd.Series.
# DataFrame = a collection of Series that share one index.

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


In [11]:
df['W']
# Selecting a single column label returns a Series (indexed A-E, named 'W').

A   -1.467514
B    0.392489
C    0.666319
D    0.641806
E   -1.972605
Name: W, dtype: float64

In [12]:
type(df['W'])
# Confirms that a single-column selection is a pandas Series.

pandas.core.series.Series

In [13]:
type(df)
# The full table, by contrast, is a pandas DataFrame.

pandas.core.frame.DataFrame

In [15]:
df[['W', 'Z']]
# Passing a list of column labels returns a DataFrame containing just those columns.

Unnamed: 0,W,Z
A,-1.467514,0.485809
B,0.392489,1.54199
C,0.666319,1.407338
D,0.641806,1.028293
E,-1.972605,-1.223082


In [16]:
type(df[['W', 'Z']])
# Confirms that a multi-column selection is a DataFrame, not a Series.

pandas.core.frame.DataFrame

Adding columns

In [19]:
# Assigning to a column label that does not exist yet adds that column to df.
# Here 'new' holds the row-by-row sum of columns W and Y.
df['new'] = df['W'] + df['Y']
df

Unnamed: 0,W,X,Y,Z,new
A,-1.467514,-0.494095,-0.162535,0.485809,-1.630049
B,0.392489,0.221491,-0.855196,1.54199,-0.462707
C,0.666319,-0.538235,-0.568581,1.407338,0.097738
D,0.641806,-0.9051,-0.391157,1.028293,0.250649
E,-1.972605,-0.866885,0.720788,-1.223082,-1.251818


Drop Columns

In [21]:
# drop() defaults to axis=0, i.e. it looks for 'new' among the ROW labels.
# 'new' is a column, so the lookup fails with a KeyError. The error is the
# point of this cell, so catch and print it instead of letting it abort a
# Restart & Run All of the notebook.
try:
    df.drop('new')
except KeyError as err:
    print(f"{type(err).__name__}: {err}")

KeyError: "['new'] not found in axis"

In [22]:
df.drop('new', axis=1)
# axis=1 makes drop() look among the column labels.
# The result is a new DataFrame without 'new'; df itself is not modified.

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


Inplace

In [24]:
df
# 'new' is still present: the drop above returned a new DataFrame and left df unchanged.

Unnamed: 0,W,X,Y,Z,new
A,-1.467514,-0.494095,-0.162535,0.485809,-1.630049
B,0.392489,0.221491,-0.855196,1.54199,-0.462707
C,0.666319,-0.538235,-0.568581,1.407338,0.097738
D,0.641806,-0.9051,-0.391157,1.028293,0.250649
E,-1.972605,-0.866885,0.720788,-1.223082,-1.251818


In [25]:
df.drop('new', axis=1, inplace=True)
# inplace=True removes the column from df itself, so the cell displays nothing.
# NOTE: not idempotent - on a second run 'new' is already gone, so expect a KeyError.

In [26]:
df
# 'new' is gone now: the in-place drop modified df directly.

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293
E,-1.972605,-0.866885,0.720788,-1.223082


Drop Rows

In [27]:
df.drop('E', axis=0)
# axis=0 (the default) makes drop() look among the row labels.
# No inplace=True here, so the displayed result is a new frame without row 'E'.

Unnamed: 0,W,X,Y,Z
A,-1.467514,-0.494095,-0.162535,0.485809
B,0.392489,0.221491,-0.855196,1.54199
C,0.666319,-0.538235,-0.568581,1.407338
D,0.641806,-0.9051,-0.391157,1.028293


In [28]:
df.shape
# Still 5 rows: the row drop above was not in place, so df keeps row 'E'.

(5, 4)

Selecting rows

In [30]:
df.loc['A']
# .loc selects by index label; a single row comes back as a Series keyed by column name.

W   -1.467514
X   -0.494095
Y   -0.162535
Z    0.485809
Name: A, dtype: float64

In [32]:
df.iloc[0]
# .iloc selects by integer position (0 = first row), even when the index labels are not numeric.

W   -1.467514
X   -0.494095
Y   -0.162535
Z    0.485809
Name: A, dtype: float64

Selecting subsets

In [33]:
df.loc['B', 'Y']
# .loc[row_label, column_label] returns the single value at row B, column Y.

-0.8551960407780934

In [34]:
df.loc[['A', 'B'], ['W', 'Y']]
# Lists of row labels and column labels select the matching sub-DataFrame (rows A-B, columns W and Y).

Unnamed: 0,W,Y
A,-1.467514,-0.162535
B,0.392489,-0.855196
