In [1]:
import pandas as pd

## Assign New Columns to a DataFrame

In [2]:
# Small two-column frame used to demonstrate adding derived columns.
df = pd.DataFrame(
    {
        "col1": [1, 2, 3, 4, 5],
        "col2": [3, 4, 5, 6, 7],
    }
)
df

Unnamed: 0,col1,col2
0,1,3
1,2,4
2,3,5
3,4,6
4,5,7


In [3]:
# Add the derived columns in two steps: col4 is computed from col3,
# so col3 has to exist on the frame before col4 is evaluated.
df = df.assign(col3=lambda frame: frame["col1"] * 100 + frame["col2"])
df = df.assign(col4=lambda frame: frame["col2"] + frame["col3"])
df

Unnamed: 0,col1,col2,col3,col4
0,1,3,103,106
1,2,4,204,208
2,3,5,305,310
3,4,6,406,412
4,5,7,507,514


## Clip the Outliers in a Column

In [4]:
# Single-column toy data; 9 and -3 are the extreme values at either end.
data = {"col0": [9, -3, 0, -1, 5]}
df = pd.DataFrame(data=data)
df

Unnamed: 0,col0
0,9
1,-3
2,0
3,-1
4,5


In [5]:
# 5th and 95th percentiles of col0, used as the lower/upper clipping bounds.
lower, upper = (df["col0"].quantile(q) for q in (0.05, 0.95))

In [6]:
# Cap every value to the [lower, upper] range. Values beyond a bound are
# replaced by that bound; no rows are removed.
df = df.clip(lower, upper)
df

Unnamed: 0,col0
0,8.2
1,-2.6
2,0.0
3,-1.0
4,5.0


## Select or Drop All Columns that Start with X

In [7]:
# Two columns sharing the "col" prefix plus one column ("year") that does not.
df = pd.DataFrame(
    {
        "col1": [1, 2, 3, 4, 5],
        "col2": [4, 5, 6, 6, 7],
        "year": [2018, 2019, 2020, 2020, 2020],
    }
)
df

Unnamed: 0,col1,col2,year
0,1,4,2018
1,2,5,2019
2,3,6,2020
3,4,6,2020
4,5,7,2020


In [8]:
# Boolean mask with one entry per column: True where the name begins with "col".
chosen_cols = df.columns.str.startswith("col")
chosen_cols

array([ True,  True, False])

## Select All Columns that Start with X

In [9]:
# Keep only the columns flagged True in the mask.
filtered_df = df[df.columns[chosen_cols]]
filtered_df

Unnamed: 0,col1,col2
0,1,4
1,2,5
2,3,6
3,4,6
4,5,7


## Drop All Columns that Start with X

In [10]:
# Drop the columns flagged True in the mask, keeping everything else.
filtered_df = df.drop(columns=df.columns[chosen_cols])
filtered_df

Unnamed: 0,year
0,2018
1,2019
2,2020
3,2020
4,2020


## Filter Rows Where a Column's Value Is in Another List

In [11]:
# Numeric column plus a letter column; the letter column is what gets filtered on.
df = pd.DataFrame(
    {
        "col1": [1, 2, 3, 4, 5],
        "col2": ["a", "b", "c", "d", "e"],
    }
)

In [12]:
# Values of col2 to keep; "b" is deliberately absent.
l = list("acde")

In [13]:
# Row-wise membership test: True where the col2 value appears in the list.
df["col2"].isin(l)

0     True
1    False
2     True
3     True
4     True
Name: col2, dtype: bool

In [14]:
# Keep only the rows whose col2 value is in the list; the original index
# labels are preserved (no reset).
df = df.loc[df["col2"].isin(l)]
df

Unnamed: 0,col1,col2
0,1,a
2,3,c
3,4,d
4,5,e
