# Pandas Snippets
This notebook is a collection of pandas snippets, tips, and tricks that I've learned over years of heavy use. I hope you find it helpful!

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Build a small throwaway frame to experiment on: 6 rows by 4 columns of
# standard-normal noise, with the columns labelled A through D.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
0,-0.168442,-0.112051,-1.612653,0.211259
1,-0.500286,0.793669,-1.056163,-1.435844
2,-0.347455,-0.093979,-0.807137,0.773167
3,-0.701675,1.930133,0.249761,0.401427
4,-1.059418,-1.761782,0.065577,-0.29442
5,0.298216,-1.719261,0.796989,1.303812


In [4]:
# Corrupted or missing source data can leave +inf / -inf values in the frame.
# Map both infinities to NaN so pandas' missing-data tools (isna, dropna,
# fillna) treat them as missing values.
# Reassign instead of using inplace=True: replace() returns a new frame,
# which keeps the step chainable.
df = df.replace([np.inf, -np.inf], np.nan)

In [5]:
# Remove leading/trailing whitespace from column names.
# rename() returns a new frame, so the result is assigned back; without the
# assignment the stripped labels are only displayed and `df` keeps the
# original names. Non-string labels (e.g. integer column names) have no
# .strip(), so they are passed through unchanged.
df = df.rename(columns=lambda label: label.strip() if isinstance(label, str) else label)
df

Unnamed: 0,A,B,C,D
0,-0.168442,-0.112051,-1.612653,0.211259
1,-0.500286,0.793669,-1.056163,-1.435844
2,-0.347455,-0.093979,-0.807137,0.773167
3,-0.701675,1.930133,0.249761,0.401427
4,-1.059418,-1.761782,0.065577,-0.29442
5,0.298216,-1.719261,0.796989,1.303812


In [9]:
# Add a one-step lag of column A: each row receives the previous row's A
# value, and the first row (which has no predecessor) becomes NaN.
df['A_lag1'] = df['A'].shift(periods=1)
df

Unnamed: 0,A,B,C,D,A_lag1
0,-0.168442,-0.112051,-1.612653,0.211259,
1,-0.500286,0.793669,-1.056163,-1.435844,-0.168442
2,-0.347455,-0.093979,-0.807137,0.773167,-0.500286
3,-0.701675,1.930133,0.249761,0.401427,-0.347455
4,-1.059418,-1.761782,0.065577,-0.29442,-0.701675
5,0.298216,-1.719261,0.796989,1.303812,-1.059418


In [None]:
# Generate two reproducible demo frames. The global NumPy seed is reset before
# each one, so either frame can be rebuilt on its own with identical values.

# Frame 1 (`df`): 100 fake trades, one every 15 days starting 2010-01-01.
# pd.date_range produces the same dates as the hand-built list of
# datetime + timedelta objects, without relying on a `datetime` import
# (which this notebook's import cell does not provide).
np.random.seed(0)
df = pd.DataFrame({'date': pd.date_range(start='2010-01-01', periods=100, freq='15D'),
                   'invested': np.random.random(100)*1e6,
                   'return': np.random.random(100),
                   'side': np.random.choice([-1, 1], 100)})

# Daily DatetimeIndex from 2010-01-01 through 2016-01-01 (both inclusive).
# ISO dates avoid any day-first / month-first ambiguity.
idx = pd.date_range(start='2010-01-01', end='2016-01-01') # date_range

# Frame 2 (`df_xyz`): N rows of uniform noise on [-5, 5) per column.
# It has its own name so it does not overwrite the trade frame held in `df`,
# which the following cell reads via df['date'].
np.random.seed(0)
N = 10
df_xyz = pd.DataFrame(
    {'X':np.random.uniform(-5,5,N),
     'Y':np.random.uniform(-5,5,N),
     'Z':np.random.uniform(-5,5,N),
    })
df_xyz

In [None]:
# Extract the calendar year from the 'date' column into a new 'year' column.
# Requires `df` to have a datetime-like 'date' column; the .dt accessor
# raises on non-datetime data.
df['year'] = df['date'].dt.year # extract year