# 10 Minutes to pandas

Reference: https://pandas.pydata.org/pandas-docs/stable/10min.html

In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Object Creation
# A Series built from a plain Python list gets a default integer index.
# The np.nan entry marks a missing value; the resulting dtype is float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Six consecutive calendar days starting 2013-01-01; used as the row index
# of the DataFrame created in the next cell.
dates = pd.date_range(start='2013-01-01', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# 6x4 frame of standard-normal draws, indexed by `dates`, columns A-D.
# No seed is set in the visible cells, so re-running this cell draws
# different numbers than the output recorded below.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2013-01-01,0.416433,-0.212231,-1.237885,1.20355
2013-01-02,-1.902428,1.557099,-0.256309,-0.951249
2013-01-03,0.276431,0.917327,0.956368,-1.35366
2013-01-04,-1.593908,-2.005513,-0.441866,-2.476274
2013-01-05,0.243131,-1.043257,-1.518646,-2.114263
2013-01-06,0.014535,0.710019,-0.684214,0.817802


In [12]:
# Viewing Data
# Show the top of the frame; with no argument, five rows are displayed
# (see the output below).
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,0.416433,-0.212231,-1.237885,1.20355
2013-01-02,-1.902428,1.557099,-0.256309,-0.951249
2013-01-03,0.276431,0.917327,0.956368,-1.35366
2013-01-04,-1.593908,-2.005513,-0.441866,-2.476274
2013-01-05,0.243131,-1.043257,-1.518646,-2.114263


In [13]:
# Show the last three rows of the frame.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-1.593908,-2.005513,-0.441866,-2.476274
2013-01-05,0.243131,-1.043257,-1.518646,-2.114263
2013-01-06,0.014535,0.710019,-0.684214,0.817802


In [14]:
# Row labels of the frame: the DatetimeIndex that was passed in as `dates`.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.424301,-0.012759,-0.530425,-0.812349
std,1.038147,1.336017,0.871122,1.516389
min,-1.902428,-2.005513,-1.518646,-2.476274
25%,-1.191797,-0.8355,-1.099467,-1.924112
50%,0.128833,0.248894,-0.56304,-1.152454
75%,0.268106,0.8655,-0.302698,0.375539
max,0.416433,1.557099,0.956368,1.20355


In [16]:
# Transpose: columns A-D become the rows and the dates become the column labels.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,0.416433,-1.902428,0.276431,-1.593908,0.243131,0.014535
B,-0.212231,1.557099,0.917327,-2.005513,-1.043257,0.710019
C,-1.237885,-0.256309,0.956368,-0.441866,-1.518646,-0.684214
D,1.20355,-0.951249,-1.35366,-2.476274,-2.114263,0.817802


In [17]:
# Underlying data as a 2-D NumPy array; index and column labels are dropped.
# NOTE(review): newer pandas releases document `DataFrame.to_numpy()` as the
# preferred accessor — consider switching if the installed version supports it.
df.values

array([[ 0.41643258, -0.21223068, -1.23788513,  1.20355027],
       [-1.90242814,  1.55709853, -0.25630915, -0.95124891],
       [ 0.27643133,  0.91732694,  0.95636792, -1.35365986],
       [-1.59390794, -2.00551326, -0.44186577, -2.47627424],
       [ 0.24313148, -1.04325679, -1.51864631, -2.11426325],
       [ 0.01453512,  0.71001889, -0.68421387,  0.81780224]])

In [22]:
# Sort rows (axis=0) by their index label in descending order, latest date first.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D
2013-01-06,0.014535,0.710019,-0.684214,0.817802
2013-01-05,0.243131,-1.043257,-1.518646,-2.114263
2013-01-04,-1.593908,-2.005513,-0.441866,-2.476274
2013-01-03,0.276431,0.917327,0.956368,-1.35366
2013-01-02,-1.902428,1.557099,-0.256309,-0.951249
2013-01-01,0.416433,-0.212231,-1.237885,1.20355


In [23]:
# Order rows by the values in column B (smallest first in the output below).
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-04,-1.593908,-2.005513,-0.441866,-2.476274
2013-01-05,0.243131,-1.043257,-1.518646,-2.114263
2013-01-01,0.416433,-0.212231,-1.237885,1.20355
2013-01-06,0.014535,0.710019,-0.684214,0.817802
2013-01-03,0.276431,0.917327,0.956368,-1.35366
2013-01-02,-1.902428,1.557099,-0.256309,-0.951249


# Selection

In [24]:
# Getting
# Selecting a single column with [] yields a Series named after that column.
df['A']

2013-01-01    0.416433
2013-01-02   -1.902428
2013-01-03    0.276431
2013-01-04   -1.593908
2013-01-05    0.243131
2013-01-06    0.014535
Freq: D, Name: A, dtype: float64

In [25]:
# An integer slice inside [] selects rows by position: rows 0, 1 and 2
# (the stop position is excluded).
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.416433,-0.212231,-1.237885,1.20355
2013-01-02,-1.902428,1.557099,-0.256309,-0.951249
2013-01-03,0.276431,0.917327,0.956368,-1.35366


In [28]:
# Selection by Label
# One row label returns that row as a Series indexed by the column names.
df.loc[dates[0]]

A    0.416433
B   -0.212231
C   -1.237885
D    1.203550
Name: 2013-01-01 00:00:00, dtype: float64

In [30]:
# All rows (`:`), restricted to columns A and B by label.
df.loc[:,['A','B']]

Unnamed: 0,A,B
2013-01-01,0.416433,-0.212231
2013-01-02,-1.902428,1.557099
2013-01-03,0.276431,0.917327
2013-01-04,-1.593908,-2.005513
2013-01-05,0.243131,-1.043257
2013-01-06,0.014535,0.710019


In [32]:
# Label slicing includes BOTH endpoints: rows 2013-01-02 through 2013-01-05
# appear in the output, restricted to columns A and B.
df.loc['20130102':'20130105', ['A', 'B']]

Unnamed: 0,A,B
2013-01-02,-1.902428,1.557099
2013-01-03,0.276431,0.917327
2013-01-04,-1.593908,-2.005513
2013-01-05,0.243131,-1.043257


In [33]:
# A single row label plus a single column label returns a scalar value.
df.loc['20130101','A']

0.41643257522814037

In [None]:
# Selection by Position
