# 10 Minutes to pandas
<https://pandas.pydata.org/docs/user_guide/10min.html>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# One-dimensional labelled array built from a plain Python list. No index is
# supplied, so pandas labels the entries 0..5; the NaN entry makes the whole
# Series float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])


In [3]:
# Display the Series: index labels on the left, values on the right, dtype last.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Six consecutive calendar days starting 2013-01-01; used below as the row
# index of `df`. Daily frequency is date_range's default, spelled out here.
dates = pd.date_range(start='20130101', periods=6, freq='D')

In [5]:
# Show the DatetimeIndex just built: six daily timestamps, freq='D'.
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [7]:
# 6x4 frame of random normal draws, one row per entry of `dates`, columns A-D.
# NOTE(review): no random seed is set in the visible cells, so these values
# (and every recorded output derived from them below) will differ on a fresh
# Restart & Run All — confirm whether a seed cell was deleted.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))

In [8]:
# Render the whole frame; it has only six rows, so nothing is truncated.
df

Unnamed: 0,A,B,C,D
2013-01-01,-1.649375,-0.61014,0.373953,0.627892
2013-01-02,1.300977,0.651702,-1.991875,0.936231
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217
2013-01-04,-0.600407,-0.50677,2.071598,-0.466982
2013-01-05,0.750102,1.296015,-0.252448,-0.124502
2013-01-06,-1.276042,-1.689749,1.048136,-0.288422


In [10]:
# A frame built from a dict of heterogeneous column specs. The array-like
# entries (C, D, E) all have four elements and fix the number of rows; the
# scalar entries (A, B, F) are repeated on every row.
df2 = pd.DataFrame(
    {
        'A': 1.0,                                                    # float scalar
        'B': pd.Timestamp('20130102'),                               # timestamp scalar
        'C': pd.Series(1, index=[0, 1, 2, 3], dtype='float32'),      # explicit float32 Series
        'D': np.full(4, 3, dtype='int32'),                           # int32 array of 3s
        'E': pd.Categorical(['test', 'train', 'test', 'train']),    # categorical labels
        'F': 'foo',                                                  # string scalar
    }
)

In [11]:
# Show the mixed-dtype frame; the scalar entries A, B and F appear on all four rows.
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [12]:
# Per-column dtypes: unlike a NumPy array, each column keeps its own dtype.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [13]:
# Top of the frame; with no argument head() returns the first five rows.
df.head()

Unnamed: 0,A,B,C,D
2013-01-01,-1.649375,-0.61014,0.373953,0.627892
2013-01-02,1.300977,0.651702,-1.991875,0.936231
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217
2013-01-04,-0.600407,-0.50677,2.071598,-0.466982
2013-01-05,0.750102,1.296015,-0.252448,-0.124502


In [14]:
# Bottom of the frame: the last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2013-01-04,-0.600407,-0.50677,2.071598,-0.466982
2013-01-05,0.750102,1.296015,-0.252448,-0.124502
2013-01-06,-1.276042,-1.689749,1.048136,-0.288422


In [15]:
# Row labels: the DatetimeIndex that was passed in as index=dates.
df.index

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [16]:
# Column labels, held in an Index of strings.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [17]:
# The underlying data as a plain 2-D NumPy array; row and column labels are not included.
df.values

array([[-1.64937478, -0.61013992,  0.37395324,  0.62789249],
       [ 1.30097667,  0.65170195, -1.9918754 ,  0.9362309 ],
       [-1.15978106,  0.38831402, -0.2815648 , -0.33216972],
       [-0.60040652, -0.50676953,  2.07159843, -0.46698163],
       [ 0.75010212,  1.29601472, -0.25244816, -0.12450232],
       [-1.27604207, -1.6897489 ,  1.04813648, -0.28842218]])

In [19]:
# Quick per-column summary: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.439088,-0.078438,0.1613,0.058675
std,1.196052,1.067949,1.377274,0.579175
min,-1.649375,-1.689749,-1.991875,-0.466982
25%,-1.246977,-0.584297,-0.274286,-0.321233
50%,-0.880094,-0.059228,0.060753,-0.206462
75%,0.412475,0.585855,0.879591,0.439794
max,1.300977,1.296015,2.071598,0.936231


In [20]:
# Transpose: the columns A-D become rows and the dates become column labels.
df.T

Unnamed: 0,2013-01-01 00:00:00,2013-01-02 00:00:00,2013-01-03 00:00:00,2013-01-04 00:00:00,2013-01-05 00:00:00,2013-01-06 00:00:00
A,-1.649375,1.300977,-1.159781,-0.600407,0.750102,-1.276042
B,-0.61014,0.651702,0.388314,-0.50677,1.296015,-1.689749
C,0.373953,-1.991875,-0.281565,2.071598,-0.252448,1.048136
D,0.627892,0.936231,-0.33217,-0.466982,-0.124502,-0.288422


In [21]:
# Sort along axis 1, i.e. reorder the columns by label, descending (D, C, B, A).
# Returns a new frame; df itself keeps its column order.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2013-01-01,0.627892,0.373953,-0.61014,-1.649375
2013-01-02,0.936231,-1.991875,0.651702,1.300977
2013-01-03,-0.33217,-0.281565,0.388314,-1.159781
2013-01-04,-0.466982,2.071598,-0.50677,-0.600407
2013-01-05,-0.124502,-0.252448,1.296015,0.750102
2013-01-06,-0.288422,1.048136,-1.689749,-1.276042


In [22]:
# Reorder the rows by the values in column B, smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2013-01-06,-1.276042,-1.689749,1.048136,-0.288422
2013-01-01,-1.649375,-0.61014,0.373953,0.627892
2013-01-04,-0.600407,-0.50677,2.071598,-0.466982
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217
2013-01-02,1.300977,0.651702,-1.991875,0.936231
2013-01-05,0.750102,1.296015,-0.252448,-0.124502


In [23]:
# Selecting one column label with [] yields a Series (named 'A', indexed by date).
df['A']

2013-01-01   -1.649375
2013-01-02    1.300977
2013-01-03   -1.159781
2013-01-04   -0.600407
2013-01-05    0.750102
2013-01-06   -1.276042
Freq: D, Name: A, dtype: float64

In [24]:
# A slice inside [] selects ROWS, not columns: positions 0, 1, 2 (end exclusive).
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,-1.649375,-0.61014,0.373953,0.627892
2013-01-02,1.300977,0.651702,-1.991875,0.936231
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217


In [25]:
# Slicing rows with date strings; unlike positional slices, both endpoints are included.
df['20130102':'20130104']

Unnamed: 0,A,B,C,D
2013-01-02,1.300977,0.651702,-1.991875,0.936231
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217
2013-01-04,-0.600407,-0.50677,2.071598,-0.466982


In [26]:
# Label-based lookup of one row (the first date); the result is a Series keyed by column name.
df.loc[dates[0]]

A   -1.649375
B   -0.610140
C    0.373953
D    0.627892
Name: 2013-01-01 00:00:00, dtype: float64

In [27]:
# Label-based selection on both axes: every row, columns A and B only.
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2013-01-01,-1.649375,-0.61014
2013-01-02,1.300977,0.651702
2013-01-03,-1.159781,0.388314
2013-01-04,-0.600407,-0.50677
2013-01-05,0.750102,1.296015
2013-01-06,-1.276042,-1.689749


In [28]:
# Row label slice (both endpoints included) combined with a list of column labels.
df.loc['20130102':'20130104', ['A', 'B']]

Unnamed: 0,A,B
2013-01-02,1.300977,0.651702
2013-01-03,-1.159781,0.388314
2013-01-04,-0.600407,-0.50677


In [29]:
# A single row label plus a column list drops one dimension: the result is a Series.
df.loc['20130102', ['A', 'B']]

A    1.300977
B    0.651702
Name: 2013-01-02 00:00:00, dtype: float64

In [30]:
# One row label and one column label give back a scalar.
df.loc[dates[0], 'A']

-1.6493747775991099

In [31]:
# Same scalar through .at, the accessor dedicated to single-value lookup by label.
df.at[dates[0], 'A']

-1.6493747775991099

In [32]:
# Position-based lookup: the row at integer position 3 (fourth row, 2013-01-04).
df.iloc[3]

A   -0.600407
B   -0.506770
C    2.071598
D   -0.466982
Name: 2013-01-04 00:00:00, dtype: float64

In [33]:
# Positional slices on both axes, end-exclusive like Python lists: rows 3-4, columns 0-1.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2013-01-04,-0.600407,-0.50677
2013-01-05,0.750102,1.296015


In [34]:
# Explicit lists of integer positions: rows 1, 2, 4 and columns 0, 2 (A and C).
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2013-01-02,1.300977,-1.991875
2013-01-03,-1.159781,-0.281565
2013-01-05,0.750102,-0.252448


In [35]:
# Rows at positions 1-2, all columns.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2013-01-02,1.300977,0.651702,-1.991875,0.936231
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217


In [36]:
# All rows, columns at positions 1-2 (B and C).
df.iloc[:, 1:3]

Unnamed: 0,B,C
2013-01-01,-0.61014,0.373953
2013-01-02,0.651702,-1.991875
2013-01-03,0.388314,-0.281565
2013-01-04,-0.50677,2.071598
2013-01-05,1.296015,-0.252448
2013-01-06,-1.689749,1.048136


In [37]:
# Two integer positions give back a scalar: row 1, column 1 (B on 2013-01-02).
df.iloc[1, 1]

0.65170194819992289

In [38]:
# Same scalar through .iat, the accessor dedicated to single-value lookup by position.
df.iat[1, 1]

0.65170194819992289

In [39]:
# Boolean indexing: `df.A > 0` is a True/False Series, and [] keeps the rows where it is True.
df[df.A > 0]

Unnamed: 0,A,B,C,D
2013-01-02,1.300977,0.651702,-1.991875,0.936231
2013-01-05,0.750102,1.296015,-0.252448,-0.124502


In [40]:
# Derive a new frame from df with an extra label column E appended. assign()
# works on a copy, so df itself is left unchanged.
# NOTE(review): this rebinds df2, which earlier cells used for the
# mixed-dtype example frame.
df2 = df.assign(E=['one', 'one', 'two', 'three', 'four', 'three'])
df2

Unnamed: 0,A,B,C,D,E
2013-01-01,-1.649375,-0.61014,0.373953,0.627892,one
2013-01-02,1.300977,0.651702,-1.991875,0.936231,one
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217,two
2013-01-04,-0.600407,-0.50677,2.071598,-0.466982,three
2013-01-05,0.750102,1.296015,-0.252448,-0.124502,four
2013-01-06,-1.276042,-1.689749,1.048136,-0.288422,three


In [42]:
# Membership filter: keep the rows whose E value is one of the listed labels.
df2[df2.E.isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217,two
2013-01-05,0.750102,1.296015,-0.252448,-0.124502,four


In [44]:
# Integer Series 1..6 on six daily dates starting 2013-01-02, i.e. shifted one
# day later than `dates`, so it only partly overlaps df's index.
s1 = pd.Series(
    data=list(range(1, 7)),
    index=pd.date_range(start='20130102', periods=6),
)
s1

2013-01-02    1
2013-01-03    2
2013-01-04    3
2013-01-05    4
2013-01-06    5
2013-01-07    6
Freq: D, dtype: int64

In [50]:
# Add column F from s1. Values are matched on index label, not position:
# 2013-01-01 is absent from s1 and so gets NaN, while s1's 2013-01-07 entry has
# no matching row in df and is dropped. This modifies df in place.
df['F'] = s1
df

Unnamed: 0,A,B,C,D,F
2013-01-01,-1.649375,-0.61014,0.373953,0.627892,
2013-01-02,1.300977,0.651702,-1.991875,0.936231,1.0
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217,2.0
2013-01-04,-0.600407,-0.50677,2.071598,-0.466982,3.0
2013-01-05,0.750102,1.296015,-0.252448,-0.124502,4.0
2013-01-06,-1.276042,-1.689749,1.048136,-0.288422,5.0


In [52]:
# Overwrite a single value by label: column A on the first date becomes 0.
# This modifies df in place, so earlier displays of df no longer match it.
df.at[dates[0], 'A'] = 0
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,-0.61014,0.373953,0.627892,
2013-01-02,1.300977,0.651702,-1.991875,0.936231,1.0
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217,2.0
2013-01-04,-0.600407,-0.50677,2.071598,-0.466982,3.0
2013-01-05,0.750102,1.296015,-0.252448,-0.124502,4.0
2013-01-06,-1.276042,-1.689749,1.048136,-0.288422,5.0


In [53]:
# Overwrite a single value by position: row 0, column 1 (B on 2013-01-01) becomes 0.
# This modifies df in place.
df.iat[0, 1] = 0
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.373953,0.627892,
2013-01-02,1.300977,0.651702,-1.991875,0.936231,1.0
2013-01-03,-1.159781,0.388314,-0.281565,-0.33217,2.0
2013-01-04,-0.600407,-0.50677,2.071598,-0.466982,3.0
2013-01-05,0.750102,1.296015,-0.252448,-0.124502,4.0
2013-01-06,-1.276042,-1.689749,1.048136,-0.288422,5.0


In [55]:
# Replace column D wholesale with an integer array of 5s, one per row.
# Plain column assignment is used rather than `df.loc[:, 'D'] = ...`: from
# pandas 2.0 onward the .loc form writes the new values into the existing
# float64 column in place, so D would display as 5.0 instead of the integer 5
# recorded in the output. Assigning the column swaps in the new array and
# keeps its integer dtype on every pandas version.
# This modifies df in place.
df['D'] = np.array([5] * len(df))
df

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,0.373953,5,
2013-01-02,1.300977,0.651702,-1.991875,5,1.0
2013-01-03,-1.159781,0.388314,-0.281565,5,2.0
2013-01-04,-0.600407,-0.50677,2.071598,5,3.0
2013-01-05,0.750102,1.296015,-0.252448,5,4.0
2013-01-06,-1.276042,-1.689749,1.048136,5,5.0


In [57]:
# Work on a copy so df itself is left untouched.
# NOTE(review): this rebinds df2 yet again; earlier cells used the name for
# other frames.
df2 = df.copy()
# Setting through a boolean frame: wherever an entry is > 0, overwrite it with
# its negation, so every value ends up non-positive. NaN compares False and is
# left as NaN (see F on 2013-01-01 in the output).
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D,F
2013-01-01,0.0,0.0,-0.373953,-5,
2013-01-02,-1.300977,-0.651702,-1.991875,-5,-1.0
2013-01-03,-1.159781,-0.388314,-0.281565,-5,-2.0
2013-01-04,-0.600407,-0.50677,-2.071598,-5,-3.0
2013-01-05,-0.750102,-1.296015,-0.252448,-5,-4.0
2013-01-06,-1.276042,-1.689749,-1.048136,-5,-5.0
