# 10 Minutes to pandas
## Object Creation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Build a Series from a plain Python list; np.nan marks a missing entry and
# the resulting dtype is float64. A default integer index (0..5) is assigned.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Six consecutive daily timestamps starting on 2017-11-22; used as the row
# index of `df`.
dates = pd.date_range(start='20171122', periods=6)

In [4]:
# Display the DatetimeIndex that was just created.
dates

DatetimeIndex(['2017-11-22', '2017-11-23', '2017-11-24', '2017-11-25',
               '2017-11-26', '2017-11-27'],
              dtype='datetime64[ns]', freq='D')

In [5]:
# 6x4 frame of random normal draws: one row per entry in `dates`, columns
# labelled A-D. No random seed is set in the cells shown, so re-running this
# cell produces different numbers from the saved output.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
2017-11-22,0.008648,-1.923913,0.242064,1.549159
2017-11-23,-0.086024,0.094185,-1.657332,2.147613
2017-11-24,-1.049741,-0.507868,-1.342154,0.674648
2017-11-25,-0.524597,-1.388045,-2.133403,-0.424023
2017-11-26,1.273039,0.262767,0.894762,-0.271638
2017-11-27,-0.919316,-0.15505,-0.242386,-1.284154


In [6]:
# Build a frame from a dict of heterogeneous values. Each key becomes a
# column; scalar values ('A', 'B', 'F') are repeated for every row, while the
# Series / array / Categorical values supply the four rows.
df2 = pd.DataFrame({
    'A': 1.0,
    'B': pd.Timestamp('20171122'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2017-11-22,1.0,3,test,foo
1,1.0,2017-11-22,1.0,3,train,foo
2,1.0,2017-11-22,1.0,3,test,foo
3,1.0,2017-11-22,1.0,3,train,foo


In [7]:
# Each column keeps its own dtype; inspect them all at once.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [8]:
# In an interactive session, type `df2.` and press <TAB> to list the
# attributes and methods available on the frame via tab completion.
# df2.<TAB>

## Viewing Data

In [9]:
# Show the first rows of the frame (five when no count is given).
df.head()

Unnamed: 0,A,B,C,D
2017-11-22,0.008648,-1.923913,0.242064,1.549159
2017-11-23,-0.086024,0.094185,-1.657332,2.147613
2017-11-24,-1.049741,-0.507868,-1.342154,0.674648
2017-11-25,-0.524597,-1.388045,-2.133403,-0.424023
2017-11-26,1.273039,0.262767,0.894762,-0.271638


In [10]:
# Show the last three rows.
df.tail(3)

Unnamed: 0,A,B,C,D
2017-11-25,-0.524597,-1.388045,-2.133403,-0.424023
2017-11-26,1.273039,0.262767,0.894762,-0.271638
2017-11-27,-0.919316,-0.15505,-0.242386,-1.284154


In [11]:
# Row labels of the frame (the DatetimeIndex passed at construction).
df.index

DatetimeIndex(['2017-11-22', '2017-11-23', '2017-11-24', '2017-11-25',
               '2017-11-26', '2017-11-27'],
              dtype='datetime64[ns]', freq='D')

In [12]:
# Column labels of the frame.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [13]:
# The underlying data as a NumPy array (labels are dropped).
# NOTE(review): newer pandas releases recommend DataFrame.to_numpy() instead
# of .values — confirm the pandas version in use before switching.
df.values

array([[ 0.00864802, -1.92391344,  0.24206406,  1.54915869],
       [-0.08602437,  0.09418476, -1.65733177,  2.14761291],
       [-1.04974102, -0.50786797, -1.34215356,  0.67464817],
       [-0.52459735, -1.38804457, -2.13340256, -0.4240235 ],
       [ 1.2730389 ,  0.26276653,  0.89476206, -0.2716382 ],
       [-0.91931603, -0.15504999, -0.24238589, -1.28415354]])

In [14]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,-0.216332,-0.602987,-0.706408,0.398601
std,0.844939,0.872518,1.185192,1.29743
min,-1.049741,-1.923913,-2.133403,-1.284154
25%,-0.820636,-1.168,-1.578537,-0.385927
50%,-0.305311,-0.331459,-0.79227,0.201505
75%,-0.01502,0.031876,0.120952,1.330531
max,1.273039,0.262767,0.894762,2.147613


In [15]:
# Transpose: the column labels become the rows and the dates become columns.
df.T

Unnamed: 0,2017-11-22 00:00:00,2017-11-23 00:00:00,2017-11-24 00:00:00,2017-11-25 00:00:00,2017-11-26 00:00:00,2017-11-27 00:00:00
A,0.008648,-0.086024,-1.049741,-0.524597,1.273039,-0.919316
B,-1.923913,0.094185,-0.507868,-1.388045,0.262767,-0.15505
C,0.242064,-1.657332,-1.342154,-2.133403,0.894762,-0.242386
D,1.549159,2.147613,0.674648,-0.424023,-0.271638,-1.284154


In [16]:
# Sort along axis 1, i.e. reorder the columns by label, in descending order
# (D, C, B, A). The rows are left in place.
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2017-11-22,1.549159,0.242064,-1.923913,0.008648
2017-11-23,2.147613,-1.657332,0.094185,-0.086024
2017-11-24,0.674648,-1.342154,-0.507868,-1.049741
2017-11-25,-0.424023,-2.133403,-1.388045,-0.524597
2017-11-26,-0.271638,0.894762,0.262767,1.273039
2017-11-27,-1.284154,-0.242386,-0.15505,-0.919316


In [17]:
# Reorder the rows by the values in column 'B', smallest first.
df.sort_values(by='B')

Unnamed: 0,A,B,C,D
2017-11-22,0.008648,-1.923913,0.242064,1.549159
2017-11-25,-0.524597,-1.388045,-2.133403,-0.424023
2017-11-24,-1.049741,-0.507868,-1.342154,0.674648
2017-11-27,-0.919316,-0.15505,-0.242386,-1.284154
2017-11-23,-0.086024,0.094185,-1.657332,2.147613
2017-11-26,1.273039,0.262767,0.894762,-0.271638


## Selection

In [18]:
# A full slice through .loc selects every row (and every column).
df.loc[:]

Unnamed: 0,A,B,C,D
2017-11-22,0.008648,-1.923913,0.242064,1.549159
2017-11-23,-0.086024,0.094185,-1.657332,2.147613
2017-11-24,-1.049741,-0.507868,-1.342154,0.674648
2017-11-25,-0.524597,-1.388045,-2.133403,-0.424023
2017-11-26,1.273039,0.262767,0.894762,-0.271638
2017-11-27,-0.919316,-0.15505,-0.242386,-1.284154


### Getting

In [19]:
# Indexing with a single column label returns that column as a Series.
df['A']

2017-11-22    0.008648
2017-11-23   -0.086024
2017-11-24   -1.049741
2017-11-25   -0.524597
2017-11-26    1.273039
2017-11-27   -0.919316
Freq: D, Name: A, dtype: float64

In [20]:
# Slicing with integers inside [] selects ROWS by position (here the first
# three); the end position is excluded.
df[0:3]

Unnamed: 0,A,B,C,D
2017-11-22,0.008648,-1.923913,0.242064,1.549159
2017-11-23,-0.086024,0.094185,-1.657332,2.147613
2017-11-24,-1.049741,-0.507868,-1.342154,0.674648


In [21]:
# Slicing with date strings inside [] selects ROWS by label. The end label
# 2017-11-28 lies past the last date in the index, so the result simply runs
# through the final row (2017-11-27).
df['20171125':'20171128']

Unnamed: 0,A,B,C,D
2017-11-25,-0.524597,-1.388045,-2.133403,-0.424023
2017-11-26,1.273039,0.262767,0.894762,-0.271638
2017-11-27,-0.919316,-0.15505,-0.242386,-1.284154


### Selection by Label

In [22]:
# Cross-section by label: the row for the first date, returned as a Series
# indexed by column name.
df.loc[dates[0]]

A    0.008648
B   -1.923913
C    0.242064
D    1.549159
Name: 2017-11-22 00:00:00, dtype: float64

In [23]:
# All rows, restricted to columns 'A' and 'B' (selected by label).
df.loc[:,['A', 'B']]

Unnamed: 0,A,B
2017-11-22,0.008648,-1.923913
2017-11-23,-0.086024,0.094185
2017-11-24,-1.049741,-0.507868
2017-11-25,-0.524597,-1.388045
2017-11-26,1.273039,0.262767
2017-11-27,-0.919316,-0.15505


In [24]:
# Label slice on the rows combined with a list of column labels. The end
# label is past the last index entry, so rows 2017-11-25 through 2017-11-27
# are returned.
df.loc['20171125':'20171128',['A', 'B']]

Unnamed: 0,A,B
2017-11-25,-0.524597,-1.388045
2017-11-26,1.273039,0.262767
2017-11-27,-0.919316,-0.15505


In [25]:
# A single row label (given as a date string) with a list of columns reduces
# the result to a Series.
df.loc['20171125',['A', 'B']]

A   -0.524597
B   -1.388045
Name: 2017-11-25 00:00:00, dtype: float64

In [26]:
# Same kind of selection, using a Timestamp from `dates` as the row label
# instead of a date string.
df.loc[dates[0],['A', 'B']]

A    0.008648
B   -1.923913
Name: 2017-11-22 00:00:00, dtype: float64

In [27]:
# .at retrieves a single scalar by row label and column label.
df.at[dates[0],'A']

0.0086480193392844858

### Selection by Position

In [28]:
# Select by integer position: the row at position 3 (the fourth row).
df.iloc[3]

A   -0.524597
B   -1.388045
C   -2.133403
D   -0.424023
Name: 2017-11-25 00:00:00, dtype: float64

In [29]:
# Positional slices on both axes: rows 3-4 and columns 0-1. As with Python
# slices, the end position is excluded.
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2017-11-25,-0.524597,-1.388045
2017-11-26,1.273039,0.262767


In [30]:
# Lists of integer positions pick out specific rows (1, 2, 4) and specific
# columns (0 and 2, i.e. 'A' and 'C').
df.iloc[[1, 2, 4],[0, 2]]

Unnamed: 0,A,C
2017-11-23,-0.086024,-1.657332
2017-11-24,-1.049741,-1.342154
2017-11-26,1.273039,0.894762


In [31]:
# Slice rows by position (1-2) while keeping every column.
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2017-11-23,-0.086024,0.094185,-1.657332,2.147613
2017-11-24,-1.049741,-0.507868,-1.342154,0.674648


In [32]:
# Keep every row while slicing columns by position (1-2, i.e. 'B' and 'C').
df.iloc[:, 1:3]

Unnamed: 0,B,C
2017-11-22,-1.923913,0.242064
2017-11-23,0.094185,-1.657332
2017-11-24,-0.507868,-1.342154
2017-11-25,-1.388045,-2.133403
2017-11-26,0.262767,0.894762
2017-11-27,-0.15505,-0.242386


In [33]:
# Two integer positions return a single scalar (row 1, column 1).
df.iloc[1, 1]

0.094184764007436944

In [34]:
# .iat is the scalar-only positional accessor; it returns the same value as
# df.iloc[1, 1].
df.iat[1, 1]

0.094184764007436944

### Boolean Indexing

In [35]:
# Filter rows with a boolean Series: keep only rows whose 'A' value is
# positive.
df[df.A > 0]

Unnamed: 0,A,B,C,D
2017-11-22,0.008648,-1.923913,0.242064,1.549159
2017-11-26,1.273039,0.262767,0.894762,-0.271638


In [36]:
# Element-wise mask with a boolean frame: the shape is kept, and every entry
# that fails the condition shows up as a missing value.
df[df > 0]

Unnamed: 0,A,B,C,D
2017-11-22,0.008648,,0.242064,1.549159
2017-11-23,,0.094185,,2.147613
2017-11-24,,,,0.674648
2017-11-25,,,,
2017-11-26,1.273039,0.262767,0.894762,
2017-11-27,,,,


In [37]:
# Work on a copy so that adding a column does not modify `df`.
# Note: this rebinds `df2`, which until now referred to the mixed-dtype frame
# built earlier in the notebook.
df2 = df.copy()

In [38]:
# Add a string column 'E' (one value per row) to filter on next.
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2017-11-22,0.008648,-1.923913,0.242064,1.549159,one
2017-11-23,-0.086024,0.094185,-1.657332,2.147613,one
2017-11-24,-1.049741,-0.507868,-1.342154,0.674648,two
2017-11-25,-0.524597,-1.388045,-2.133403,-0.424023,three
2017-11-26,1.273039,0.262767,0.894762,-0.271638,four
2017-11-27,-0.919316,-0.15505,-0.242386,-1.284154,three


In [39]:
# isin() builds a boolean mask that is True where 'E' is one of the listed
# values; indexing with it keeps only those rows.
df2[df2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2017-11-24,-1.049741,-0.507868,-1.342154,0.674648,two
2017-11-26,1.273039,0.262767,0.894762,-0.271638,four


### Setting