In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

##### Create a series

In [5]:
s = pd.Series([1,24,12,43,np.nan,86])
s

0     1.0
1    24.0
2    12.0
3    43.0
4     NaN
5    86.0
dtype: float64

##### Creating a DataFrame

In [8]:
dates = pd.date_range('20170101', periods =6)
dates

DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04',
               '2017-01-05', '2017-01-06'],
              dtype='datetime64[ns]', freq='D')

In [13]:
dataframe = pd.DataFrame(np.random.randn(6,4), index =dates, columns = list('ABCD'))
dataframe

Unnamed: 0,A,B,C,D
2017-01-01,0.118687,0.031187,0.803835,0.220399
2017-01-02,0.136052,0.204893,0.172129,0.117117
2017-01-03,-2.046273,0.690854,-0.507534,-0.392468
2017-01-04,-0.090481,-0.614135,0.556893,-0.170836
2017-01-05,-0.063897,1.543748,0.039912,-1.752209
2017-01-06,-0.332118,-0.370947,0.357127,0.146019


##### Creating a DataFrame using dictionary objects


In [59]:
df2 = pd.DataFrame({'A': list('1324'),
                    'B':pd.Timestamp(20170415),
                    'C':pd.Series(np.random.rand(4)),
                    'D':np.array([3] * 4,dtype='int32')
                   } )
df2

Unnamed: 0,A,B,C,D
0,1,1970-01-01 00:00:00.020170415,0.027402,3
1,3,1970-01-01 00:00:00.020170415,0.667314,3
2,2,1970-01-01 00:00:00.020170415,0.492924,3
3,4,1970-01-01 00:00:00.020170415,0.120578,3


In [30]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float64
D             int32
dtype: object

##### Viewing Data

In [37]:
df2.head(2)


Unnamed: 0,A,B,C,D
0,1.0,1970-01-01 00:00:00.020170415,0.817827,3
1,1.0,1970-01-01 00:00:00.020170415,0.68028,3


In [39]:
df2.tail(1)

Unnamed: 0,A,B,C,D
3,1.0,1970-01-01 00:00:00.020170415,0.183806,3


In [42]:
df2.index

RangeIndex(start=0, stop=4, step=1)

In [45]:
df2.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [48]:
df2.values

array([[1.0, Timestamp('1970-01-01 00:00:00.020170415'),
        0.8979396049752766, 3],
       [1.0, Timestamp('1970-01-01 00:00:00.020170415'),
        0.15185904168991649, 3],
       [1.0, Timestamp('1970-01-01 00:00:00.020170415'),
        0.050986087713825956, 3],
       [1.0, Timestamp('1970-01-01 00:00:00.020170415'),
        0.13446127451244538, 3]], dtype=object)

In [50]:
df2.describe()

Unnamed: 0,A,C,D
count,4.0,4.0,4.0
mean,1.0,0.308812,3.0
std,0.0,0.395212,0.0
min,1.0,0.050986,3.0
25%,1.0,0.113592,3.0
50%,1.0,0.14316,3.0
75%,1.0,0.338379,3.0
max,1.0,0.89794,3.0


In [51]:
df2.T

Unnamed: 0,0,1,2,3
A,1,1,1,1
B,1970-01-01 00:00:00.020170415,1970-01-01 00:00:00.020170415,1970-01-01 00:00:00.020170415,1970-01-01 00:00:00.020170415
C,0.89794,0.151859,0.0509861,0.134461
D,3,3,3,3


In [53]:
?df2.sort_index()


In [64]:
df2.sort_index(axis=1)

Unnamed: 0,A,B,C,D
0,1,1970-01-01 00:00:00.020170415,0.027402,3
1,3,1970-01-01 00:00:00.020170415,0.667314,3
2,2,1970-01-01 00:00:00.020170415,0.492924,3
3,4,1970-01-01 00:00:00.020170415,0.120578,3


In [66]:
df2.sort_values(by='A')

Unnamed: 0,A,B,C,D
0,1,1970-01-01 00:00:00.020170415,0.027402,3
2,2,1970-01-01 00:00:00.020170415,0.492924,3
1,3,1970-01-01 00:00:00.020170415,0.667314,3
3,4,1970-01-01 00:00:00.020170415,0.120578,3


##### Selection

In [67]:
df2['A']

0    1
1    3
2    2
3    4
Name: A, dtype: object

In [68]:
df2.A

0    1
1    3
2    2
3    4
Name: A, dtype: object

In [69]:
df2[0:2]

Unnamed: 0,A,B,C,D
0,1,1970-01-01 00:00:00.020170415,0.027402,3
1,3,1970-01-01 00:00:00.020170415,0.667314,3


##### Selections By Label

In [76]:
dataframe.loc[dates[0]]

A    0.118687
B    0.031187
C    0.803835
D    0.220399
Name: 2017-01-01 00:00:00, dtype: float64

In [77]:
df2.loc[:,['A','B']]

Unnamed: 0,A,B
0,1,1970-01-01 00:00:00.020170415
1,3,1970-01-01 00:00:00.020170415
2,2,1970-01-01 00:00:00.020170415
3,4,1970-01-01 00:00:00.020170415


In [83]:
df2.loc[0:2,['A','B']]

Unnamed: 0,A,B
0,1,1970-01-01 00:00:00.020170415
1,3,1970-01-01 00:00:00.020170415
2,2,1970-01-01 00:00:00.020170415


In [85]:
df2.loc[0,'A']

'1'

In [87]:
df2.at[1,'A']

'3'

##### Selection by Position

In [88]:
df2.iloc[2]

A                                2
B    1970-01-01 00:00:00.020170415
C                         0.492924
D                                3
Name: 2, dtype: object

In [95]:
df2.iloc[0:2,0:2]

Unnamed: 0,A,B
0,1,1970-01-01 00:00:00.020170415
1,3,1970-01-01 00:00:00.020170415


In [99]:
df2.iloc[[0,2,3],0:2]

Unnamed: 0,A,B
0,1,1970-01-01 00:00:00.020170415
2,2,1970-01-01 00:00:00.020170415
3,4,1970-01-01 00:00:00.020170415


##### Boolean Indexing

In [102]:
dataframe[dataframe.A > 0]

Unnamed: 0,A,B,C,D
2017-01-01,0.118687,0.031187,0.803835,0.220399
2017-01-02,0.136052,0.204893,0.172129,0.117117


In [111]:
df = dataframe.copy()
df['E'] = ['one','one','two','three','four','three']
df

Unnamed: 0,A,B,C,D,E
2017-01-01,0.118687,0.031187,0.803835,0.220399,one
2017-01-02,0.136052,0.204893,0.172129,0.117117,one
2017-01-03,-2.046273,0.690854,-0.507534,-0.392468,two
2017-01-04,-0.090481,-0.614135,0.556893,-0.170836,three
2017-01-05,-0.063897,1.543748,0.039912,-1.752209,four
2017-01-06,-0.332118,-0.370947,0.357127,0.146019,three


In [112]:
df[df['E'].isin(['three'])]

Unnamed: 0,A,B,C,D,E
2017-01-04,-0.090481,-0.614135,0.556893,-0.170836,three
2017-01-06,-0.332118,-0.370947,0.357127,0.146019,three


##### Setting

In [114]:
s1 = pd.Series([1,2,3,4,5,6], 
               index = pd.date_range('20170415',periods =6))
s1

2017-04-15    1
2017-04-16    2
2017-04-17    3
2017-04-18    4
2017-04-19    5
2017-04-20    6
Freq: D, dtype: int64

In [115]:
df['F'] = s1