In [3]:
import pandas as pd
import numpy as np

In [2]:
# Build a 6x4 integer frame: standard-normal draws scaled by 10, then cast to int64.
df = pd.DataFrame(
    (np.random.randn(6, 4) * 10).astype('int64'),
    columns=list('ABCD'),
)
# Echo the frame together with its (rows, columns) shape.
print("created dataframe:\n{}\n shape: {}".format(df, df.shape))

created dataframe:
    A   B   C   D
0  -7   3   4   5
1  -9 -11 -20   9
2  20  15  -2  -3
3  10   2 -13  23
4   9  -6   8  -6
5   2  11   3   2
 shape: (6, 4)


In [3]:
# Positional slicing with .iloc: rows at positions 2-4 and columns at positions 1-2
# (here 'B' and 'C'). The end bound of each slice is exclusive.
df.iloc[2:5,1:3]

Unnamed: 0,B,C
2,15,-2
3,2,-13
4,-6,8


In [4]:
# Start from a 5x5 frame filled entirely with NaN ...
df1 = pd.DataFrame(np.full((5, 5), np.nan))
# ... then overwrite the row labelled 2 with five random integers drawn from 1..69.
df1.loc[2] = np.random.choice(range(1, 70), size=5)
df1

Unnamed: 0,0,1,2,3,4
0,,,,,
1,,,,,
2,5.0,27.0,32.0,37.0,26.0
3,,,,,
4,,,,,


In [5]:
# Drop every row that contains at least one NaN; only the populated row 2 remains.
# The result is a new frame; df1 is not reassigned here.
df1.dropna(axis=0, how='any')

Unnamed: 0,0,1,2,3,4
2,5.0,27.0,32.0,37.0,26.0


**10 Minutes to Pandas:**
- NumPy arrays have one dtype for the entire array, while pandas DataFrames have one dtype per column. 


In [13]:
# Five random integers from 1..69, labelled 'A' through 'E'.
s = pd.Series(
    data=np.random.choice(range(1, 70), size=5),
    index=list('ABCDE'),
)
s

A    42
B    17
C    59
D    39
E    53
dtype: int64

In [14]:
# Six consecutive calendar days starting on 2013-01-01, as a DatetimeIndex.
dates = pd.date_range(start='20130101', periods=6, freq='D')
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [21]:
# 6x4 frame of standard-normal draws, one row per entry of `dates`, columns 'A'..'D'.
df2 = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=list('ABCD'),
)

# Show the frame, its per-column dtypes, its index and its column labels,
# each separated from the next by a blank line.
print('\n\n'.join(str(view) for view in (df2, df2.dtypes, df2.index, df2.columns)))

                   A         B         C         D
2013-01-01 -0.063156 -0.616466  2.075197  0.357861
2013-01-02 -0.094448  1.915172 -0.848282  0.002974
2013-01-03  0.204898 -0.661232  1.625689  1.064422
2013-01-04 -0.257371  2.111292 -0.987040  0.110638
2013-01-05  1.034717  0.004153 -1.355525  0.759535
2013-01-06  1.301206 -0.171885  0.511670  0.922483

A    float64
B    float64
C    float64
D    float64
dtype: object

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

Index(['A', 'B', 'C', 'D'], dtype='object')


In [22]:
# Export the frame's values as a plain NumPy array; the index and column labels
# are not part of the result.
df2.to_numpy()

array([[-0.06315649, -0.6164662 ,  2.07519723,  0.3578612 ],
       [-0.09444774,  1.91517239, -0.84828238,  0.00297439],
       [ 0.204898  , -0.66123211,  1.62568942,  1.06442157],
       [-0.25737069,  2.11129203, -0.9870402 ,  0.1106376 ],
       [ 1.0347166 ,  0.00415335, -1.35552507,  0.75953544],
       [ 1.30120552, -0.17188536,  0.51167   ,  0.92248306]])

In [23]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df2.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.354308,0.430172,0.170285,0.536319
std,0.652948,1.253951,1.453851,0.441667
min,-0.257371,-0.661232,-1.355525,0.002974
25%,-0.086625,-0.505321,-0.952351,0.172443
50%,0.070871,-0.083866,-0.168306,0.558698
75%,0.827262,1.437418,1.347185,0.881746
max,1.301206,2.111292,2.075197,1.064422


In [25]:
# Transpose: the column labels 'A'..'D' become the row index and the dates become the columns.
df2.T

Unnamed: 0,2013-01-01,2013-01-02,2013-01-03,2013-01-04,2013-01-05,2013-01-06
A,-0.063156,-0.094448,0.204898,-0.257371,1.034717,1.301206
B,-0.616466,1.915172,-0.661232,2.111292,0.004153,-0.171885
C,2.075197,-0.848282,1.625689,-0.98704,-1.355525,0.51167
D,0.357861,0.002974,1.064422,0.110638,0.759535,0.922483


In [31]:
# Order the rows by the values in column 'A', largest first.
df2.sort_values(by='A', ascending=False)

Unnamed: 0,A,B,C,D
2013-01-06,1.301206,-0.171885,0.51167,0.922483
2013-01-05,1.034717,0.004153,-1.355525,0.759535
2013-01-03,0.204898,-0.661232,1.625689,1.064422
2013-01-01,-0.063156,-0.616466,2.075197,0.357861
2013-01-02,-0.094448,1.915172,-0.848282,0.002974
2013-01-04,-0.257371,2.111292,-0.98704,0.110638


In [41]:
# Label-based selection: every row (`:`), restricted to the columns 'A' and 'B'.
df2.loc[:,['A','B']]

Unnamed: 0,A,B
2013-01-01,-0.063156,-0.616466
2013-01-02,-0.094448,1.915172
2013-01-03,0.204898,-0.661232
2013-01-04,-0.257371,2.111292
2013-01-05,1.034717,0.004153
2013-01-06,1.301206,-0.171885


In [42]:
# Scalar lookup by label: the single value at row `dates[0]` (the first date) and column 'A'.
df2.at[dates[0],'A']

-0.06315649491072446

### Boolean indexing


In [45]:
# Boolean indexing with a frame-wide mask: `df2 > 0` is a same-shaped frame of
# True/False values. Indexing with it keeps the values greater than 0 and
# replaces all others with NaN, so the result has the same shape as df2.
df2[df2 > 0]

Unnamed: 0,A,B,C,D
2013-01-01,,,2.075197,0.357861
2013-01-02,,1.915172,,0.002974
2013-01-03,0.204898,,1.625689,1.064422
2013-01-04,,2.111292,,0.110638
2013-01-05,1.034717,0.004153,,0.759535
2013-01-06,1.301206,,0.51167,0.922483
