In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Integers, one float and one missing value: the NaN forces the whole
# Series to be stored as float64.
s = pd.Series(data=[1, 2, 3, 4, np.nan, 10.0, 100])
s

0      1.0
1      2.0
2      3.0
3      4.0
4      NaN
5     10.0
6    100.0
dtype: float64

In [3]:
pd.Series?

In [4]:
# 100 consecutive calendar days starting on 2017-03-10 (daily frequency).
dates = pd.date_range(start='20170310', periods=100, freq='D')
dates

DatetimeIndex(['2017-03-10', '2017-03-11', '2017-03-12', '2017-03-13',
               '2017-03-14', '2017-03-15', '2017-03-16', '2017-03-17',
               '2017-03-18', '2017-03-19', '2017-03-20', '2017-03-21',
               '2017-03-22', '2017-03-23', '2017-03-24', '2017-03-25',
               '2017-03-26', '2017-03-27', '2017-03-28', '2017-03-29',
               '2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02',
               '2017-04-03', '2017-04-04', '2017-04-05', '2017-04-06',
               '2017-04-07', '2017-04-08', '2017-04-09', '2017-04-10',
               '2017-04-11', '2017-04-12', '2017-04-13', '2017-04-14',
               '2017-04-15', '2017-04-16', '2017-04-17', '2017-04-18',
               '2017-04-19', '2017-04-20', '2017-04-21', '2017-04-22',
               '2017-04-23', '2017-04-24', '2017-04-25', '2017-04-26',
               '2017-04-27', '2017-04-28', '2017-04-29', '2017-04-30',
               '2017-05-01', '2017-05-02', '2017-05-03', '2017-05-04',
      

In [5]:
dates.size, dates.shape, type(dates), len(dates)

(100, (100,), pandas.tseries.index.DatetimeIndex, 100)

```
Creating a DataFrame by passing a numpy array, with a datetime index and labeled columns:
```

In [6]:
df = pd.DataFrame?

In [7]:
# Seed NumPy's global RNG so the same random values are drawn on every
# "Restart & Run All"; the seed value itself is arbitrary.
np.random.seed(0)
# One row per entry in `dates` (instead of a hard-coded 100), four columns A-D.
df = pd.DataFrame(np.random.randn(len(dates), 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2017-03-10,-0.178776,0.539217,-0.741046,1.413005
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.250500,0.312310
2017-03-13,-0.479280,-0.811287,0.371920,0.192895
2017-03-14,1.593042,0.974134,0.274854,-2.207133
2017-03-15,-0.950220,1.728647,-0.577922,1.007841
2017-03-16,0.504471,-0.604975,1.249009,-0.329755
2017-03-17,-0.182371,-0.033787,1.369554,-1.129134
2017-03-18,-0.584976,-1.606719,-0.348627,1.644637
2017-03-19,1.190062,-0.686335,-1.288669,0.721466


In [8]:
df.size, df.shape, type(df), len(df)

(400, (100, 4), pandas.core.frame.DataFrame, 100)

```
Creating a DataFrame by passing a dict of objects that can be converted to a series-like structure.
```

In [13]:
# Deliberate failure: 'E' holds five values while 'C' and 'D' hold four, so
# pandas cannot build the frame and raises ValueError. The error is the point
# of this cell, so it is caught and printed instead of aborting a full re-run.
try:
    df2 = pd.DataFrame({
        'A': 1.,
        'B': pd.Timestamp('20170310'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train", "python", "javascript", "golang"]),
        'F': 'foo'
    })
except ValueError as err:
    print('ValueError:', err)

ValueError: arrays must all be same length

In [14]:
# Same dict as the failing attempt, but 'E' now has four values, matching
# 'C' and 'D'. The scalars in 'A', 'B' and 'F' are repeated on every row.
df2 = pd.DataFrame({
    'A': 1.0,
    'B': pd.Timestamp('20170310'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.full(4, 3, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'python', 'javascript']),
    'F': 'foo',
})
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2017-03-10,1.0,3,test,foo
1,1.0,2017-03-10,1.0,3,train,foo
2,1.0,2017-03-10,1.0,3,python,foo
3,1.0,2017-03-10,1.0,3,javascript,foo


In [15]:
df2.dtypes?

In [18]:
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [20]:
len(dir(df2))

431

In [22]:
len(dir(pd))

181

In [23]:
len(dir(pd.DataFrame))

425

### Viewing Data

In [24]:
df

Unnamed: 0,A,B,C,D
2017-03-10,-0.178776,0.539217,-0.741046,1.413005
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.250500,0.312310
2017-03-13,-0.479280,-0.811287,0.371920,0.192895
2017-03-14,1.593042,0.974134,0.274854,-2.207133
2017-03-15,-0.950220,1.728647,-0.577922,1.007841
2017-03-16,0.504471,-0.604975,1.249009,-0.329755
2017-03-17,-0.182371,-0.033787,1.369554,-1.129134
2017-03-18,-0.584976,-1.606719,-0.348627,1.644637
2017-03-19,1.190062,-0.686335,-1.288669,0.721466


In [25]:
df.head()

Unnamed: 0,A,B,C,D
2017-03-10,-0.178776,0.539217,-0.741046,1.413005
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.2505,0.31231
2017-03-13,-0.47928,-0.811287,0.37192,0.192895
2017-03-14,1.593042,0.974134,0.274854,-2.207133


In [26]:
df.tail()

Unnamed: 0,A,B,C,D
2017-06-13,-0.5236,1.554992,-0.34287,0.96906
2017-06-14,0.503406,0.067988,0.649567,0.090299
2017-06-15,-0.884793,-0.382621,-0.565265,0.329218
2017-06-16,1.31417,1.562244,1.30336,-0.661457
2017-06-17,-0.465856,0.653827,-1.369861,-1.220598


In [27]:
df.head(10)

Unnamed: 0,A,B,C,D
2017-03-10,-0.178776,0.539217,-0.741046,1.413005
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.2505,0.31231
2017-03-13,-0.47928,-0.811287,0.37192,0.192895
2017-03-14,1.593042,0.974134,0.274854,-2.207133
2017-03-15,-0.95022,1.728647,-0.577922,1.007841
2017-03-16,0.504471,-0.604975,1.249009,-0.329755
2017-03-17,-0.182371,-0.033787,1.369554,-1.129134
2017-03-18,-0.584976,-1.606719,-0.348627,1.644637
2017-03-19,1.190062,-0.686335,-1.288669,0.721466


In [35]:
df.tail(3)

Unnamed: 0,A,B,C,D
2017-06-15,-0.884793,-0.382621,-0.565265,0.329218
2017-06-16,1.31417,1.562244,1.30336,-0.661457
2017-06-17,-0.465856,0.653827,-1.369861,-1.220598


```
Display the index, columns, and the underlying numpy data
```

In [29]:
df.index

DatetimeIndex(['2017-03-10', '2017-03-11', '2017-03-12', '2017-03-13',
               '2017-03-14', '2017-03-15', '2017-03-16', '2017-03-17',
               '2017-03-18', '2017-03-19', '2017-03-20', '2017-03-21',
               '2017-03-22', '2017-03-23', '2017-03-24', '2017-03-25',
               '2017-03-26', '2017-03-27', '2017-03-28', '2017-03-29',
               '2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02',
               '2017-04-03', '2017-04-04', '2017-04-05', '2017-04-06',
               '2017-04-07', '2017-04-08', '2017-04-09', '2017-04-10',
               '2017-04-11', '2017-04-12', '2017-04-13', '2017-04-14',
               '2017-04-15', '2017-04-16', '2017-04-17', '2017-04-18',
               '2017-04-19', '2017-04-20', '2017-04-21', '2017-04-22',
               '2017-04-23', '2017-04-24', '2017-04-25', '2017-04-26',
               '2017-04-27', '2017-04-28', '2017-04-29', '2017-04-30',
               '2017-05-01', '2017-05-02', '2017-05-03', '2017-05-04',
      

In [30]:
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [34]:
df.values

array([[ -1.78775533e-01,   5.39216661e-01,  -7.41046042e-01,
          1.41300465e+00],
       [ -6.42461270e-01,   7.26548653e-01,  -9.83052938e-01,
          2.25678802e-02],
       [ -3.94421340e-01,  -7.43219012e-01,   1.25050005e+00,
          3.12310427e-01],
       [ -4.79279567e-01,  -8.11287347e-01,   3.71920040e-01,
          1.92895082e-01],
       [  1.59304236e+00,   9.74133810e-01,   2.74854184e-01,
         -2.20713290e+00],
       [ -9.50220088e-01,   1.72864740e+00,  -5.77921613e-01,
          1.00784095e+00],
       [  5.04471240e-01,  -6.04975475e-01,   1.24900908e+00,
         -3.29754877e-01],
       [ -1.82370980e-01,  -3.37874908e-02,   1.36955439e+00,
         -1.12913446e+00],
       [ -5.84976317e-01,  -1.60671867e+00,  -3.48627355e-01,
          1.64463650e+00],
       [  1.19006186e+00,  -6.86334766e-01,  -1.28866947e+00,
          7.21466164e-01],
       [ -2.18155840e+00,   1.88929718e+00,   1.91965190e-01,
         -3.24963589e-01],
       [ -2.14662849e

```
describe() shows a quick statistical summary of your data.


```

In [38]:
df.describe()

Unnamed: 0,A,B,C,D
count,100.0,100.0,100.0,100.0
mean,-0.00391,0.19463,0.181343,0.194909
std,0.975611,0.986082,1.038781,1.063164
min,-2.339861,-2.641285,-2.500271,-2.207133
25%,-0.678548,-0.466505,-0.390736,-0.455382
50%,-0.016729,0.173434,0.14715,0.197039
75%,0.785422,0.885596,0.728301,0.966271
max,1.953092,2.741329,2.685029,3.134975


In [39]:
df.T

Unnamed: 0,2017-03-10 00:00:00,2017-03-11 00:00:00,2017-03-12 00:00:00,2017-03-13 00:00:00,2017-03-14 00:00:00,2017-03-15 00:00:00,2017-03-16 00:00:00,2017-03-17 00:00:00,2017-03-18 00:00:00,2017-03-19 00:00:00,...,2017-06-08 00:00:00,2017-06-09 00:00:00,2017-06-10 00:00:00,2017-06-11 00:00:00,2017-06-12 00:00:00,2017-06-13 00:00:00,2017-06-14 00:00:00,2017-06-15 00:00:00,2017-06-16 00:00:00,2017-06-17 00:00:00
A,-0.178776,-0.642461,-0.394421,-0.47928,1.593042,-0.95022,0.504471,-0.182371,-0.584976,1.190062,...,-0.148133,-0.915631,1.016768,1.880632,0.508111,-0.5236,0.503406,-0.884793,1.31417,-0.465856
B,0.539217,0.726549,-0.743219,-0.811287,0.974134,1.728647,-0.604975,-0.033787,-1.606719,-0.686335,...,1.409515,-0.167008,1.437786,0.957549,-1.275609,1.554992,0.067988,-0.382621,1.562244,0.653827
C,-0.741046,-0.983053,1.2505,0.37192,0.274854,-0.577922,1.249009,1.369554,-0.348627,-1.288669,...,-0.042224,0.041316,-1.47926,-0.199838,0.699853,-0.34287,0.649567,-0.565265,1.30336,-1.369861
D,1.413005,0.022568,0.31231,0.192895,-2.207133,1.007841,-0.329755,-1.129134,1.644637,0.721466,...,-1.176891,0.965342,-0.772771,0.317153,-0.194908,0.96906,0.090299,0.329218,-0.661457,-1.220598


In [40]:
(df.T).size, (df.T).shape, type(df.T), len(df.T)

(400, (4, 100), pandas.core.frame.DataFrame, 4)

```
Sorting by an axis
```

In [48]:
df.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2017-03-10,1.413005,-0.741046,0.539217,-0.178776
2017-03-11,0.022568,-0.983053,0.726549,-0.642461
2017-03-12,0.312310,1.250500,-0.743219,-0.394421
2017-03-13,0.192895,0.371920,-0.811287,-0.479280
2017-03-14,-2.207133,0.274854,0.974134,1.593042
2017-03-15,1.007841,-0.577922,1.728647,-0.950220
2017-03-16,-0.329755,1.249009,-0.604975,0.504471
2017-03-17,-1.129134,1.369554,-0.033787,-0.182371
2017-03-18,1.644637,-0.348627,-1.606719,-0.584976
2017-03-19,0.721466,-1.288669,-0.686335,1.190062


In [49]:
# Same call as the previous cell, with the integer 0 standing in for False;
# the columns come back in the same D, C, B, A order.
df.sort_index(axis=1, ascending=0)

Unnamed: 0,D,C,B,A
2017-03-10,1.413005,-0.741046,0.539217,-0.178776
2017-03-11,0.022568,-0.983053,0.726549,-0.642461
2017-03-12,0.312310,1.250500,-0.743219,-0.394421
2017-03-13,0.192895,0.371920,-0.811287,-0.479280
2017-03-14,-2.207133,0.274854,0.974134,1.593042
2017-03-15,1.007841,-0.577922,1.728647,-0.950220
2017-03-16,-0.329755,1.249009,-0.604975,0.504471
2017-03-17,-1.129134,1.369554,-0.033787,-0.182371
2017-03-18,1.644637,-0.348627,-1.606719,-0.584976
2017-03-19,0.721466,-1.288669,-0.686335,1.190062


In [52]:
# Rows ordered by column 'B', largest value first.
df.sort_values(by='B', ascending=False)

Unnamed: 0,A,B,C,D
2017-06-01,0.772414,2.741329,1.109213,-0.040620
2017-05-22,-2.339861,2.040201,-0.641177,1.459509
2017-03-20,-2.181558,1.889297,0.191965,-0.324964
2017-03-15,-0.950220,1.728647,-0.577922,1.007841
2017-05-04,-0.835951,1.648181,0.026968,-0.445551
2017-03-24,0.542630,1.612486,1.084542,2.573362
2017-06-16,1.314170,1.562244,1.303360,-0.661457
2017-06-13,-0.523600,1.554992,-0.342870,0.969060
2017-04-05,0.002784,1.527573,0.632101,0.813024
2017-06-10,1.016768,1.437786,-1.479260,-0.772771


### Selection

##### Getting

In [53]:
df.A

2017-03-10   -0.178776
2017-03-11   -0.642461
2017-03-12   -0.394421
2017-03-13   -0.479280
2017-03-14    1.593042
2017-03-15   -0.950220
2017-03-16    0.504471
2017-03-17   -0.182371
2017-03-18   -0.584976
2017-03-19    1.190062
2017-03-20   -2.181558
2017-03-21   -2.146628
2017-03-22   -0.596098
2017-03-23    0.880897
2017-03-24    0.542630
2017-03-25   -0.540684
2017-03-26    0.540082
2017-03-27   -0.231066
2017-03-28    0.582931
2017-03-29    0.026827
2017-03-30   -0.553481
2017-03-31    1.953092
2017-04-01   -0.786808
2017-04-02    0.426848
2017-04-03    0.173966
2017-04-04    0.991944
2017-04-05    0.002784
2017-04-06   -0.348132
2017-04-07    0.324946
2017-04-08    0.607592
                ...   
2017-05-19   -1.029144
2017-05-20    1.623873
2017-05-21   -0.355646
2017-05-22   -2.339861
2017-05-23   -0.279068
2017-05-24    1.094591
2017-05-25    0.593120
2017-05-26    1.172580
2017-05-27   -1.076265
2017-05-28   -1.391650
2017-05-29    0.668351
2017-05-30    0.395204
2017-05-31 

In [54]:
len(df.A)

100

In [56]:
# df["A"] -- bracket-style spelling of df.A; presumably left commented out to
# avoid repeating the long output above (TODO confirm, or delete this cell).

In [57]:
len(df["A"])

100

In [58]:
df[0:3]

Unnamed: 0,A,B,C,D
2017-03-10,-0.178776,0.539217,-0.741046,1.413005
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.2505,0.31231


In [59]:
df['20170310':'20170312']

Unnamed: 0,A,B,C,D
2017-03-10,-0.178776,0.539217,-0.741046,1.413005
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.2505,0.31231


##### Selection by Label

In [60]:
dates[0]

Timestamp('2017-03-10 00:00:00', freq='D')

In [63]:
df.loc[dates[0]]

A   -0.178776
B    0.539217
C   -0.741046
D    1.413005
Name: 2017-03-10 00:00:00, dtype: float64

In [64]:
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2017-03-10,-0.178776,0.539217
2017-03-11,-0.642461,0.726549
2017-03-12,-0.394421,-0.743219
2017-03-13,-0.479280,-0.811287
2017-03-14,1.593042,0.974134
2017-03-15,-0.950220,1.728647
2017-03-16,0.504471,-0.604975
2017-03-17,-0.182371,-0.033787
2017-03-18,-0.584976,-1.606719
2017-03-19,1.190062,-0.686335


In [70]:
df.loc[:, ['A', 'B']]

Unnamed: 0,A,B
2017-03-10,-0.178776,0.539217
2017-03-11,-0.642461,0.726549
2017-03-12,-0.394421,-0.743219
2017-03-13,-0.479280,-0.811287
2017-03-14,1.593042,0.974134
2017-03-15,-0.950220,1.728647
2017-03-16,0.504471,-0.604975
2017-03-17,-0.182371,-0.033787
2017-03-18,-0.584976,-1.606719
2017-03-19,1.190062,-0.686335


In [72]:
df[0:3].loc[:, ['A', 'B']] # positional row slice first, then label-based column selection

Unnamed: 0,A,B
2017-03-10,-0.178776,0.539217
2017-03-11,-0.642461,0.726549
2017-03-12,-0.394421,-0.743219


```
Label Slicing
```

In [73]:
df.loc['20170310':'20170312', ['A', 'B']]

Unnamed: 0,A,B
2017-03-10,-0.178776,0.539217
2017-03-11,-0.642461,0.726549
2017-03-12,-0.394421,-0.743219


```
Reduction in the dimensions of the returned object
```

In [74]:
df.loc['20170310', ['A', 'B']]

A   -0.178776
B    0.539217
Name: 2017-03-10 00:00:00, dtype: float64

In [79]:
# selecting a label
dates[0]

Timestamp('2017-03-10 00:00:00', freq='D')

```
For getting a scalar value
```

In [77]:
df.loc[dates[0], ['A', 'B']]

A   -0.178776
B    0.539217
Name: 2017-03-10 00:00:00, dtype: float64

In [78]:
df.loc[dates[0], 'A']

-0.17877553329789397

```
For getting fast access to a scalar (equivalent to the prior method)
```

In [80]:
df.at?

In [81]:
# Fast label-based scalar accessor
df.at[dates[0], 'A']

-0.17877553329789397

In [82]:
%timeit df.loc[dates[0], 'A']

The slowest run took 4.11 times longer than the fastest. This could mean that an intermediate result is being cached.
1000 loops, best of 3: 179 µs per loop


In [83]:
%timeit df.at[dates[0], 'A']

The slowest run took 17.76 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 11.9 µs per loop


##### Selection by Position

```
Select via the position of the passed integers
```

In [94]:
df.iloc[3] # returns the row at integer position 3 (the fourth row) as a Series

A   -0.479280
B   -0.811287
C    0.371920
D    0.192895
Name: 2017-03-13 00:00:00, dtype: float64

```
integer slices, similar to numpy/python
```

In [90]:
# iloc slices by integer position and excludes the end point, as in numpy/Python.
# (loc can slice too, but by label, and it includes the end label.)
df.iloc[3:5, 0:2]

Unnamed: 0,A,B
2017-03-13,-0.47928,-0.811287
2017-03-14,1.593042,0.974134


In [92]:
df.iloc[3:5, 0:3] 

Unnamed: 0,A,B,C
2017-03-13,-0.47928,-0.811287,0.37192
2017-03-14,1.593042,0.974134,0.274854


In [93]:
df.iloc[3:6, 0:2] 

Unnamed: 0,A,B
2017-03-13,-0.47928,-0.811287
2017-03-14,1.593042,0.974134
2017-03-15,-0.95022,1.728647


```
lists of integer position locations, similar to the numpy/python style
```

In [95]:
df.iloc[[1, 2, 4], [0, 2]]

Unnamed: 0,A,C
2017-03-11,-0.642461,-0.983053
2017-03-12,-0.394421,1.2505
2017-03-14,1.593042,0.274854


```
Slicing rows explicitly (keeping all columns), then slicing rows and columns together
```

In [96]:
df.iloc[1:3, :]

Unnamed: 0,A,B,C,D
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.2505,0.31231


In [97]:
df.iloc[0:4, :]

Unnamed: 0,A,B,C,D
2017-03-10,-0.178776,0.539217,-0.741046,1.413005
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.2505,0.31231
2017-03-13,-0.47928,-0.811287,0.37192,0.192895


In [98]:
df.iloc[0:4, 1:3]

Unnamed: 0,B,C
2017-03-10,0.539217,-0.741046
2017-03-11,0.726549,-0.983053
2017-03-12,-0.743219,1.2505
2017-03-13,-0.811287,0.37192


```
getting a value explicitly
```

In [99]:
df.iloc[1, 1]

0.72654865288896731

In [100]:
df.iloc[[1], [1]]

Unnamed: 0,B
2017-03-11,0.726549


In [101]:
df.loc['20170311', 'B']

0.72654865288896731

```
For getting fast access to a scalar (equivalent to the prior method)
```

In [102]:
df.iat[1, 1]

0.72654865288896731

In [103]:
%timeit df.iloc[1, 1]

10000 loops, best of 3: 132 µs per loop


In [104]:
%timeit df.iat[1, 1]

The slowest run took 13.13 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 5.71 µs per loop


```
Boolean Indexing
```

```
Using a single column’s values to select data.
```

In [106]:
len(df.A)

100

In [107]:
len(df[df.A > 0])

50

```
A where operation for getting: values that satisfy the boolean condition are kept, and the rest are shown as NaN.
```

In [108]:
df

Unnamed: 0,A,B,C,D
2017-03-10,-0.178776,0.539217,-0.741046,1.413005
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.250500,0.312310
2017-03-13,-0.479280,-0.811287,0.371920,0.192895
2017-03-14,1.593042,0.974134,0.274854,-2.207133
2017-03-15,-0.950220,1.728647,-0.577922,1.007841
2017-03-16,0.504471,-0.604975,1.249009,-0.329755
2017-03-17,-0.182371,-0.033787,1.369554,-1.129134
2017-03-18,-0.584976,-1.606719,-0.348627,1.644637
2017-03-19,1.190062,-0.686335,-1.288669,0.721466


In [109]:
df[df > 0]

Unnamed: 0,A,B,C,D
2017-03-10,,0.539217,,1.413005
2017-03-11,,0.726549,,0.022568
2017-03-12,,,1.250500,0.312310
2017-03-13,,,0.371920,0.192895
2017-03-14,1.593042,0.974134,0.274854,
2017-03-15,,1.728647,,1.007841
2017-03-16,0.504471,,1.249009,
2017-03-17,,,1.369554,
2017-03-18,,,,1.644637
2017-03-19,1.190062,,,0.721466


```
Using the isin() method for filtering:
```

In [110]:
df2 = df.copy()

In [115]:
# Take the length from the frame instead of hard-coding 100, so the cell keeps
# working if the number of rows changes: one consecutive integer per row.
df2['E'] = np.arange(len(df2))

In [116]:
df2

Unnamed: 0,A,B,C,D,E
2017-03-10,-0.178776,0.539217,-0.741046,1.413005,0
2017-03-11,-0.642461,0.726549,-0.983053,0.022568,1
2017-03-12,-0.394421,-0.743219,1.250500,0.312310,2
2017-03-13,-0.479280,-0.811287,0.371920,0.192895,3
2017-03-14,1.593042,0.974134,0.274854,-2.207133,4
2017-03-15,-0.950220,1.728647,-0.577922,1.007841,5
2017-03-16,0.504471,-0.604975,1.249009,-0.329755,6
2017-03-17,-0.182371,-0.033787,1.369554,-1.129134,7
2017-03-18,-0.584976,-1.606719,-0.348627,1.644637,8
2017-03-19,1.190062,-0.686335,-1.288669,0.721466,9


In [118]:
df # df itself is unchanged (no 'E' column): df2 is a copy, not a view

Unnamed: 0,A,B,C,D
2017-03-10,-0.178776,0.539217,-0.741046,1.413005
2017-03-11,-0.642461,0.726549,-0.983053,0.022568
2017-03-12,-0.394421,-0.743219,1.250500,0.312310
2017-03-13,-0.479280,-0.811287,0.371920,0.192895
2017-03-14,1.593042,0.974134,0.274854,-2.207133
2017-03-15,-0.950220,1.728647,-0.577922,1.007841
2017-03-16,0.504471,-0.604975,1.249009,-0.329755
2017-03-17,-0.182371,-0.033787,1.369554,-1.129134
2017-03-18,-0.584976,-1.606719,-0.348627,1.644637
2017-03-19,1.190062,-0.686335,-1.288669,0.721466


In [120]:
df2[df2['E'].isin([0, 2, 4, 99])]

Unnamed: 0,A,B,C,D,E
2017-03-10,-0.178776,0.539217,-0.741046,1.413005,0
2017-03-12,-0.394421,-0.743219,1.2505,0.31231,2
2017-03-14,1.593042,0.974134,0.274854,-2.207133,4
2017-06-17,-0.465856,0.653827,-1.369861,-1.220598,99


##### Setting

```
Setting a new column automatically aligns the data by the indexes
```

In [123]:
# The integers 0..99, labelled with 100 consecutive days from 2017-03-10.
s1 = pd.Series(
    data=np.arange(100),
    index=pd.date_range(start='20170310', periods=100),
)
s1

2017-03-10     0
2017-03-11     1
2017-03-12     2
2017-03-13     3
2017-03-14     4
2017-03-15     5
2017-03-16     6
2017-03-17     7
2017-03-18     8
2017-03-19     9
2017-03-20    10
2017-03-21    11
2017-03-22    12
2017-03-23    13
2017-03-24    14
2017-03-25    15
2017-03-26    16
2017-03-27    17
2017-03-28    18
2017-03-29    19
2017-03-30    20
2017-03-31    21
2017-04-01    22
2017-04-02    23
2017-04-03    24
2017-04-04    25
2017-04-05    26
2017-04-06    27
2017-04-07    28
2017-04-08    29
              ..
2017-05-19    70
2017-05-20    71
2017-05-21    72
2017-05-22    73
2017-05-23    74
2017-05-24    75
2017-05-25    76
2017-05-26    77
2017-05-27    78
2017-05-28    79
2017-05-29    80
2017-05-30    81
2017-05-31    82
2017-06-01    83
2017-06-02    84
2017-06-03    85
2017-06-04    86
2017-06-05    87
2017-06-06    88
2017-06-07    89
2017-06-08    90
2017-06-09    91
2017-06-10    92
2017-06-11    93
2017-06-12    94
2017-06-13    95
2017-06-14    96
2017-06-15    

In [124]:
df['F'] = s1

In [125]:
df

Unnamed: 0,A,B,C,D,F
2017-03-10,-0.178776,0.539217,-0.741046,1.413005,0
2017-03-11,-0.642461,0.726549,-0.983053,0.022568,1
2017-03-12,-0.394421,-0.743219,1.250500,0.312310,2
2017-03-13,-0.479280,-0.811287,0.371920,0.192895,3
2017-03-14,1.593042,0.974134,0.274854,-2.207133,4
2017-03-15,-0.950220,1.728647,-0.577922,1.007841,5
2017-03-16,0.504471,-0.604975,1.249009,-0.329755,6
2017-03-17,-0.182371,-0.033787,1.369554,-1.129134,7
2017-03-18,-0.584976,-1.606719,-0.348627,1.644637,8
2017-03-19,1.190062,-0.686335,-1.288669,0.721466,9


```
Setting values by label
```

In [126]:
df.at[dates[0], 'A'] = 0

In [127]:
df.loc['20170310', 'A']

0.0

In [132]:
df.at[dates[0], 'A']

0.0

```
Setting values by position


```

In [133]:
df.iat[0, 1] = 0

In [134]:
df.loc['20170310', 'B']

0.0

In [136]:
df.at[dates[0], 'B']

0.0

```
Setting by assigning with a numpy array
```

In [137]:
# Overwrite every row of column 'D' with the constant 5 (one array entry per row).
df.loc[:, 'D'] = np.array([5] * len(df))

In [138]:
df

Unnamed: 0,A,B,C,D,F
2017-03-10,0.000000,0.000000,-0.741046,5,0
2017-03-11,-0.642461,0.726549,-0.983053,5,1
2017-03-12,-0.394421,-0.743219,1.250500,5,2
2017-03-13,-0.479280,-0.811287,0.371920,5,3
2017-03-14,1.593042,0.974134,0.274854,5,4
2017-03-15,-0.950220,1.728647,-0.577922,5,5
2017-03-16,0.504471,-0.604975,1.249009,5,6
2017-03-17,-0.182371,-0.033787,1.369554,5,7
2017-03-18,-0.584976,-1.606719,-0.348627,5,8
2017-03-19,1.190062,-0.686335,-1.288669,5,9


```
A where operation with setting: new values are assigned only where the boolean condition holds.
```

In [139]:
df2 = df.copy()

In [140]:
df2

Unnamed: 0,A,B,C,D,F
2017-03-10,0.000000,0.000000,-0.741046,5,0
2017-03-11,-0.642461,0.726549,-0.983053,5,1
2017-03-12,-0.394421,-0.743219,1.250500,5,2
2017-03-13,-0.479280,-0.811287,0.371920,5,3
2017-03-14,1.593042,0.974134,0.274854,5,4
2017-03-15,-0.950220,1.728647,-0.577922,5,5
2017-03-16,0.504471,-0.604975,1.249009,5,6
2017-03-17,-0.182371,-0.033787,1.369554,5,7
2017-03-18,-0.584976,-1.606719,-0.348627,5,8
2017-03-19,1.190062,-0.686335,-1.288669,5,9


In [141]:
# Where a value is positive, replace it with its negation; zeros and negative
# values are left untouched, so no entry ends up positive.
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D,F
2017-03-10,0.000000,0.000000,-0.741046,-5,0
2017-03-11,-0.642461,-0.726549,-0.983053,-5,-1
2017-03-12,-0.394421,-0.743219,-1.250500,-5,-2
2017-03-13,-0.479280,-0.811287,-0.371920,-5,-3
2017-03-14,-1.593042,-0.974134,-0.274854,-5,-4
2017-03-15,-0.950220,-1.728647,-0.577922,-5,-5
2017-03-16,-0.504471,-0.604975,-1.249009,-5,-6
2017-03-17,-0.182371,-0.033787,-1.369554,-5,-7
2017-03-18,-0.584976,-1.606719,-0.348627,-5,-8
2017-03-19,-1.190062,-0.686335,-1.288669,-5,-9


### Missing Data