In [1]:
import pandas as pd
import numpy as np

from datetime import datetime, date

# Console-style display settings for the whole notebook: plain-text reprs,
# at most 8 columns and 10 rows shown, output wrapped at 80 characters.
display_options = {
    'display.notebook_repr_html': False,
    'display.max_columns': 8,
    'display.max_rows': 10,
    'display.width': 80,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Fix the legacy global NumPy seed so the random integers are reproducible.
np.random.seed(123456)

# 5 rows x 4 columns of integers drawn from [0, 10), labelled A-D.
random_digits = np.random.randint(0, 10, size=(5, 4))
df = pd.DataFrame(random_digits, columns=list('ABCD'))

df

   A  B  C  D
0  1  2  1  8
1  0  7  4  8
2  4  2  6  6
3  7  2  6  2
4  4  4  7  4

In [3]:
# Multiply every element by the scalar 2 (method form of `df * 2`).
df.mul(2)

    A   B   C   D
0   2   4   2  16
1   0  14   8  16
2   8   4  12  12
3  14   4  12   4
4   8   8  14   8

In [4]:
# First row as a Series; its index is the frame's column labels (A-D).
s = df.iloc[0]

# Subtract that row from every row, matching Series labels to column labels.
# Row 0 therefore becomes all zeros.
diff = df.sub(s, axis='columns')
diff

   A  B  C  D
0  0  0  0  0
1 -1  5  3  0
2  3  0  5 -2
3  6  0  5 -6
4  3  2  6 -4

In [5]:
# Elements at positions 1 and 2 of the first-row Series (labels B and C).
# .iloc makes the positional slice explicit, and .copy() detaches the result
# from `s`/`df` so the enlargement below never writes through a view and
# cannot trigger SettingWithCopyWarning.
s2 = s.iloc[1:3].copy()
# Add a label that does not exist among df's columns.
s2['E'] = 0
# Addition aligns on the union of labels. Only B and C exist on both sides,
# so A, D and E come out as NaN and the result is upcast to float.
df + s2

    A    B    C   D   E
0 NaN  4.0  2.0 NaN NaN
1 NaN  9.0  5.0 NaN NaN
2 NaN  4.0  7.0 NaN NaN
3 NaN  4.0  7.0 NaN NaN
4 NaN  6.0  8.0 NaN NaN

In [6]:
# Rows at positions 1-3, then restricted to columns B and C.
middle_rows = df.iloc[1:4]
subframe = middle_rows[['B', 'C']]

# Frame-minus-frame aligns on both index and columns; every cell that is
# absent from `subframe` becomes NaN in the result.
df - subframe

    A    B    C   D
0 NaN  NaN  NaN NaN
1 NaN  0.0  0.0 NaN
2 NaN  0.0  0.0 NaN
3 NaN  0.0  0.0 NaN
4 NaN  NaN  NaN NaN

In [7]:
# Show df again for reference; the arithmetic in the earlier cells returned
# new objects and left it unchanged.
df

   A  B  C  D
0  1  2  1  8
1  0  7  4  8
2  4  2  6  6
3  7  2  6  2
4  4  4  7  4

In [8]:
# Subtract column A from every column. axis='index' matches the Series to
# the frame's row labels, so column A of the result is all zeros.
a_col = df.loc[:, 'A']
df.subtract(a_col, axis='index')

   A  B  C  D
0  0  1  0  7
1  0  7  4  8
2  0 -2  2  2
3  0 -5 -1 -5
4  0  0  3  0

In [9]:
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
# and raises AttributeError there.
s = pd.Series(['a', 'b', 'c', np.nan])

# print() shows the Series in addition to the cell's final value.
print(s)
# count() reports only the non-missing entries (3 of the 4 here).
s.count()

0      a
1      b
2      c
3    NaN
dtype: object


3

In [10]:
# Distinct values as an array; NaN is included as a value.
s.unique()

array(['a', 'b', 'c', nan], dtype=object)

In [11]:
# Number of distinct values; NaN is not counted by default.
s.nunique()

3

In [12]:
# dropna=False counts NaN as one more distinct value.
s.nunique(dropna=False)

4

In [13]:
# Occurrences of each distinct value; dropna=False adds a row for NaN.
s.value_counts(dropna=False)

a      1
b      1
c      1
NaN    1
dtype: int64

In [14]:
# Read only columns 0, 2, 3 and 7 of sp500.csv and use the 'Symbol' column
# as the row index.
df_sp500 = pd.read_csv(
    'sp500.csv',
    usecols=[0, 2, 3, 7],
    index_col='Symbol',
)

# Read omh.csv with default settings (its head() output further down shows
# Date, MSFT and AAPL columns).
df_omh = pd.read_csv('omh.csv')

In [15]:
# Column-wise minimum of the MSFT and AAPL columns, returned as a Series.
df_omh[['MSFT', 'AAPL']].min()

MSFT     45.16
AAPL    106.75
dtype: float64

In [16]:
# Column-wise maximum of the MSFT and AAPL columns, returned as a Series.
df_omh[['MSFT', 'AAPL']].max()

MSFT     48.84
AAPL    115.93
dtype: float64

In [17]:
# Index label of the row holding each column's minimum (row 11 for both).
df_omh[['MSFT', 'AAPL']].idxmin()

MSFT    11
AAPL    11
dtype: int64

In [18]:
# The four lowest MSFT values, ascending, keeping their original row labels.
df_omh['MSFT'].nsmallest(4)

11    45.16
12    45.74
21    46.45
10    46.67
Name: MSFT, dtype: float64

In [19]:
# The four highest MSFT values, descending, keeping their original row labels.
df_omh['MSFT'].nlargest(4)

3     48.84
0     48.62
1     48.46
16    48.45
Name: MSFT, dtype: float64

In [20]:
# Running product: element i is the product of elements 0..i (1, 2, 6, 24).
pd.Series([1, 2, 3, 4]).cumprod()

0     1
1     2
2     6
3    24
dtype: int64

In [21]:
# Running sum: element i is the sum of elements 0..i (1, 3, 6, 10).
pd.Series([1, 2, 3, 4]).cumsum()

0     1
1     3
2     6
3    10
dtype: int64

In [27]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
# Only MSFT and AAPL appear in the output; the Date column is left out.
df_omh.describe()

            MSFT        AAPL
count  22.000000   22.000000
mean   47.493182  112.411364
std     0.933077    2.388772
min    45.160000  106.750000
25%    46.967500  111.660000
50%    47.625000  112.530000
75%    48.125000  114.087500
max    48.840000  115.930000

In [29]:
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0
# and raises AttributeError there.
s = pd.Series(['a', 'b', 'c', 'a', np.nan])

# For non-numeric data describe() reports count (non-missing), number of
# unique values, the most frequent value and its frequency.
s.describe()

count     4
unique    3
top       a
freq      2
dtype: object

In [35]:
# Preview the first five rows (Date, MSFT and AAPL columns).
df_omh.head()

         Date   MSFT    AAPL
0  2014-12-01  48.62  115.07
1  2014-12-02  48.46  114.63
2  2014-12-03  48.08  115.93
3  2014-12-04  48.84  115.49
4  2014-12-05  48.42  115.00

In [39]:
# Column-wise arithmetic mean; matches the `mean` row of describe().
df_omh[['MSFT', 'AAPL']].mean()

MSFT     47.493182
AAPL    112.411364
dtype: float64

In [41]:
# Column-wise median; matches the `50%` row of describe().
df_omh[['MSFT', 'AAPL']].median()

MSFT     47.625
AAPL    112.530
dtype: float64

In [44]:
# Five distinct integers, each occurring exactly once.
s = pd.Series(range(1, 6))

# Every value is tied for most frequent, so mode() returns all of them.
s.mode()

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [47]:
# Column-wise variance. The values are the squares of the `std` row of
# describe() (e.g. 0.933077 ** 2 ≈ 0.870632 for MSFT).
df_omh[['MSFT', 'AAPL']].var()

MSFT    0.870632
AAPL    5.706231
dtype: float64