(c) 2016 - present. Enplus Advisors, Inc.

In [1]:
import io

import numpy as np
import pandas as pd

# Render floats with two decimal places in displayed frames. The fully
# qualified option name is used because the bare 'precision' pattern can match
# more than one registered option, which makes `set_option` raise.
pd.set_option('display.precision', 2)

In [2]:
# Two days of prices for two tickers. `close` is left blank on 2018-01-05, so
# those rows load with a missing close.
prc = pd.read_csv(
    io.StringIO(
        'ticker,open,date,close\n'
        'AAPL,426.23,2018-01-04,435.23\n'
        'MSFT,42.3,2018-01-04,51.3\n'
        'AAPL,436.23,2018-01-05,\n'
        'MSFT,52.3,2018-01-05,\n'
    ),
    parse_dates=['date'],
)

# A synthetic third day: every row is re-dated to 2018-01-06 and its close is
# the row's open plus standard-normal noise; `open` itself is then dropped.
# The noise comes from a seeded generator so the values are identical on every
# Restart & Run All.
prc2 = prc.assign(
    date=pd.to_datetime('2018-01-06'),
    close=prc['open'] + np.random.RandomState(42).randn(len(prc)),
).drop(columns='open')

In [3]:
# Traded volume per ticker, one record per row; every record is stamped with
# the same date, 2018-01-05.
volume = pd.DataFrame(
    [
        ('AAPL', 1954.73),
        ('MSFT', 335.83),
        ('IBM', 362.79),
        ('YHOO', 858.18),
        ('GOOG', 629.79),
    ],
    columns=['ticker', 'volume'],
)
volume['date'] = pd.to_datetime('2018-01-05')

In [4]:
# Display the price table so the merge inputs are visible before the exercises.
prc

Unnamed: 0,ticker,open,date,close
0,AAPL,426.23,2018-01-04,435.23
1,MSFT,42.3,2018-01-04,51.3
2,AAPL,436.23,2018-01-05,
3,MSFT,52.3,2018-01-05,


In [5]:
# Display the volume table; it holds a single date and more tickers than `prc`.
volume

Unnamed: 0,ticker,volume,date
0,AAPL,1954.73,2018-01-05
1,MSFT,335.83,2018-01-05
2,IBM,362.79,2018-01-05
3,YHOO,858.18,2018-01-05
4,GOOG,629.79,2018-01-05


**Exercise**

Merge `prc` and `volume` on `ticker, date`:

* Preserving only the records with common `ticker`s and `date`s
* Preserving all the records in `prc`
* Preserving all the records from both `prc` and `volume` (every record that appears in either one)

_All of these merges should be performed on `ticker` and `date`_

In [6]:
# Join keys shared by the merges in this exercise.
cols = ['ticker', 'date']

# Inner join: keep only the (ticker, date) pairs present in both frames.
prc.merge(volume, on=cols, how='inner')

Unnamed: 0,ticker,open,date,close,volume
0,AAPL,436.23,2018-01-05,,1954.73
1,MSFT,52.3,2018-01-05,,335.83


In [7]:
# Left join: keep every row of `prc`; `volume` is missing where no
# (ticker, date) match exists.
prc.merge(volume, how='left', on=cols)

Unnamed: 0,ticker,open,date,close,volume
0,AAPL,426.23,2018-01-04,435.23,
1,MSFT,42.3,2018-01-04,51.3,
2,AAPL,436.23,2018-01-05,,1954.73
3,MSFT,52.3,2018-01-05,,335.83


In [8]:
# Outer join: keep the rows of both frames, leaving the columns that have no
# match on the other side missing.
prc.merge(volume, how='outer', on=cols)

Unnamed: 0,ticker,open,date,close,volume
0,AAPL,426.23,2018-01-04,435.23,
1,MSFT,42.3,2018-01-04,51.3,
2,AAPL,436.23,2018-01-05,,1954.73
3,MSFT,52.3,2018-01-05,,335.83
4,IBM,,2018-01-05,,362.79
5,YHOO,,2018-01-05,,858.18
6,GOOG,,2018-01-05,,629.79


**Exercise:**

Using `append`, add the rows of `prc2` to the end of `prc`, making sure that index labels are not repeated in the result

In [9]:
# Stack the rows of `prc2` under `prc`; `ignore_index=True` renumbers the
# result instead of repeating the 0-3 labels of each input.
# NOTE(review): `DataFrame.append` is deprecated in newer pandas releases and
# removed in pandas 2.0 - confirm the pandas version this notebook targets.
# `pd.concat([prc, prc2], ignore_index=True)` is the equivalent call.
prc.append(prc2, ignore_index=True)

Unnamed: 0,ticker,open,date,close
0,AAPL,426.23,2018-01-04,435.23
1,MSFT,42.3,2018-01-04,51.3
2,AAPL,436.23,2018-01-05,
3,MSFT,52.3,2018-01-05,
4,AAPL,,2018-01-06,426.31
5,MSFT,,2018-01-06,40.33
6,AAPL,,2018-01-06,434.84
7,MSFT,,2018-01-06,51.92


**Exercise:**

Using `pd.concat`, concatenate the rows of `prc` and `prc2`, making
a single call to `pd.concat` for each bulleted sub-exercise:

* Make sure your result generates a new index like in the previous
  exercise
* Only include the columns in both `prc` and `prc2` in the result,
  additionally generating a new index
* Give the result a `MultiIndex` whose outer level is `prc` or `prc2`,
  indicating which `DataFrame` each row came from

In [10]:
# Stack both frames row-wise and renumber the rows from zero.
pd.concat(
    objs=(prc, prc2),
    ignore_index=True,
)

Unnamed: 0,ticker,open,date,close
0,AAPL,426.23,2018-01-04,435.23
1,MSFT,42.3,2018-01-04,51.3
2,AAPL,436.23,2018-01-05,
3,MSFT,52.3,2018-01-05,
4,AAPL,,2018-01-06,426.31
5,MSFT,,2018-01-06,40.33
6,AAPL,,2018-01-06,434.84
7,MSFT,,2018-01-06,51.92


In [11]:
# Stack both frames row-wise, keeping only the columns they share and
# renumbering the rows from zero.
pd.concat(
    objs=(prc, prc2),
    ignore_index=True,
    join='inner',
)

Unnamed: 0,ticker,date,close
0,AAPL,2018-01-04,435.23
1,MSFT,2018-01-04,51.3
2,AAPL,2018-01-05,
3,MSFT,2018-01-05,
4,AAPL,2018-01-06,426.31
5,MSFT,2018-01-06,40.33
6,AAPL,2018-01-06,434.84
7,MSFT,2018-01-06,51.92


In [12]:
# Stack both frames row-wise; the mapping's keys become the outer index level,
# labelling which frame each row came from.
pd.concat({'prc': prc, 'prc2': prc2})

Unnamed: 0,Unnamed: 1,ticker,open,date,close
prc,0,AAPL,426.23,2018-01-04,435.23
prc,1,MSFT,42.3,2018-01-04,51.3
prc,2,AAPL,436.23,2018-01-05,
prc,3,MSFT,52.3,2018-01-05,
prc2,0,AAPL,,2018-01-06,426.31
prc2,1,MSFT,,2018-01-06,40.33
prc2,2,AAPL,,2018-01-06,434.84
prc2,3,MSFT,,2018-01-06,51.92
