In [None]:
import pandas as pd

# Read the temperature CSV with read_csv's default parsing and preview the
# first rows.
df = pd.read_csv('data/nyc_temperatures.csv')
df.head()

In [None]:
# List the column labels as they were read from the file.
df.columns

In [None]:
# Replace the generic 'value'/'attributes' labels with descriptive ones.
# The renamed frame is bound back to `df` rather than mutated with
# inplace=True, which keeps the call chainable and matches the
# reassignment style used by the later cells.
df = df.rename(
    columns={
        'value': 'temp_C',
        'attributes': 'flags'
    }
)

In [None]:
# Re-check the column labels after the rename.
df.columns

In [None]:
# rename() also accepts a function: str.upper is applied to every column
# label. The result is not assigned, so df itself keeps its labels.
df.rename(str.upper, axis='columns').columns

In [None]:
# Show the dtype of each column.
df.dtypes

In [None]:
# Convert the date column to datetime64.
# Assign by column label so the column (and its dtype) is replaced.
# `df.loc[:, 'date'] = ...` writes into the existing column in place on
# pandas >= 2.0, which would leave the dtype as object.
df['date'] = pd.to_datetime(df.date)
df.dtypes

In [None]:
# Summary statistics for the date column.
df.date.describe()

In [None]:
# NOTE(review): this cell repeats the previous cell's call verbatim -
# confirm whether a different describe() variant was intended here.
df.date.describe()

In [None]:
# Build two consecutive daily timestamps starting 2018-10-25 and attach the
# 'EST' time zone to them.
pd.date_range(start='2018-10-25', periods=2, freq='D').tz_localize('EST')

In [None]:
# Re-read the temperatures with the parsed dates as the index, then mark
# that index as being in the 'EST' time zone.
eastern = (
    pd.read_csv('data/nyc_temperatures.csv', index_col='date', parse_dates=True)
    .tz_localize('EST')
)
eastern.head()

In [None]:
# Convert the EST-localized index to UTC and preview the result.
eastern.tz_convert('UTC').head()


In [None]:
# Represent the datetime index as monthly periods.
eastern.to_period('M').index

In [None]:
# Convert the monthly periods back into timestamps.
eastern.to_period('M').to_timestamp().index

In [None]:
# Start over from the raw file with the descriptive column names applied.
df = (
    pd.read_csv('data/nyc_temperatures.csv')
    .rename(columns={'value': 'temp_C', 'attributes': 'flags'})
)

# assign() returns a new frame, so df keeps its original columns and dtypes.
new_df = df.assign(
    date=pd.to_datetime(df['date']),
    temp_F=df['temp_C'] * 9 / 5 + 32,
)
new_df.dtypes

In [None]:
# Preview the frame that now has the parsed date and the temp_F column.
new_df.head()

In [None]:
# Add the derived temperature columns in one assign() call. temp_F_whole is
# a callable because temp_F does not exist on df until this same call
# creates it.
df = df.assign(
    date=pd.to_datetime(df['date']),
    temp_C_whole=df['temp_C'].astype('int'),
    temp_F=df['temp_C'] * 9 / 5 + 32,
    temp_F_whole=lambda frame: frame['temp_F'].astype('int'),
)

df.head()

In [None]:
# Store the station and datatype columns as categoricals in a new frame.
df_with_categories = df.astype({'station': 'category', 'datatype': 'category'})
df_with_categories.dtypes

In [None]:
# An ordered categorical: the position in `categories` defines the order
# (low < med < high), independent of the order of the values themselves.
pd.Categorical(
    ['med', 'med', 'low', 'high'],
    categories=['low', 'med', 'high'],
    ordered=True
)

In [None]:

# The ten rows with the highest temp_C, sorted in descending order.
df.sort_values(by='temp_C', ascending=False).head(10)

In [None]:
# Sort by temp_C and then by date; ascending=False applies to both keys.
df.sort_values(by=['temp_C', 'date'], ascending=False).head(10)

In [None]:
# The five rows with the largest temp_C values.
df.nlargest(n=5, columns='temp_C')

In [None]:
# The five smallest rows, ordered by temp_C and then by date.
df.nsmallest(n=5, columns=['temp_C', 'date'])

In [None]:
# Draw five rows with a fixed seed and show which index labels were picked.
df.sample(5, random_state=0).index

In [None]:
# The same seeded sample, with its rows put back into index order.
df.sample(5, random_state=0).sort_index().index

In [None]:
# axis=1 sorts the column labels rather than the rows.
df.sort_index(axis=1).head()

In [None]:
# With the columns sorted, slice by label from 'temp_C' through
# 'temp_F_whole'.
df.sort_index(axis=1).head().loc[:, 'temp_C':'temp_F_whole']

In [None]:
# Compare df with a copy of itself whose rows are sorted by temp_C.
df.equals(df.sort_values(by='temp_C'))

In [None]:
# Same comparison, but the sorted copy is first put back into index order.
df.equals(df.sort_values(by='temp_C').sort_index())

In [None]:
# Keep only the TAVG rows, take the first few, and move the old index
# labels into a column via reset_index().
df.loc[df['datatype'] == 'TAVG'].head().reset_index()

In [None]:

# Use the date column as the index so rows can be selected by date.
# The result is reassigned instead of using inplace=True, consistent with
# the other cells that rebind `df`.
df = df.set_index('date')
df.head()

In [None]:
# Select the rows from 2018-10-11 through 2018-10-12 by slicing the date
# index with date strings.
df.loc['2018-10-11':'2018-10-12']

In [None]:
# Load the S&P 500 data indexed by parsed date, without the adj_close column.
sp = (
    pd.read_csv('data/sp500.csv', index_col='date', parse_dates=True)
    .drop(columns=['adj_close'])
)

# Show the weekday name next to each of the first ten rows.
sp.head(10).assign(day_of_week=lambda frame: frame.index.day_name())

In [None]:
# Load the bitcoin data indexed by parsed date, without the market_cap column.
bitcoin = (
    pd.read_csv('data/bitcoin.csv', index_col='date', parse_dates=True)
    .drop(columns=['market_cap'])
)

# Stack both assets and sum the rows that fall on the same calendar day.
portfolio = (
    pd.concat([sp, bitcoin], sort=False)
    .groupby(pd.Grouper(freq='D'))
    .sum()
)

# Show the weekday name next to each of the first ten rows.
portfolio.head(10).assign(day_of_week=lambda frame: frame.index.day_name())

In [None]:
import matplotlib.pyplot as plt # we use this module for plotting

In [None]:
# Plot the combined closing price from 2017-Q4 through 2018-Q2 and label
# the y-axis on the axes that plot() returns.
portfolio['2017-Q4':'2018-Q2'].plot(
    y='close',
    figsize=(15, 5),
    legend=False,
    title='Bitcoin + S&P 500 value without accounting for different indices',
).set_ylabel('price ($)')
plt.show()

In [None]:
# Align the S&P 500 rows to bitcoin's index and show the weekday name next
# to each of the first ten rows.
(
    sp.reindex(bitcoin.index)
    .head(10)
    .assign(day_of_week=lambda frame: frame.index.day_name())
)

In [None]:
# Same alignment as the plain reindex, but with method='ffill' so that new
# index labels take the values of the preceding existing row.
(
    sp.reindex(bitcoin.index, method='ffill')
    .head(10)
    .assign(day_of_week=lambda frame: frame.index.day_name())
)


In [None]:
import numpy as np

# Align the S&P 500 rows to bitcoin's index, then fill the values that are
# missing afterwards, column by column:
#   - volume: 0
#   - close: carried forward from the previous available close
#   - open/high/low: set to the (already filled) close
# assign() evaluates its keyword arguments in order, so the open/high/low
# lambdas see the forward-filled `close`.
sp_reindexed = sp.reindex(
    bitcoin.index
).assign(
    volume=lambda x: x.volume.fillna(0),
    # Series.ffill() replaces fillna(method='ffill'); the `method` keyword
    # of fillna is deprecated in recent pandas releases.
    close=lambda x: x.close.ffill(),
    open=lambda x: np.where(x.open.isnull(), x.close, x.open),
    high=lambda x: np.where(x.high.isnull(), x.close, x.high),
    low=lambda x: np.where(x.low.isnull(), x.close, x.low)
)
# Show the weekday name next to each of the first ten rows.
sp_reindexed.head(10).assign(
    day_of_week=lambda x: x.index.day_name()
)

In [None]:
# Rebuild the daily portfolio, this time from the reindexed S&P 500 data.
fixed_portfolio = pd.concat(
    [sp_reindexed, bitcoin], sort=False
).groupby(pd.Grouper(freq='D')).sum()

# Plot the reindexed portfolio's closing price and keep the Axes so the
# original portfolio can be drawn on the same chart.
ax = fixed_portfolio['2017-Q4':'2018-Q2'].plot(
    y='close', label='reindexed portfolio of S&P 500 + Bitcoin',
    figsize=(15, 5), linewidth=2,
    title='Reindexed portfolio vs. portfolio with mismatches indices'
)

# Add a dashed line for the original portfolio for comparison and label
# the y-axis. The linestyle string was previously missing its closing
# quote, which made the whole cell a SyntaxError.
portfolio['2017-Q4':'2018-Q2'].plot(
    y='close', ax=ax, linestyle='--',
    label='portfolio of S&P 500 + Bitcoin w/o reindexing'
).set_ylabel('price ($)')

plt.show() # show the plot