In [1]:
import numpy as np
import pandas as pd

In [2]:
import datetime
# NOTE: the next line rebinds the name `datetime` to the class, shadowing the module imported above.
# After it runs, use `datetime(...)` / `date(...)` directly; `datetime.datetime(...)` no longer resolves.
from datetime import datetime, date

In [3]:
# Notebook-wide pandas display settings: plain-text reprs, at most 8 columns and
# 10 rows per frame, and a 60-character line width.
display_options = {
    'display.notebook_repr_html': False,
    'display.max_columns': 8,
    'display.max_rows': 10,
    'display.width': 60,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [4]:
# Load the S&P 500 CSV, keeping only the columns at positions 0, 2, 3 and 7
# and using the 'Symbol' column as the row index.
sp500 = pd.read_csv(
    'Data/sp500.csv',
    usecols=[0, 2, 3, 7],
    index_col='Symbol',
)

In [5]:
# Seed NumPy's legacy global generator so the random 'foo' column is reproducible.
np.random.seed(123456)

# 10,000 rows: uniform random floats in 'foo' and consecutive integer keys 100..10099 in 'key'.
row_count = 10000
first_key = 100
df = pd.DataFrame({
    'foo': np.random.random(row_count),
    'key': range(first_key, first_key + row_count),
})

In [6]:
# Preview the first rows of df (head() shows five rows by default).
df.head()

        foo  key
0  0.126970  100
1  0.966718  101
2  0.260476  102
3  0.897237  103
4  0.376750  104

In [7]:
# Preview the last rows of df (tail() shows five rows by default).
df.tail()

           foo    key
9995  0.769913  10095
9996  0.752521  10096
9997  0.216083  10097
9998  0.448789  10098
9999  0.272283  10099

In [8]:
# Boolean-mask lookup: keep the rows whose 'key' column equals 10099.
df[df['key'] == 10099]

           foo    key
9999  0.272283  10099

In [9]:
%%timeit
# Timed: the comparison builds a boolean mask over the whole 'key' column, then filters df with it.
df[df.key == 10099]

287 µs ± 15.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [10]:
# Build a copy of df whose row labels are the 'key' values; df itself is left untouched.
df_with_index = df.set_index('key')

In [11]:
# 'key' is now the row index rather than a regular column.
df_with_index.head()

          foo
key          
100  0.126970
101  0.966718
102  0.260476
103  0.897237
104  0.376750

In [12]:
%%timeit
# Timed: look the same key up by label through the index built from 'key'.
df_with_index.loc[10099]

58.3 µs ± 1.36 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [13]:
# Two-row example frame: one city name and one temperature reading per row.
city_names = ['Missoula', 'Philadelphia']
city_temperatures = [70, 80]
temps = pd.DataFrame({'City': city_names, 'Temperature': city_temperatures})
temps

           City  Temperature
0      Missoula           70
1  Philadelphia           80

In [14]:
# The column labels are themselves held in a pandas Index.
temps.columns

Index(['City', 'Temperature'], dtype='object')

In [17]:
# One integer column (0, 5, ..., 995) labelled by a float index running 0.0, 0.5, ..., 99.5.
df_f64 = pd.DataFrame(
    data=np.arange(0, 1000, 5),
    index=np.arange(0.0, 100.0, 0.5),
)

In [19]:
# Inspect the float-valued row index of df_f64.
df_f64.index

Float64Index([ 0.0,  0.5,  1.0,  1.5,  2.0,  2.5,  3.0,
               3.5,  4.0,  4.5,
              ...
              95.0, 95.5, 96.0, 96.5, 97.0, 97.5, 98.0,
              98.5, 99.0, 99.5],
             dtype='float64', length=200)

In [22]:
# Five break points give four consecutive intervals, one per row of column 'A'.
interval_breaks = [0, 0.5, 1.0, 1.5, 2.0]
interval_index = pd.IntervalIndex.from_breaks(interval_breaks)
df_interval = pd.DataFrame({'A': [1, 2, 3, 4]}, index=interval_index)

In [23]:
# Each row is labelled by one of the intervals built from the break points.
df_interval.head()

            A
(0.0, 0.5]  1
(0.5, 1.0]  2
(1.0, 1.5]  3
(1.5, 2.0]  4

In [24]:
# Inspect the IntervalIndex that labels the rows.
df_interval.index

IntervalIndex([(0.0, 0.5], (0.5, 1.0], (1.0, 1.5], (1.5, 2.0]], dtype='interval[float64, right]')

In [46]:
# Six rows: a running integer in 'A' and a single-letter string label in 'B'.
letters = list('aabbca')
df_categorical = pd.DataFrame({'A': np.arange(len(letters)), 'B': letters})

In [47]:
# Convert column 'B' from plain strings to the categorical dtype; with the bare
# 'category' alias the categories are inferred from the values present in the column.
df_categorical['B'] = df_categorical['B'].astype('category')

In [48]:
# Summarise column dtypes and memory use; 'B' is expected to report dtype category here.
df_categorical.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   A       6 non-null      int32   
 1   B       6 non-null      category
dtypes: category(1), int32(1)
memory usage: 290.0 bytes


In [41]:
# Show the categorical column; the repr lists its values followed by its categories.
df_categorical['B']

0    a
1    a
2    b
3    b
4    c
5    a
Name: B, dtype: category
Categories (3, object): ['a', 'b', 'c']

In [43]:
from pandas.api.types import CategoricalDtype

# Re-cast 'B' with an explicit category order (c, a, b) instead of the inferred one.
# NOTE(review): the execution counts show this cell was last run before the cells above it, and the
# CategoricalIndex output further down lists the categories as a, b, c. Re-run top to bottom to
# confirm which order the saved outputs should show.
cab_order = CategoricalDtype(categories=list('cab'))
df_categorical['B'] = df_categorical['B'].astype(cab_order)

In [45]:
# Re-check dtypes after the explicit CategoricalDtype cast; 'B' should still report category.
df_categorical.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype   
---  ------  --------------  -----   
 0   A       6 non-null      int32   
 1   B       6 non-null      category
dtypes: category(1), int32(1)
memory usage: 290.0 bytes


In [49]:
# Promote 'B' to the row index. The guard keeps the cell re-runnable: once 'B' has become the
# index it is no longer a column, so calling set_index('B') a second time would raise KeyError.
if 'B' in df_categorical.columns:
    df_categorical = df_categorical.set_index('B')
df_categorical.index

CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['a', 'b', 'c'], ordered=False, dtype='category', name='B')

In [50]:
# 'B' now labels the rows; the same label can appear on several rows.
df_categorical.head()

   A
B   
a  0
a  1
b  2
b  3
c  4

In [52]:
# Label lookup on the non-unique index returns every row labelled 'a'.
df_categorical.loc['a']

   A
B   
a  0
a  1
a  5

In [54]:
# Five consecutive hourly timestamps starting at 2017-05-01 00:00 (ISO date avoids day/month ambiguity).
# Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is deprecated as of pandas 2.2.
rng = pd.date_range('2017-05-01', periods=5, freq='h')

In [59]:
# One standard-normal draw per timestamp in rng, indexed by those timestamps.
hourly_values = np.random.randn(len(rng))
ts = pd.Series(data=hourly_values, index=rng)
ts

2017-05-01 00:00:00    0.533249
2017-05-01 01:00:00   -0.819218
2017-05-01 02:00:00   -0.032955
2017-05-01 03:00:00   -0.639418
2017-05-01 04:00:00   -0.607207
Freq: H, dtype: float64

In [60]:
# The Series is labelled by the DatetimeIndex it was built with.
ts.index

DatetimeIndex(['2017-05-01 00:00:00',
               '2017-05-01 01:00:00',
               '2017-05-01 02:00:00',
               '2017-05-01 03:00:00',
               '2017-05-01 04:00:00'],
              dtype='datetime64[ns]', freq='H')

In [62]:
# Three consecutive monthly periods (January to March 2017).
month_labels = ['2017-1', '2017-2', '2017-3']
periods = pd.PeriodIndex(month_labels, freq='M')

In [63]:
# Display the PeriodIndex and its monthly period dtype.
periods

PeriodIndex(['2017-01', '2017-02', '2017-03'], dtype='period[M]')

In [66]:
# One standard-normal draw per period, indexed by the monthly PeriodIndex.
period_values = np.random.randn(len(periods))
period_series = pd.Series(data=period_values, index=periods)

In [68]:
# pd.date_range already returns a DatetimeIndex, so no extra pd.DatetimeIndex(...) wrapper is needed.
# Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is deprecated as of pandas 2.2.
date_times = pd.date_range('2017-05-01', periods=5, freq='h')

In [72]:
# A single running-counter column (0, 1, 2, ...) with one row per timestamp in date_times.
row_numbers = np.arange(len(date_times))
df_date_times = pd.DataFrame(data=row_numbers, index=date_times)

In [73]:
# Preview sp500, which was loaded with 'Symbol' as its row index.
sp500.head()

                        Sector   Price  Book Value
Symbol                                            
MMM                Industrials  141.14      26.668
ABT                Health Care   39.60      15.573
ABBV               Health Care   53.95       2.954
ACN     Information Technology   79.79       8.326
ACE                 Financials  102.91      86.897

In [74]:
# Move 'Symbol' out of the index into a regular column. The result is a new frame;
# it is not assigned, so sp500 itself is unchanged.
sp500.reset_index()

    Symbol                  Sector   Price  Book Value
0      MMM             Industrials  141.14      26.668
1      ABT             Health Care   39.60      15.573
2     ABBV             Health Care   53.95       2.954
3      ACN  Information Technology   79.79       8.326
4      ACE              Financials  102.91      86.897
..     ...                     ...     ...         ...
495   YHOO  Information Technology   35.02      12.768
496    YUM  Consumer Discretionary   74.77       5.147
497    ZMH             Health Care  101.84      37.181
498   ZION              Financials   28.43      30.191
499    ZTS             Health Care   30.53       2.150

[500 rows x 4 columns]

In [75]:
# Use 'Sector' as the row index instead. The result is a new frame in which the previous
# 'Symbol' index is not kept; sp500 itself is unchanged.
sp500.set_index('Sector')

                         Price  Book Value
Sector                                    
Industrials             141.14      26.668
Health Care              39.60      15.573
Health Care              53.95       2.954
Information Technology   79.79       8.326
Financials              102.91      86.897
...                        ...         ...
Information Technology   35.02      12.768
Consumer Discretionary   74.77       5.147
Health Care             101.84      37.181
Financials               28.43      30.191
Health Care              30.53       2.150

[500 rows x 2 columns]

In [76]:
# Conform the rows to the given labels; a label missing from the index ('FOO') yields an all-NaN row.
sp500.reindex(index=['MMM', 'ABBV', 'FOO'])

             Sector   Price  Book Value
Symbol                                 
MMM     Industrials  141.14      26.668
ABBV    Health Care   53.95       2.954
FOO             NaN     NaN         NaN

In [82]:
# Membership test against the index labels of sp500 (reindex above did not modify it).
'FOO' in sp500.index

False

In [80]:
# Select a single row by its index label; the row comes back as a Series keyed by column name.
sp500.loc['ABT']

Sector        Health Care
Price                39.6
Book Value         15.573
Name: ABT, dtype: object

In [81]:
# sp500 still has its original 'Symbol' index: none of the reset/set/reindex calls above were assigned back.
sp500.head()

                        Sector   Price  Book Value
Symbol                                            
MMM                Industrials  141.14      26.668
ABT                Health Care   39.60      15.573
ABBV               Health Care   53.95       2.954
ACN     Information Technology   79.79       8.326
ACE                 Financials  102.91      86.897

In [84]:
# Conform the columns to the given labels: 'Price' is kept, and 'NewCol', which sp500 does not have,
# is added and filled with NaN.
sp500.reindex(columns=['Price', 'NewCol'])

         Price  NewCol
Symbol                
MMM     141.14     NaN
ABT      39.60     NaN
ABBV     53.95     NaN
ACN      79.79     NaN
ACE     102.91     NaN
...        ...     ...
YHOO     35.02     NaN
YUM      74.77     NaN
ZMH     101.84     NaN
ZION     28.43     NaN
ZTS      30.53     NaN

[500 rows x 2 columns]

In [88]:
# Double brackets select a list of columns, so the result is a one-column DataFrame rather than a Series.
sp500[['Price']]

         Price
Symbol        
MMM     141.14
ABT      39.60
ABBV     53.95
ACN      79.79
ACE     102.91
...        ...
YHOO     35.02
YUM      74.77
ZMH     101.84
ZION     28.43
ZTS      30.53

[500 rows x 1 columns]

In [103]:
# Distinct sector labels.
# NOTE(review): the saved output includes variants with a trailing space ('Consumer Staples ',
# 'Consumer Discretionary ') and 'Industries' next to 'Industrials'. These look like data-entry
# noise; confirm and normalise before selecting or grouping by sector.
sp500['Sector'].unique()

array(['Industrials', 'Health Care', 'Information Technology',
       'Financials', 'Utilities', 'Materials', 'Consumer Staples',
       'Consumer Discretionary', 'Energy', 'Telecommunications Services',
       'Consumer Staples ', 'Industries', 'Consumer Discretionary '],
      dtype=object)

In [104]:
# Flatten sp500 so that 'Symbol' becomes an ordinary column next to 'Sector', 'Price' and 'Book Value'.
reindexed = sp500.reset_index()

In [95]:
# Build a two-level (hierarchical) index: 'Sector' as the outer level, 'Symbol' as the inner level.
multi_fi = reindexed.set_index(['Sector', 'Symbol'])

In [102]:
# Select every row under the outer-level label 'Consumer Discretionary'; passing the label in a list
# keeps the 'Sector' level in the result.
# NOTE(review): this is an exact label match, so rows stored under a differently spelled sector label
# (e.g. with a trailing space) would presumably not be included; verify against the raw data.
multi_fi.loc[['Consumer Discretionary']]

                                Price  Book Value
Sector                 Symbol                    
Consumer Discretionary AMZN    312.24      22.452
                       AN       56.38      17.327
                       AZO     540.90     -51.275
                       BEAM      0.00         NaN
                       BBBY     61.36      19.563
...                               ...         ...
                       DIS      83.32      26.405
                       WHR     147.26      66.221
                       WYN      72.22      11.984
                       WYNN    207.86      -0.719
                       YUM      74.77       5.147

[85 rows x 2 columns]