In [2]:
!pip install yfinance
import pandas as pd, numpy as np
import yfinance as yf

Collecting yfinance
  Downloading yfinance-0.2.37-py2.py3-none-any.whl.metadata (11 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.0.tar.gz (314 kB)
     ---------------------------------------- 0.0/314.6 kB ? eta -:--:--
     - -------------------------------------- 10.2/314.6 kB ? eta -:--:--
     ------- ----------------------------- 61.4/314.6 kB 812.7 kB/s eta 0:00:01
     ----------------------- -------------- 194.6/314.6 kB 1.5 MB/s eta 0:00:01
     -------------------------------------- 314.6/314.6 kB 1.9 MB/s eta 0:00:00
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with s

In [3]:
#%% Heirarchical Indexing
"""
    Every index(including columns and index) in a DataFrame or a Series can have not only one layer.
"""
data = pd.Series(range(9),
                 index=[['a','a','a','b','b','c','c','d','d'],
                        ['x','y','z','x','y','x','y','x','y']])
data['a']
data['b']
data.loc[:,'x']

frame = pd.DataFrame(np.arange(12).reshape((4,3)),
                     index=[['a','a','b','b'],
                            [1,2,1,2]],
                     columns=[['Ohio','Ohio','Colorado'],
                              ['Green','Red','Green']])
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']

frame['Ohio']

frame.loc[('a',1):('b',1),'Ohio']

idx = pd.IndexSlice
frame.loc[idx[:, 1], idx[:, "Red"]]

Unnamed: 0_level_0,state,Ohio
Unnamed: 0_level_1,color,Red
key1,key2,Unnamed: 2_level_2
a,1,1
b,1,7


In [4]:
#%% Stack / Unstack
"""
    Stack and Unstack can be used to reduce the dimention of the DataFrame
    When stacking, pandas will drop the nan by default, we can add "dropna=False" to avoid this.
    When unstcking, pandas will auto fill nan values.
"""
data = pd.DataFrame(np.arange(6).reshape((2,3)),
                    index = ['Ohio', 'Colorado'],
                    columns = ['one', 'two', 'three'])
data.index.names = ['state']
data.columns.names = ['number']

data1=data.stack()
data1.unstack()
data1.unstack('state')
data1.unstack(0)
data1.unstack('number')
data1.unstack(1)

data = pd.DataFrame([[1,2,np.nan],
                     [3,np.nan,4]],
                    columns=['a','b','c'],
                    index=['one', 'two'])
data.stack()
data.stack(dropna=False)
data.stack().unstack()
data.stack().unstack(fill_value=-999)

frame = pd.DataFrame(np.arange(12).reshape((4,3)),
                     index=[['a','a','b','b'],
                            [1,2,1,2]],
                     columns=[['Ohio','Ohio','Colorado'],
                              ['Green','Red','Green']])
frame.index.names = ['char', 'label']
frame.columns.names = ['state', 'color']
frame.unstack('char')
frame.unstack('label')
frame.T.unstack('state').T
frame.stack('state')
frame.T.unstack('color').T
frame.stack('state')

Unnamed: 0_level_0,Unnamed: 1_level_0,color,Green,Red
char,label,state,Unnamed: 3_level_1,Unnamed: 4_level_1
a,1,Colorado,2,
a,1,Ohio,0,1.0
a,2,Colorado,5,
a,2,Ohio,3,4.0
b,1,Colorado,8,
b,1,Ohio,6,7.0
b,2,Colorado,11,
b,2,Ohio,9,10.0


In [5]:
#%% pivot / melt
"""
    Similiar to stack and unstack, but can set more thing inside
"""
df = pd.DataFrame({'key':['foo', 'bar', 'baz'],
                   'A': [1,2,3],
                   'B': [4,5,6],
                   'C': [7,8,9]})
melted = pd.melt(df)
melted_2 = pd.melt(df, ['key'])

#stack = df.stack(df.columns)

reshaped = melted_2.pivot('key', 'variable', 'value')
melted_2.set_index(['key', 'variable'], inplace=True)
reshaped_unstack = melted_2.unstack('variable')

reshaped['key'] = reshaped.index
reshaped.index = range(3)
reshaped.reset_index(inplace=True)

reshaped.set_index(['key'], inplace=True)
reshaped.index = reshaped['key']
reshaped.drop(['key'], inplace=True)



df = pd.DataFrame({'key':['foo', 'bar', 'baz'],
                   'A': [1,2,3],
                   'B': [4,5,6],
                   'C': [7,8,9]})
melted = pd.melt(df)
melted_2 = pd.melt(df, ['key'])
melted_2['value_2'] = range(9,0,-1)

reshaped = melted_2.pivot('key', 'variable', 'value')
reshaped = melted_2.pivot('key', 'variable', 'value_2')
reshaped = melted_2.pivot('key', 'variable')

TypeError: DataFrame.pivot() takes 1 positional argument but 4 were given