In [2]:
import pandas as pd

In [2]:
# Load the dataset; no index_col is given, so pandas assigns its default integer row index.
# NOTE(review): this is an absolute, machine-specific path — the notebook will not
# run on another machine without editing it.
df = pd.read_csv("/home/ashporwal/hdd/lab/programmes/Python/Pandas/1_Series/TopRichestInWorld.csv")

In [3]:
# Preview the first rows to see which columns the file contains.
df.head()

Unnamed: 0,Name,NetWorth,Age,Country/Territory,Source,Industry
0,Elon Musk,"$219,000,000,000",50,United States,"Tesla, SpaceX",Automotive
1,Jeff Bezos,"$171,000,000,000",58,United States,Amazon,Technology
2,Bernard Arnault & family,"$158,000,000,000",73,France,LVMH,Fashion & Retail
3,Bill Gates,"$129,000,000,000",66,United States,Microsoft,Technology
4,Warren Buffett,"$118,000,000,000",91,United States,Berkshire Hathaway,Finance & Investments


In [4]:
# Re-read the file with Name as the row index and without the Source column.
# The drop is chained onto the read instead of using inplace=True, so `df` is
# bound once to the finished frame rather than being mutated after creation.
df = (
    pd.read_csv(
        "/home/ashporwal/hdd/lab/programmes/Python/Pandas/1_Series/TopRichestInWorld.csv",
        index_col='Name',
    )
    .drop(columns=['Source'])
)
df.head()

Unnamed: 0_level_0,NetWorth,Age,Country/Territory,Industry
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Elon Musk,"$219,000,000,000",50,United States,Automotive
Jeff Bezos,"$171,000,000,000",58,United States,Technology
Bernard Arnault & family,"$158,000,000,000",73,France,Fashion & Retail
Bill Gates,"$129,000,000,000",66,United States,Technology
Warren Buffett,"$118,000,000,000",91,United States,Finance & Investments


`Stacking` a dataset: calling `stack()` on a DataFrame moves all of its column
labels into the index, where they become the innermost index level.

The `stack()` function in Pandas is used to **pivot** or **"stack"** the columns of a DataFrame into a multi-level index (also called a hierarchical index). **It compresses the columns of a DataFrame into the rows**.


Syntax:  `df.stack(level=-1, dropna=True)`
- `level`: Specifies the level(s) to stack. By default, it stacks the innermost level.
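- `dropna`: If True (the default), any rows with missing values after stacking are dropped. If False, they are kept. Note: in recent pandas versions the new implementation (`future_stack=True`) requires `dropna` to be left unspecified, so prefer plain `df.stack()` unless you need `dropna=False`.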


In [4]:
# Example of stacking: a small 3x3 frame with columns A, B and C.
mydata = dict(
    A=[1, 2, 3],
    B=[4, 5, 6],
    C=[7, 8, 9],
)
temp_df = pd.DataFrame(data=mydata)

temp_df

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9


In [6]:
# stack() moves the column labels (A, B, C) into the index as the innermost
# level; for this frame the result is a Series with a two-level index.
stacked_df = temp_df.stack()

stacked_df

0  A    1
   B    4
   C    7
1  A    2
   B    5
   C    8
2  A    3
   B    6
   C    9
dtype: int64

**Explanation of Output**
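- The original column labels (`A`, `B`, `C`) become the second (innermost) level of the index, while the original row labels (`0`, `1`, `2`) remain the first (outer) level.
- The values that were spread across the original columns are now stacked vertically in a single Series.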
- This is especially useful when you need to reshape data for specific types of analysis or visualization.


**Use Cases**
- Data Reshaping: Converting data from a wide format to a long format, which can be useful for certain types of data analysis or visualization.
- Time Series Analysis: Useful in converting cross-sectional time-series data into a form that is easier to work with.
- Data Cleaning: Helps in data normalization by transforming data into a tidy format where each variable is a column and each observation is a row.

In [7]:
# Selecting by the outer index level -

# .loc[0] returns every entry whose outer label is 0; the result is indexed
# by the inner level (the former column labels A, B, C).
stacked_df.loc[0]

A    1
B    4
C    7
dtype: int64

In [8]:
# An (outer, inner) tuple selects a single value: outer label 0, inner label 'B'.
stacked_df.loc[(0, 'B')]

np.int64(4)

In [5]:
# Stack the real dataset: every column label moves into the index beneath each
# Name, which is why Name was chosen as the index column when the file was read.
# `dropna` is deliberately not passed: True is already its default, and pandas
# documents that it must remain unspecified under the new stack implementation
# (future_stack=True), so spelling it out is redundant and not forward compatible.
df = df.stack()
df

Name                                 
Elon Musk           NetWorth             $219,000,000,000
                    Age                                50
                    Country/Territory       United States
                    Industry                   Automotive
Jeff Bezos          NetWorth             $171,000,000,000
                                               ...       
Luo Liguo & family  Industry                Manufacturing
Peter Woo           NetWorth              $17,000,000,000
                    Age                                75
                    Country/Territory           Hong Kong
                    Industry                  Real Estate
Length: 404, dtype: object

In [6]:
# The stacked Series can be turned back into a (single-column) DataFrame.
df.to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,0
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Elon Musk,NetWorth,"$219,000,000,000"
Elon Musk,Age,50
Elon Musk,Country/Territory,United States
Elon Musk,Industry,Automotive
Jeff Bezos,NetWorth,"$171,000,000,000"
...,...,...
Luo Liguo & family,Industry,Manufacturing
Peter Woo,NetWorth,"$17,000,000,000"
Peter Woo,Age,75
Peter Woo,Country/Territory,Hong Kong


In [7]:
# After stacking, the index is a MultiIndex of (Name, column label) pairs.
df.index

MultiIndex([(               'Elon Musk',          'NetWorth'),
            (               'Elon Musk',               'Age'),
            (               'Elon Musk', 'Country/Territory'),
            (               'Elon Musk',          'Industry'),
            (              'Jeff Bezos',          'NetWorth'),
            (              'Jeff Bezos',               'Age'),
            (              'Jeff Bezos', 'Country/Territory'),
            (              'Jeff Bezos',          'Industry'),
            ('Bernard Arnault & family',          'NetWorth'),
            ('Bernard Arnault & family',               'Age'),
            ...
            (            'Sun Piaoyang', 'Country/Territory'),
            (            'Sun Piaoyang',          'Industry'),
            (      'Luo Liguo & family',          'NetWorth'),
            (      'Luo Liguo & family',               'Age'),
            (      'Luo Liguo & family', 'Country/Territory'),
            (      'Luo Liguo & family'

`Unstacking` a dataset: `unstack()` reverses stacking by moving an index level back into the columns.

In [8]:
# level=-1 is unstack()'s default: it pivots the innermost index level — here
# the former column labels — back into columns.
df = df.unstack(level=-1)
df

Unnamed: 0_level_0,NetWorth,Age,Country/Territory,Industry
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Elon Musk,"$219,000,000,000",50,United States,Automotive
Jeff Bezos,"$171,000,000,000",58,United States,Technology
Bernard Arnault & family,"$158,000,000,000",73,France,Fashion & Retail
Bill Gates,"$129,000,000,000",66,United States,Technology
Warren Buffett,"$118,000,000,000",91,United States,Finance & Investments
...,...,...,...,...
Vladimir Potanin,"$17,300,000,000",61,Russia,Metals & Mining
Harold Hamm & family,"$17,200,000,000",76,United States,Energy
Sun Piaoyang,"$17,100,000,000",63,China,Healthcare
Luo Liguo & family,"$17,000,000,000",66,China,Manufacturing


### But what happens if we unstack a fresh DataFrame that was never stacked?

In [9]:
# Read the same file again, this time with a two-level row index
# (Industry as the outer level, Name as the inner level) and without calling stack().
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
df = pd.read_csv(
    "/home/ashporwal/hdd/lab/programmes/Python/Pandas/1_Series/TopRichestInWorld.csv", 
    index_col=['Industry', 'Name'])

df


Unnamed: 0_level_0,Unnamed: 1_level_0,NetWorth,Age,Country/Territory,Source
Industry,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Automotive,Elon Musk,"$219,000,000,000",50,United States,"Tesla, SpaceX"
Technology,Jeff Bezos,"$171,000,000,000",58,United States,Amazon
Fashion & Retail,Bernard Arnault & family,"$158,000,000,000",73,France,LVMH
Technology,Bill Gates,"$129,000,000,000",66,United States,Microsoft
Finance & Investments,Warren Buffett,"$118,000,000,000",91,United States,Berkshire Hathaway
...,...,...,...,...,...
Metals & Mining,Vladimir Potanin,"$17,300,000,000",61,Russia,metals
Energy,Harold Hamm & family,"$17,200,000,000",76,United States,oil & gas
Healthcare,Sun Piaoyang,"$17,100,000,000",63,China,pharmaceuticals
Manufacturing,Luo Liguo & family,"$17,000,000,000",66,China,chemicals


In [10]:
# With no arguments, unstack() pivots the innermost index level (Name) into the
# columns; Industry/Name combinations that do not occur in the data show up as
# missing values.
df.unstack()

Unnamed: 0_level_0,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,...,Source,Source,Source,Source,Source,Source,Source,Source,Source,Source
Name,Abigail Johnson,Alain Wertheimer,Alice Walton,Amancio Ortega,Andrew Forrest,Beate Heister & Karl Albrecht Jr. & family,Bernard Arnault & family,Bill Gates,Carlos Slim Helu & family,Changpeng Zhao,...,Vladimir Lisin,Vladimir Potanin,Wang Chuanfu,Wang Wei,Wang Wenyin,Warren Buffett,William Lei Ding,Yang Huiyan & family,Zhang Yiming,Zhong Shanshan
Industry,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Automotive,,,,,,,,,,,...,,,"batteries, automobiles",,,,,,,
Diversified,,,,,,,,,,,...,,,,,,,,,,
Energy,,,,,,,,,,,...,,,,,,,,,,
Fashion & Retail,,"$31,200,000,000","$65,300,000,000","$59,600,000,000",,"$36,800,000,000","$158,000,000,000",,,,...,,,,,,,,,,
Finance & Investments,"$21,200,000,000",,,,,,,,,"$65,000,000,000",...,,,,,,Berkshire Hathaway,,,,
Food & Beverage,,,,,,,,,,,...,,,,,,,,,,"beverages, pharmaceuticals"
Gambling & Casinos,,,,,,,,,,,...,,,,,,,,,,
Healthcare,,,,,,,,,,,...,,,,,,,,,,
Logistics,,,,,,,,,,,...,,,,,,,,,,
Manufacturing,,,,,,,,,,,...,,,,,,,,,,


In [11]:
# We can also specify which index level to unstack.
df.unstack(level=0)  
# Here level 0 (Industry) is moved into the columns, whereas the default call above moved Name.

Unnamed: 0_level_0,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,...,Source,Source,Source,Source,Source,Source,Source,Source,Source,Source
Industry,Automotive,Diversified,Energy,Fashion & Retail,Finance & Investments,Food & Beverage,Gambling & Casinos,Healthcare,Logistics,Manufacturing,...,Gambling & Casinos,Healthcare,Logistics,Manufacturing,Media & Entertainment,Metals & Mining,Real Estate,Service,Technology,Telecom
Name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Abigail Johnson,,,,,"$21,200,000,000",,,,,,...,,,,,,,,,,
Alain Wertheimer,,,,"$31,200,000,000",,,,,,,...,,,,,,,,,,
Alice Walton,,,,"$65,300,000,000",,,,,,,...,,,,,,,,,,
Amancio Ortega,,,,"$59,600,000,000",,,,,,,...,,,,,,,,,,
Andrew Forrest,,,,,,,,,,,...,,,,,,mining,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Warren Buffett,,,,,"$118,000,000,000",,,,,,...,,,,,,,,,,
William Lei Ding,,,,,,,,,,,...,,,,,,,,,online games,
Yang Huiyan & family,,,,,,,,,,,...,,,,,,,real estate,,,
Zhang Yiming,,,,,,,,,,,...,,,,,TikTok,,,,,


In [12]:
# level=1 is the Name level, which is also the innermost level of this index,
# so the result matches the default df.unstack() call.
df.unstack(level=1)

Unnamed: 0_level_0,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,NetWorth,...,Source,Source,Source,Source,Source,Source,Source,Source,Source,Source
Name,Abigail Johnson,Alain Wertheimer,Alice Walton,Amancio Ortega,Andrew Forrest,Beate Heister & Karl Albrecht Jr. & family,Bernard Arnault & family,Bill Gates,Carlos Slim Helu & family,Changpeng Zhao,...,Vladimir Lisin,Vladimir Potanin,Wang Chuanfu,Wang Wei,Wang Wenyin,Warren Buffett,William Lei Ding,Yang Huiyan & family,Zhang Yiming,Zhong Shanshan
Industry,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Automotive,,,,,,,,,,,...,,,"batteries, automobiles",,,,,,,
Diversified,,,,,,,,,,,...,,,,,,,,,,
Energy,,,,,,,,,,,...,,,,,,,,,,
Fashion & Retail,,"$31,200,000,000","$65,300,000,000","$59,600,000,000",,"$36,800,000,000","$158,000,000,000",,,,...,,,,,,,,,,
Finance & Investments,"$21,200,000,000",,,,,,,,,"$65,000,000,000",...,,,,,,Berkshire Hathaway,,,,
Food & Beverage,,,,,,,,,,,...,,,,,,,,,,"beverages, pharmaceuticals"
Gambling & Casinos,,,,,,,,,,,...,,,,,,,,,,
Healthcare,,,,,,,,,,,...,,,,,,,,,,
Logistics,,,,,,,,,,,...,,,,,,,,,,
Manufacturing,,,,,,,,,,,...,,,,,,,,,,
