# Preppin Data 
## Week 45: Strange Table Structure

https://preppindata.blogspot.com/2022/11/2022-week-45-strange-table-structure.html

#### Import libraries

In [1]:
import pandas as pd



#### Import file using the header argument to specify a multi-index

In [2]:
# Rows 2 and 3 (0-based) of the sheet hold the month names and the
# Sales/Profit labels, so both are read together as a two-level column header.
file = 'Strange table structure.xlsx'
header_rows = [2, 3]
df = pd.read_excel(file, header=header_rows)
df.head()

Unnamed: 0_level_0,Unnamed: 0_level_0,Jan,Jan,Feb,Feb,Mar,Mar,Apr,Apr,May,...,Jun,Jun,Jul,Jul,Aug,Aug,Sep,Sep,Oct,Oct
Unnamed: 0_level_1,Store,Sales,Profit,Sales,Profit,Sales,Profit,Sales,Profit,Sales,...,Sales,Profit,Sales,Profit,Sales,Profit,Sales,Profit,Sales,Profit
0,Dulwich,9822,762,4859,299,3230,760,5386,-301,9073,...,4952,-330,7273,574,9042,-795,1257,474,6418,539
1,Wimbledon,2263,444,8531,27,4855,492,5505,-87,7592,...,4659,-394,4519,-680,8556,284,8782,924,9679,-706
2,Notting Hill,6100,874,5203,202,1048,563,5151,86,1896,...,1768,-918,8945,552,5904,854,2621,-851,9385,656
3,Lewisham,2909,-129,5393,960,6262,-446,7443,-17,6130,...,6893,877,4814,-340,7549,775,2888,197,2672,-172


#### Use Transpose to Switch the Columns and The Rows

In [3]:
# Swap the axes: every (month, measure) header pair becomes a row and
# every store record becomes a column.
t = df.T
t

Unnamed: 0,Unnamed: 1,0,1,2,3
Unnamed: 0_level_0,Store,Dulwich,Wimbledon,Notting Hill,Lewisham
Jan,Sales,9822,2263,6100,2909
Jan,Profit,762,444,874,-129
Feb,Sales,4859,8531,5203,5393
Feb,Profit,299,27,202,960
Mar,Sales,3230,4855,1048,6262
Mar,Profit,760,492,563,-446
Apr,Sales,5386,5505,5151,7443
Apr,Profit,-301,-87,86,-17
May,Sales,9073,7592,1896,6130


#### Set the store names as the column names

1. Set the column names equal to the first row (the store names)
2. Drop the first row

In [4]:
# Promote the first row (the store names) to be the column labels.
# The row itself is still present in the data here; it is dropped in the next cell.
store_names = t.iloc[0]
t.columns = store_names
t

Unnamed: 0,"(Unnamed: 0_level_0, Store)",Dulwich,Wimbledon,Notting Hill,Lewisham
Unnamed: 0_level_0,Store,Dulwich,Wimbledon,Notting Hill,Lewisham
Jan,Sales,9822,2263,6100,2909
Jan,Profit,762,444,874,-129
Feb,Sales,4859,8531,5203,5393
Feb,Profit,299,27,202,960
Mar,Sales,3230,4855,1048,6262
Mar,Profit,760,492,563,-446
Apr,Sales,5386,5505,5151,7443
Apr,Profit,-301,-87,86,-17
May,Sales,9073,7592,1896,6130


In [5]:
# Discard the leading row that held the store names, now that they serve as
# the column labels. .copy() gives an independent frame rather than a slice.
t = t.iloc[1:].copy()
t

Unnamed: 0,"(Unnamed: 0_level_0, Store)",Dulwich,Wimbledon,Notting Hill,Lewisham
Jan,Sales,9822,2263,6100,2909
Jan,Profit,762,444,874,-129
Feb,Sales,4859,8531,5203,5393
Feb,Profit,299,27,202,960
Mar,Sales,3230,4855,1048,6262
Mar,Profit,760,492,563,-446
Apr,Sales,5386,5505,5151,7443
Apr,Profit,-301,-87,86,-17
May,Sales,9073,7592,1896,6130
May,Profit,-125,-251,630,-658


#### Reset the index to flatten the row indices (Month, Sales/Profit measure)

In [6]:
# Turn both index levels into ordinary columns. The levels are unnamed, so
# pandas labels them level_0 (month) and level_1 (Sales/Profit).
t2 = t.reset_index(drop=False)
t2

"(Unnamed: 0_level_0, Store)",level_0,level_1,Dulwich,Wimbledon,Notting Hill,Lewisham
0,Jan,Sales,9822,2263,6100,2909
1,Jan,Profit,762,444,874,-129
2,Feb,Sales,4859,8531,5203,5393
3,Feb,Profit,299,27,202,960
4,Mar,Sales,3230,4855,1048,6262
5,Mar,Profit,760,492,563,-446
6,Apr,Sales,5386,5505,5151,7443
7,Apr,Profit,-301,-87,86,-17
8,May,Sales,9073,7592,1896,6130
9,May,Profit,-125,-251,630,-658


#### Use df.melt to reshape the data.

- The id_vars argument contains the row elements to preserve (Month, Sales vs. Profit Indicator)

- The value_vars argument lists the columns to unpivot (one per store); their names and values populate the two new columns

In [7]:
# Unpivot the store columns into long form: one row per (month, measure, store).
# The store columns are derived from the frame instead of being typed out, so a
# store added to the workbook is picked up rather than silently dropped.
id_columns = ['level_0', 'level_1']
store_columns = [col for col in t2.columns if col not in id_columns]
t2 = t2.melt(id_vars=id_columns, value_vars=store_columns)
t2

Unnamed: 0,level_0,level_1,"(Unnamed: 0_level_0, Store)",value
0,Jan,Sales,Dulwich,9822
1,Jan,Profit,Dulwich,762
2,Feb,Sales,Dulwich,4859
3,Feb,Profit,Dulwich,299
4,Mar,Sales,Dulwich,3230
...,...,...,...,...
75,Aug,Profit,Lewisham,775
76,Sep,Sales,Lewisham,2888
77,Sep,Profit,Lewisham,197
78,Oct,Sales,Lewisham,2672


#### Pivot the data to create separate sales and profit columns

- The index argument lists the columns to keep as row identifiers
- The columns argument specifies THE NAMES of the new columns (and how they will be split)
- The values argument specifies which data will populate the new columns

In [8]:
# Spread the Sales/Profit labels held in level_1 into separate columns, keeping
# one row per (month, store) pair. The store column still carries the tuple
# label inherited from the original two-level header.
store_key = ('Unnamed: 0_level_0', 'Store')
t2 = pd.pivot(t2, index=['level_0', store_key], columns='level_1', values='value')
t2

Unnamed: 0_level_0,level_1,Profit,Sales
level_0,"(Unnamed: 0_level_0, Store)",Unnamed: 2_level_1,Unnamed: 3_level_1
Apr,Dulwich,-301,5386
Apr,Lewisham,-17,7443
Apr,Notting Hill,86,5151
Apr,Wimbledon,-87,5505
Aug,Dulwich,-795,9042
Aug,Lewisham,775,7549
Aug,Notting Hill,854,5904
Aug,Wimbledon,284,8556
Feb,Dulwich,299,4859
Feb,Lewisham,960,5393


#### Use Reset Index to flatten the Month and Store columns again

- Also rename the columns for cleanliness

In [9]:
# Move the (month, store) index levels back into ordinary columns.
t2 = t2.reset_index(drop=False)
t2

level_1,level_0,"(Unnamed: 0_level_0, Store)",Profit,Sales
0,Apr,Dulwich,-301,5386
1,Apr,Lewisham,-17,7443
2,Apr,Notting Hill,86,5151
3,Apr,Wimbledon,-87,5505
4,Aug,Dulwich,-795,9042
5,Aug,Lewisham,775,7549
6,Aug,Notting Hill,854,5904
7,Aug,Wimbledon,284,8556
8,Feb,Dulwich,299,4859
9,Feb,Lewisham,960,5393


In [None]:
# Replace the column labels positionally with clean names; the order matches
# the frame shown above (month, store, Profit, Sales).
t2 = t2.set_axis(['Month', 'Store', 'Profit', 'Sales'], axis=1)

### Export to csv

In [10]:
# Write the reshaped table to disk, leaving out the row index.
output_path = 'reshaped_data.csv'
t2.to_csv(output_path, index=False)