## Reshaping and Pivoting

In [3]:
import numpy as np
import pandas as pd

# Long-format sample: three consecutive days, each observed twice
# (New York reports Temperature, Los Angeles reports Humidity).
observation_days = ['2023-01-01', '2023-01-02', '2023-01-03']
data = {
    'Date': pd.to_datetime([day for day in observation_days for _ in range(2)]),
    'City': ['New York', 'Los Angeles'] * 3,
    'Variable': ['Temperature', 'Humidity'] * 3,
    'Value': [15, 60, 17, 65, 16, 62],
}
df_long = pd.DataFrame(data)
df_long

Unnamed: 0,Date,City,Variable,Value
0,2023-01-01,New York,Temperature,15
1,2023-01-01,Los Angeles,Humidity,60
2,2023-01-02,New York,Temperature,17
3,2023-01-02,Los Angeles,Humidity,65
4,2023-01-03,New York,Temperature,16
5,2023-01-03,Los Angeles,Humidity,62


In [4]:
# Wide-format sample: one row per city, one column per (metric, month) pair.
wide_columns = ["City", "Temp_Jan", "Temp_Feb", "Humidity_Jan", "Humidity_Feb"]
wide_rows = [
    ("New York", 10, 12, 55, 58),
    ("Los Angeles", 20, 22, 65, 68),
    ("Chicago", 5, 7, 50, 52),
]
df_wide = pd.DataFrame(wide_rows, columns=wide_columns)

# `df` is a second DataFrame object wrapping the same wide data.
df = pd.DataFrame(df_wide)
df

Unnamed: 0,City,Temp_Jan,Temp_Feb,Humidity_Jan,Humidity_Feb
0,New York,10,12,55,58
1,Los Angeles,20,22,65,68
2,Chicago,5,7,50,52


In [5]:
# 49. Pivot df_long so that each 'Variable' becomes a column populated by 'Value', using 'Date' as the index.
# NOTE: the original prompt also asks for one row per 'City', which conflicts with
# indexing by 'Date'; the Date-indexed reading is the one implemented here.
daily_by_variable = df_long.pivot(
    index='Date',
    columns='Variable',
    values='Value',
)
daily_by_variable

Variable,Humidity,Temperature
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-01,60,15
2023-01-02,65,17
2023-01-03,62,16


In [25]:
# 50. Use pivot_table on df_sales (from Section 1) to show the average sales for each 'Product' (rows) across each 'Region' (columns).
# df_sales is (re)defined here so this cell does not depend on any earlier section having been run.
df_sales = pd.DataFrame( {
    'Region': ['North', 'South', 'North', 'South', 'East', 'West', 'East', 'West', 'North', 'South'],
    'Product': ['A', 'A', 'B', 'B', 'A', 'B', 'C', 'C', 'A', 'C'],
    'Sales': [100, 150, 200, 250, 120, 220, 180, 280, 110, 170],
    'Quantity': [10, 15, 20, 25, 12, 22, 18, 28, 11, 17]
})

# Mean 'Sales' per Product (rows) and Region (columns).
# Product/Region combinations that never occur in df_sales come back as NaN.
avg_sales_pivot = pd.pivot_table(
    df_sales,
    values='Sales',
    index='Product',
    columns='Region',
    aggfunc='mean',
)
avg_sales_pivot

In [26]:
# 51. Melt df_wide from wide to long format, keeping 'City' as the identifier column.
# Every other column is unpivoted into ('variable', 'value') pairs.
id_columns = ['City']
melted = df_wide.melt(id_vars=id_columns)
melted

Unnamed: 0,City,variable,value
0,New York,Temp_Jan,10
1,Los Angeles,Temp_Jan,20
2,Chicago,Temp_Jan,5
3,New York,Temp_Feb,12
4,Los Angeles,Temp_Feb,22
5,Chicago,Temp_Feb,7
6,New York,Humidity_Jan,55
7,Los Angeles,Humidity_Jan,65
8,Chicago,Humidity_Jan,50
9,New York,Humidity_Feb,58


In [27]:
# 52. Split the 'variable' column of the melted frame into 'Metric' (e.g., Temp, Humidity) and 'Month' (e.g., Jan, Feb).
# Labels look like '<Metric>_<Month>', so splitting on '_' yields two positional columns (0 and 1).
name_parts = melted['variable'].str.split('_', expand=True)
melted['Metric'] = name_parts[0]
melted['Month'] = name_parts[1]
melted

Unnamed: 0,City,variable,value,Metric,Month
0,New York,Temp_Jan,10,Temp,Jan
1,Los Angeles,Temp_Jan,20,Temp,Jan
2,Chicago,Temp_Jan,5,Temp,Jan
3,New York,Temp_Feb,12,Temp,Feb
4,Los Angeles,Temp_Feb,22,Temp,Feb
5,Chicago,Temp_Feb,7,Temp,Feb
6,New York,Humidity_Jan,55,Humidity,Jan
7,Los Angeles,Humidity_Jan,65,Humidity,Jan
8,Chicago,Humidity_Jan,50,Humidity,Jan
9,New York,Humidity_Feb,58,Humidity,Feb


In [28]:
# 53. Cross-tabulate df_sales (pd.crosstab) to count how often each 'Product' occurs in each 'Region'.
# Rows are regions, columns are products; combinations that never occur count as 0.
# NOTE: this rebinds the name `df` to the frequency table.
df = pd.crosstab(
    index=df_sales['Region'],
    columns=df_sales['Product'],
)
df

Product,A,B,C
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
East,1,0,1
North,2,1,0
South,1,1,1
West,0,1,1


In [29]:
# 54. Group df_sales by 'Region' and 'Product', then unstack to get a pivot-like table.
group_keys = ['Region', 'Product']
# Total 'Sales' per (Region, Product) pair, as a Series with a two-level index.
grouped_sales = df_sales.groupby(group_keys)['Sales'].sum()
# Move the innermost index level ('Product') into the columns; absent pairs become NaN.
pivot_sales = grouped_sales.unstack(level='Product')
pivot_sales

Product,A,B,C
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
East,120.0,,180.0
North,210.0,200.0,
South,150.0,250.0,170.0
West,,220.0,280.0


In [30]:
# 55. From df_long, build a pivot table holding the maximum 'Value' of each 'Variable' on each 'Date'.
df_long.pivot_table(
    index='Date',
    columns='Variable',
    values='Value',
    aggfunc='max',
)

Variable,Humidity,Temperature
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-01-01,60,15
2023-01-02,65,17
2023-01-03,62,16


In [31]:
# 56. Melt df_wide again, this time unpivoting only 'Temp_Jan' and 'Humidity_Jan' as value variables.
# Columns not listed in id_vars or value_vars are dropped from the result.
january_columns = ['Temp_Jan', 'Humidity_Jan']
df_wide.melt(id_vars=['City'], value_vars=january_columns)

Unnamed: 0,City,variable,value
0,New York,Temp_Jan,10
1,Los Angeles,Temp_Jan,20
2,Chicago,Temp_Jan,5
3,New York,Humidity_Jan,55
4,Los Angeles,Humidity_Jan,65
5,Chicago,Humidity_Jan,50


In [32]:
# 57. Build a MultiIndex DataFrame from df_sales with 'Region' and 'Product' as the index levels.
# set_index returns a new frame; df_sales itself is left unchanged.
index_levels = ['Region', 'Product']
df_sales.set_index(keys=index_levels)

Unnamed: 0_level_0,Unnamed: 1_level_0,Sales,Quantity
Region,Product,Unnamed: 2_level_1,Unnamed: 3_level_1
North,A,100,10
South,A,150,15
North,B,200,20
South,B,250,25
East,A,120,12
West,B,220,22
East,C,180,18
West,C,280,28
North,A,110,11
South,C,170,17


In [33]:
# 58. Use stack() on df_wide after setting 'City' as the index. What does it do?
# Answer: stack() moves the column labels into the innermost index level, so the wide
# frame collapses into a single Series indexed by (City, original column name).
city_indexed = df_wide.set_index('City')
city_indexed.stack()

City                     
New York     Temp_Jan        10
             Temp_Feb        12
             Humidity_Jan    55
             Humidity_Feb    58
Los Angeles  Temp_Jan        20
             Temp_Feb        22
             Humidity_Jan    65
             Humidity_Feb    68
Chicago      Temp_Jan         5
             Temp_Feb         7
             Humidity_Jan    50
             Humidity_Feb    52
dtype: int64

In [34]:
# 59. Take the question-50 pivot (mean 'Sales' by Product x Region) and replace any NaN with 0.
pivot50 = df_sales.pivot_table(
    index='Product',
    columns='Region',
    values='Sales',
    aggfunc='mean',
)
# fillna returns a new frame; pivot50 itself keeps its NaN entries.
pivot50.fillna(value=0)

Region,East,North,South,West
Product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,120.0,105.0,150.0,0.0
B,0.0,200.0,250.0,220.0
C,180.0,0.0,170.0,280.0


In [35]:
# 60. Reshape df_long into a frame indexed by ('Date', 'City') with one column per 'Variable'.
# No aggfunc is passed, so pivot_table uses its default aggregation for each cell;
# (Date, City, Variable) combinations that do not occur are NaN.
row_levels = ['Date', 'City']
reshaped = pd.pivot_table(
    df_long,
    index=row_levels,
    columns='Variable',
    values='Value',
)
reshaped

Unnamed: 0_level_0,Variable,Humidity,Temperature
Date,City,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-01-01,Los Angeles,60.0,
2023-01-01,New York,,15.0
2023-01-02,Los Angeles,65.0,
2023-01-02,New York,,17.0
2023-01-03,Los Angeles,62.0,
2023-01-03,New York,,16.0
