# Long-Wide Format
[towardsdatascience.com](https://towardsdatascience.com/long-and-wide-formats-in-data-explained-e48d7c9a06cb)

## Import Modules

In [1]:
import pandas as pd

## Prepare Data

In [38]:
# Sample data in long format: one row per (name, year) observation.
# The dict is bound to `data_raw`, the name the DataFrame constructor below reads;
# previously it was bound to `data`, so this cell raised NameError on a fresh kernel.
data_raw = {
    "name": ["Alice", "Bob", "Charlie", "Alice", "Bob", "Charlie"],
    "year": [2020, 2020, 2020, 2021, 2021, 2021],
    "height": [160, 170, 165, 162, 172, 167]
}

df_raw = pd.DataFrame(data_raw)
df_raw

Unnamed: 0,name,year,height
0,Alice,2020,160
1,Bob,2020,170
2,Charlie,2020,165
3,Alice,2021,162
4,Bob,2021,172
5,Charlie,2021,167


## Convert "raw/long" into "wide" format

In [41]:
# work on a copy so df_raw stays untouched, then key every row by (name, year)
df_long = df_raw.copy()
df_long_indexed = df_long.set_index(['name', 'year'])

# pivot the inner "year" index level out into the columns;
# the columns become a two-level index of ("height", <year>) pairs
df_wide = df_long_indexed.unstack(level='year')

# collapse each ("height", <year>) pair into a single "height_{year}" label
df_wide.columns = [f'height_{year}' for _, year in df_wide.columns]

# move "name" from the index back into a regular column
df_wide = df_wide.reset_index()

df_wide

Unnamed: 0,name,height_2020,height_2021
0,Alice,160,162
1,Bob,170,172
2,Charlie,165,167


An alternative approach: build the wide table one year at a time with a Python loop and `merge`:

In [21]:
# start with a one-column frame holding each distinct "name"
# ("name" is a regular column here, not the index)
df_wide = pd.DataFrame({'name': df_raw['name'].unique()})

# add one "height_{year}" column per distinct year
for year in df_raw['year'].unique():
    # rows of this year only, reduced to the join key and the value
    is_year = df_raw['year'] == year
    df_filtered = df_raw.loc[is_year, ['name', 'height']]

    # label the value column after the year it belongs to
    df_filtered = df_filtered.rename(columns={'height': f'height_{year}'})

    # left-join on "name" so every name from df_wide is kept
    df_wide = df_wide.merge(df_filtered, on='name', how='left')

df_wide

Unnamed: 0,name,height_2020,height_2021
0,Alice,160,162
1,Bob,170,172
2,Charlie,165,167


## Convert "raw/long" into "wide" format (Pivot)

In [17]:
# one row per "name", one column per "year", cells filled with "height"
df_pivot = df_raw.pivot(index='name', columns='year', values='height')

# relabel the year columns to the "height_{year}" schema
df_pivot.columns = [f'height_{year}' for year in df_pivot.columns]

df_pivot

Unnamed: 0_level_0,height_2020,height_2021
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,160,162
Bob,170,172
Charlie,165,167


In [35]:
# copy df_pivot and re-assign its column labels as a plain Python list
# NOTE(review): df_pivot's columns were already set from a list of strings, so this
# appears to leave the labels unchanged and "name" stays the index; if a flat table
# with "name" as a column is wanted, reset_index() may be what was intended - confirm.
df_flat_pivot = df_pivot.copy()
df_flat_pivot.columns = list(df_flat_pivot.columns)
df_flat_pivot

Unnamed: 0_level_0,height_2020,height_2021
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,160,162
Bob,170,172
Charlie,165,167


## Convert "wide" into "raw/long" format

In [28]:
# melt every non-"name" column of df_wide into one ("year", "height") row per name;
# after melting, "year" holds the former column labels ("height_2020", ...)
df_long = pd.melt(df_wide, id_vars='name', var_name='year', value_name='height')

# keep only the 4-digit year from each label:
# - expand=False makes str.extract return a Series instead of a one-column DataFrame
# - the int64 cast turns the extracted strings back into integers, so "year" is
#   numeric again like the integer years df_raw was built from
df_long['year'] = df_long['year'].str.extract(r'(\d{4})', expand=False).astype('int64')
df_long

Unnamed: 0,name,year,height
0,Alice,2020,160
1,Bob,2020,170
2,Charlie,2020,165
3,Alice,2021,162
4,Bob,2021,172
5,Charlie,2021,167
