In [25]:
import pandas as pd
import numpy as np

# **常見的轉置應用是寬表格（Wide Format）與長表格（Long Format）之間的互相轉換**


寬表格是比較熟悉的資料框樣式：一列是一筆獨立的觀測值，加入資訊是以增添欄位的方式實現，故得其名為寬表格。長表格是比較陌生的資料框樣式：以一欄 key 搭配一欄 value 來記錄資料的項目與值，加入資訊是以增添列數的方式實現，故得其名為長表格。

多數時候我們所使用的資料皆是寬表格的外觀，像是 NBA 球員的基本資料，一列是獨特的一名球員

In [31]:
# Download the NBA player profile table.
player_profile = pd.read_csv(
    "https://python4ds.s3-ap-northeast-1.amazonaws.com/player_profile.csv"
)

# Keep only the display name, height and weight columns; this three-column
# frame is the "wide format" table used in the rest of the notebook.
profile_columns = ["temporaryDisplayName", "heightMeters", "weightKilograms"]
wide_format = player_profile[profile_columns]

# Preview the first five rows.
wide_format.head()

Unnamed: 0,temporaryDisplayName,heightMeters,weightKilograms
0,"Adams, Steven",2.13,120.2
1,"Adebayo, Bam",2.08,115.7
2,"Adel, Deng",2.01,90.7
3,"Aldridge, LaMarcus",2.11,117.9
4,"Alexander, Kyle",2.11,99.8


In [34]:
# Melt the wide table into long format. The player name (id_vars) is kept
# fixed as the identifier column, while the height and weight columns are
# unpivoted into a "key" column (variable name) and a "value" column.
long_format = pd.melt(
    wide_format,
    id_vars="temporaryDisplayName",
    value_vars=["heightMeters", "weightKilograms"],
    var_name="key",
    value_name="value",
)

# A notebook cell renders only its last expression, so a single display
# statement is kept. Sorting by player name places each player's two rows
# (height and weight) next to each other.
long_format.sort_values("temporaryDisplayName")

Unnamed: 0,temporaryDisplayName,key,value
0,"Adams, Steven",heightMeters,2.13
524,"Adams, Steven",weightKilograms,120.20
525,"Adebayo, Bam",weightKilograms,115.70
1,"Adebayo, Bam",heightMeters,2.08
2,"Adel, Deng",heightMeters,2.01
...,...,...,...
521,"Zeller, Cody",heightMeters,2.13
1046,"Zizic, Ante",weightKilograms,115.20
522,"Zizic, Ante",heightMeters,2.11
523,"Zubac, Ivica",heightMeters,2.16


# **將長表格的外觀轉換為寬表格，會應用到類似分組的操作，以球員姓名作為分組依據，將數值資料樞紐回兩個變數，使用資料框的 .pivot() 方法**

In [35]:
# Pivot the long table back into wide form and preview it.
# `index` is the same column that was passed as `id_vars` to pd.melt; the
# distinct values of "key" become the new column labels and "value" fills
# the cells.
# `long_format` from the melt cell above is reused here, so the CSV is not
# downloaded and melted a second time.
long_format.pivot(index="temporaryDisplayName", columns="key", values="value").head()

key,heightMeters,weightKilograms
temporaryDisplayName,Unnamed: 1_level_1,Unnamed: 2_level_1
"Adams, Steven",2.13,120.2
"Adebayo, Bam",2.08,115.7
"Adel, Deng",2.01,90.7
"Aldridge, LaMarcus",2.11,117.9
"Alexander, Kyle",2.11,99.8


# **最後稍微整理一下，利用 .reset_index() 以及刪除欄索引的名稱（.rename_axis(None, axis=1)），就能將樞紐後的表格回復成與原本一模一樣的寬表格**

In [37]:
# Rebuild the wide table from the existing long table in one chain:
#   1. pivot            - one row per player, one column per distinct "key"
#   2. reset_index      - turn the player-name index back into a regular column
#   3. rename_axis(None, axis=1) - drop the leftover "key" label on the columns axis
wide_format = (
    long_format
    .pivot(index="temporaryDisplayName", columns="key", values="value")
    .reset_index()
    .rename_axis(None, axis=1)
)
wide_format.head()

Unnamed: 0,temporaryDisplayName,heightMeters,weightKilograms
0,"Adams, Steven",2.13,120.2
1,"Adebayo, Bam",2.08,115.7
2,"Adel, Deng",2.01,90.7
3,"Aldridge, LaMarcus",2.11,117.9
4,"Alexander, Kyle",2.11,99.8
