## Add/Remove Rows and Columns From DataFrames

In [1]:
import pandas as pd
import numpy as np

以下為 Python dictionary 各種寫法

In [2]:
# A single record as a flat dict: every key maps to one scalar value.
person = dict(
    first="Corey",
    last="Schafer",
    email="CoreySchafer@gmail.com",
)

In [3]:
# The same record in column-oriented form: every key maps to a list of
# values, one list entry per row (here a single row).
people = dict(
    first=["Corey"],
    last=["Schafer"],
    email=["CoreySchafer@gmail.com"],
)

In [4]:
# Column-oriented dict holding three rows: position i of every list belongs
# to the same person.
people = dict(
    first=["Corey", "Jane", "John"],
    last=["Schafer", "Doe", "Doe"],
    email=["CoreySchafer@gmail.com", "JaneDoe@gmail.com", "JohnDoe@gmail.com"],
)

In [5]:
# pandas can build a DataFrame directly from a dict of lists: the keys become
# the column labels and the list items become the rows.
df = pd.DataFrame(data=people)

In [6]:
df  # display the DataFrame that was built from the dict

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreySchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [8]:
# Columns can be combined element-wise with any operation their dtype
# supports; for string columns, + concatenates, giving "first last" per row.
df['first'] + ' ' + df['last']

0    Corey Schafer
1         Jane Doe
2         John Doe
dtype: object

In [9]:
# Assigning to a column label that does not exist yet creates that column;
# here it stores the concatenated names computed above.
df['full_name'] = df['first'] + ' ' + df['last']
df

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreySchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@gmail.com,Jane Doe
2,John,Doe,JohnDoe@gmail.com,John Doe


In [10]:
# Like most pandas methods, drop() returns a new DataFrame. The original is
# only changed when inplace=True is passed (or the result is assigned back).
df.drop(['first', 'last'], axis='columns')

Unnamed: 0,email,full_name
0,CoreySchafer@gmail.com,Corey Schafer
1,JaneDoe@gmail.com,Jane Doe
2,JohnDoe@gmail.com,John Doe


In [11]:
df  # the original DataFrame is unchanged: the drop() above was not stored

Unnamed: 0,first,last,email,full_name
0,Corey,Schafer,CoreySchafer@gmail.com,Corey Schafer
1,Jane,Doe,JaneDoe@gmail.com,Jane Doe
2,John,Doe,JohnDoe@gmail.com,John Doe


In [12]:
# Same drop as before, this time with inplace=True so that df itself loses
# the 'first' and 'last' columns.
df.drop(['first', 'last'], axis='columns', inplace=True)
df

Unnamed: 0,email,full_name
0,CoreySchafer@gmail.com,Corey Schafer
1,JaneDoe@gmail.com,Jane Doe
2,JohnDoe@gmail.com,John Doe


In [13]:
# Make a separate DataFrame with the same content as df (deep=True is the
# default), so the experiments on df_copy leave df alone.
df_copy = df.copy(deep=True)

In [14]:
# str.split(' ') splits every string of the column on spaces and returns a
# single Series whose elements are lists of the pieces.
df_copy['full_name'].str.split(' ')

0    [Corey, Schafer]
1         [Jane, Doe]
2         [John, Doe]
Name: full_name, dtype: object

In [15]:
# Store the split result in the copy's full_name column (df itself is left
# untouched); every cell of that column now holds a list instead of a string.
df_copy['full_name'] = df['full_name'].str.split(' ')
df_copy

Unnamed: 0,email,full_name
0,CoreySchafer@gmail.com,"[Corey, Schafer]"
1,JaneDoe@gmail.com,"[Jane, Doe]"
2,JohnDoe@gmail.com,"[John, Doe]"


In [20]:
# NOTE: the NaN result is not caused by a delimiter that cannot be found.
# df_copy['full_name'] holds lists rather than strings at this point, and the
# .str accessor yields NaN for elements that are not strings. Splitting a
# string on a delimiter it does not contain returns a one-element list instead.
df_copy['full_name'].str.split(' ')

0   NaN
1   NaN
2   NaN
Name: full_name, dtype: float64

In [21]:
# expand=True spreads the split pieces across separate columns (labelled 0
# and 1 here) instead of returning one column of lists.
df['full_name'].str.split(' ', expand=True)

Unnamed: 0,0,1
0,Corey,Schafer
1,Jane,Doe
2,John,Doe


In [22]:
# The expanded result has two columns, so two column labels are needed on the
# left-hand side to receive it.
df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)
df

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe


In [23]:
# Demonstrates that a plain dict cannot be appended unless ignore_index=True
# is given. The exception is the point of this cell, so it is caught and
# printed instead of aborting "Restart & Run All".
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; on those
# newer versions the call fails with AttributeError, which is reported too.
try:
    df.append({'first': 'Tony'})
except (TypeError, AttributeError) as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Can only append a dict if ignore_index=True

In [24]:
# Add one row given as a dict. DataFrame.append was deprecated in pandas 1.4
# and removed in 2.0, so the dict is wrapped in a one-row DataFrame and joined
# with pd.concat. ignore_index=True renumbers the rows 0..n-1; columns the new
# row does not provide are filled with NaN. Displayed only, not stored.
pd.concat([df, pd.DataFrame([{'first': 'Tony'}])], ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe
3,,,Tony,


In [25]:
# Add the one-row dict and bind the result back to df so the new row persists.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
df = pd.concat([df, pd.DataFrame([{'first': 'Tony'}])], ignore_index=True)
df

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe
3,,,Tony,


In [26]:
# A second, two-row DataFrame built from a fresh column-oriented dict.
people = dict(
    first=["Tony", "Steve"],
    last=["Stark", "Roger"],
    email=["Ironman@avenge.com", "Cap@avenge.com"],
)
df2 = pd.DataFrame(data=people)

In [27]:
df2  # display the second DataFrame

Unnamed: 0,first,last,email
0,Tony,Stark,Ironman@avenge.com
1,Steve,Roger,Cap@avenge.com


In [28]:
# Stack df2's rows under df. pd.concat is used because DataFrame.append was
# deprecated in pandas 1.4 and removed in 2.0. Without ignore_index=True each
# frame keeps its own row labels, so 0 and 1 appear twice in the result.
# Displayed only, not stored.
pd.concat([df, df2])

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe
3,,,Tony,
0,Ironman@avenge.com,,Tony,Stark
1,Cap@avenge.com,,Steve,Roger


In [29]:
# Same concatenation with ignore_index=True: the rows are relabelled 0..n-1.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
# Displayed only, not stored.
pd.concat([df, df2], ignore_index=True)

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe
3,,,Tony,
4,Ironman@avenge.com,,Tony,Stark
5,Cap@avenge.com,,Steve,Roger


In [30]:
# Persist the concatenation by rebinding df. Rows are added, never merged: the
# existing row whose 'first' is Tony stays as it is, and df2's Tony becomes a
# separate row. pd.concat is used because DataFrame.append was removed in
# pandas 2.0.
df = pd.concat([df, df2], ignore_index=True)
df

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe
3,,,Tony,
4,Ironman@avenge.com,,Tony,Stark
5,Cap@avenge.com,,Steve,Roger


In [32]:
# Drop the row whose index label is 3 (displayed only, not stored).
df.drop(index = 3)

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe
4,Ironman@avenge.com,,Tony,Stark
5,Cap@avenge.com,,Steve,Roger


In [33]:
# Same effect as drop(index=3); the author prefers the explicit index= form
# used above. Displayed only, not stored.
df.drop(3)

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe
4,Ironman@avenge.com,,Tony,Stark
5,Cap@avenge.com,,Steve,Roger


In [34]:
# Rebind df to the result so the row labelled 3 is actually removed.
df = df.drop(index = 3)
df

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
1,JaneDoe@gmail.com,Jane Doe,Jane,Doe
2,JohnDoe@gmail.com,John Doe,John,Doe
4,Ironman@avenge.com,,Tony,Stark
5,Cap@avenge.com,,Steve,Roger


In [37]:
# Boolean mask marking the rows whose 'last' column equals 'Doe'.
filt = df['last'].eq('Doe')
# Selecting with the mask and taking .index gives the labels of those rows ...
print(df.loc[filt].index)
# ... which drop() accepts, removing every 'Doe' row (displayed only, not stored).
df.drop(index=df.loc[filt].index)

Int64Index([1, 2], dtype='int64')


Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
4,Ironman@avenge.com,,Tony,Stark
5,Cap@avenge.com,,Steve,Roger


In [38]:
# Rebind df so the rows selected by filt (last == 'Doe') are actually removed.
df = df.drop(index = df[filt].index)
df

Unnamed: 0,email,full_name,first,last
0,CoreySchafer@gmail.com,Corey Schafer,Corey,Schafer
4,Ironman@avenge.com,,Tony,Stark
5,Cap@avenge.com,,Steve,Roger


In [39]:
# Dropping columns: pass their labels via columns= (displayed only, not stored).
df.drop(columns = ['email', 'full_name'])

Unnamed: 0,first,last
0,Corey,Schafer
4,Tony,Stark
5,Steve,Roger
