In [1]:
import pandas as pd

# Small demo frame (ID / Name / Cost) reused by the indexing examples below.
table = pd.DataFrame(
    [
        (1, "A", 10),
        (2, "B", 20),
        (3, "C", 30),
        (4, "D", 20),
    ],
    columns=["ID", "Name", "Cost"],
)

table


Unnamed: 0,ID,Name,Cost
0,1,A,10
1,2,B,20
2,3,C,30
3,4,D,20


python Dictionary 模式: `table名字["Column名字"]`

Dot (.) 模式: `table名字.Column名字`

In [4]:
# Bracket (dictionary-style) access to the "ID" column.
table["ID"]
# Attribute (dot) access to the same column; as the cell's last expression, this is the value displayed.
table.ID

0    1
1    2
2    3
3    4
Name: ID, dtype: int64

In [2]:
# A list of column names inside the brackets selects several columns at once.
table[["ID", "Name"]]

Unnamed: 0,ID,Name
0,1,A
1,2,B
2,3,C
3,4,D


`.loc[row行數, "Column名字"]`

In [7]:
# Row label 2, column "Name" -> a single value.
table.loc[2, "Name"]

'C'

選擇 連在一起的 column / row, (注意: [i:j] j 是包括的)

In [8]:
# Label slices include both ends: rows 1 through 3, columns "ID" through "Name".
table.loc[1:3, "ID":"Name"]

Unnamed: 0,ID,Name
1,2,B
2,3,C
3,4,D


沒有連在一起的

In [3]:
# Lists pick non-adjacent rows/columns; the result follows the order given in each list.
table.loc[[1,3], ["Cost", "ID"]]

Unnamed: 0,Cost,ID
1,20,2
3,20,4


`.iloc[row行數, column行數]`

`.iloc[rowI:rowJ, colI:colJ]` (注意: 跟 `.loc` 不一樣, `.iloc` 的 [i:j] j 是不包括的)

In [10]:
# Position-based lookup: row position 1, column position 1 (counting from 0).
table.iloc[1, 1]

'B'

In [11]:
# Position slices leave out the stop: row positions 1-2 and column position 1 only.
table.iloc[1:3, 1:2]

Unnamed: 0,Name
1,B
2,C


`table名字[table名字.Column名字 > 值]`

`table名字[table名字["Column名字"] > 值]`

In [5]:
# Keep only the rows where the condition is True.
好東西 = table[table.Cost > 20]  # the other comparison operators work the same way: ==, !=, >, >=, <, <=
好東西

Unnamed: 0,ID,Name,Cost
2,3,C,30


In [6]:
# Keep only the rows whose Cost is one of the listed values.
table.loc[table.Cost.isin([10, 20])]

Unnamed: 0,ID,Name,Cost
0,1,A,10
1,2,B,20
3,4,D,20


`Table名字.Function名字`

`Table名字.Column名字.Function名字`

`Table名字["Column名字"].Function名字`

In [None]:
# Syntax templates only: "Column名字" / "Row名字" are placeholders that must be
# replaced with real labels before this cell can run.

# unique() and per-value counts are Series (single-column) methods; a DataFrame has no unique().
table["Column名字"].unique()
table["Column名字"].value_counts()  # index the result by a value to get that value's count
# numeric_only=True skips text columns, which mean() cannot average.
table.mean(numeric_only=True)
table.describe()
table.sort_values(by=["Column名字"], inplace=True, ascending=[True])

# Remove a column / a row
table.drop(["Column名字"], inplace=True, axis=1)  # axis=1 means columns
table.drop(["Row名字"], inplace=True, axis=0)     # axis=0 means rows


# fillna returns a new frame; `table` itself is left unchanged.
table.fillna(0)
# One fill value per column. Each key must be a different column name:
# a repeated key in a dict literal silently keeps only the last value.
table.fillna({"Column名字1": 5, "Column名字2": 0.5})

# Returns a copy without any row that contains N/A; `table` itself is left unchanged.
table.dropna(axis=0)

In [15]:
# Sort by Cost descending, breaking ties by Name descending.
# inplace=False returns the sorted copy and leaves `table` as it was.
table.sort_values(by=["Cost", "Name"], inplace=False, ascending=[False, False])


Unnamed: 0,ID,Name,Cost
2,3,C,30
3,4,D,20
1,2,B,20
0,1,A,10


In [None]:
# Write `table` to a CSV file; by default the row index is written as the first column.
table.to_csv("path/to/file.csv")

# read_excel has no startrow/startcol keywords (those belong to DataFrame.to_excel).
# When reading, use skiprows to skip leading rows and usecols to pick columns.
pd.read_excel("path/to/file.xlsx", sheet_name="Sheet1", skiprows=0)

# index_col=X: column X of the file (e.g. a plain 0 1 2 3 4 ... counter) becomes
# the row index instead of a data column.
# header=0: the first line of the file supplies the column names rather than data.
pd.read_csv("path/to/file.csv", index_col=0, header=0)

In [16]:
# Demo frame whose Name values carry a "Mr." / "Mrs." title.
t2 = pd.DataFrame(
    dict(
        ID=[1, 2, 3, 4],
        Name=["Mr. A", "Mrs. B", "Mr. C", "Mrs. D"],
        Cost=[10, 20, 30, 20],
    )
)

t2

Unnamed: 0,ID,Name,Cost
0,1,Mr. A,10
1,2,Mrs. B,20
2,3,Mr. C,30
3,4,Mrs. D,20


`考試時 請全部加上 regex=False`

In [20]:
# Regex alternative, with the dot escaped so it only matches a literal ".":
# t2.Name.str.contains(r"Mr\.")
# regex=False treats "Mr." as plain text. This result is not displayed
# because it is not the cell's last expression.
t2.Name.str.contains("Mr.", regex=False)

# Plain-text replacement of "Mr." with "XYZ".
t2.Name.str.replace("Mr.", "XYZ", regex=False)

0     XYZ A
1    Mrs. B
2     XYZ C
3    Mrs. D
Name: Name, dtype: object

### Apply

`Table名字[["Column名字1", "Column名字2"]].apply(Function名字)`

In [21]:
def func(name):
    """Return "Guy" when `name` contains "Mr.", otherwise "Girlllllll"."""
    return "Guy" if "Mr." in name else "Girlllllll"

# Run func on each value of the Name column.
t2.Name.apply(func)

0           Guy
1    Girlllllll
2           Guy
3    Girlllllll
Name: Name, dtype: object

In [23]:
def func2(num):
    """Return 'Even' when `num` leaves no remainder on division by 2, otherwise 'Odd'."""
    if num % 2 == 0:
        return 'Even'
    return 'Odd'

# Only the last expression of a cell is displayed, so this first result is not shown.
t2.ID.apply(func2)
t2.Cost.apply(func2)

0    Even
1    Even
2    Even
3    Even
Name: Cost, dtype: object

In [26]:
def func3(row):
    """Build a greeting string from the `ID` and `Name` attributes of `row`."""
    # Pull out the fields first; any extra per-row logic would go here.
    person_id, person_name = row.ID, row.Name
    return f'Hello my name is {person_name} and id is {person_id}'

# axis=1 hands func3 one row at a time; the results are stored in a new "Message" column of t2.
t2["Message"] = t2.apply(func3, axis=1)
t2



Unnamed: 0,ID,Name,Cost,Message
0,1,Mr. A,10,Hello my name is Mr. A and id is 1
1,2,Mrs. B,20,Hello my name is Mrs. B and id is 2
2,3,Mr. C,30,Hello my name is Mr. C and id is 3
3,4,Mrs. D,20,Hello my name is Mrs. D and id is 4


In [27]:
# Demo frame for the groupby examples, written one record per person.
t3 = pd.DataFrame(
    [
        {"ID": 1, "Name": "A", "Gender": "F", "Age": 21},
        {"ID": 2, "Name": "B", "Gender": "M", "Age": 20},
        {"ID": 3, "Name": "C", "Gender": "M", "Age": 22},
        {"ID": 4, "Name": "D", "Gender": "F", "Age": 20},
    ]
)

t3

Unnamed: 0,ID,Name,Gender,Age
0,1,A,F,21
1,2,B,M,20
2,3,C,M,22
3,4,D,F,20


In [28]:
# Group the rows by Gender and count the entries of every other column within each group.
t3.groupby("Gender").count()

Unnamed: 0_level_0,ID,Name,Age
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
F,2,2,2
M,2,2,2


In [39]:
# Same count, grouped by Age instead: one result row per distinct Age value.
t3.groupby("Age").count()

Unnamed: 0_level_0,ID,Name,Gender
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20,2,2,2
21,1,1,1
22,1,1,1


In [29]:
def find_oldest(row):
    """Return a one-entry Series, labelled "MaxAge", holding the largest value of `row.Age`.

    Despite the parameter name, the body takes `.max()` over `row.Age`, so `row`
    is expected to hold several ages (a group of rows), not a single one.
    """
    oldest_age = row.Age.max()
    return pd.Series({"MaxAge": oldest_age})

# Call find_oldest once per Gender group; the returned Series become the rows of the result.
t3.groupby("Gender").apply(find_oldest)

Unnamed: 0_level_0,MaxAge
Gender,Unnamed: 1_level_1
F,21
M,22
