In [1]:
import pandas as pd
import numpy as np

### 1-1. Pandas series

In [2]:
data = pd.Series([1, 2, 3, 4, 5])

data

0    1
1    2
2    3
3    4
4    5
dtype: int64

### 1-2. index

In [3]:
# 修改行名稱(column)

data = pd.Series([1, 2, 3, 4, 5])
print(data)

data = pd.Series([1, 2, 3, 4, 5], index=['a,', 'b', 'c', 'd', 'e'])
print(data)

0    1
1    2
2    3
3    4
4    5
dtype: int64
a,    1
b     2
c     3
d     4
e     5
dtype: int64


In [4]:
data.values

array([1, 2, 3, 4, 5], dtype=int64)

In [5]:
data.index

Index(['a,', 'b', 'c', 'd', 'e'], dtype='object')

### 1-3. dataframe

In [6]:
# 字典
data = {'col1': [1, 2], 'col2': [3, 4]}

pd.DataFrame(data)

Unnamed: 0,col1,col2
0,1,3
1,2,4


In [7]:
pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}, index=['a', 'b'])

Unnamed: 0,col1,col2
a,1,3
b,2,4


### 題外話: 重新命名行/列

In [8]:
data = np.arange(12).reshape(4, 3)
data = pd.DataFrame(data)
data

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


In [9]:
data.rename(index={0:'a', 1:'b', 2:'c', 3:'d'}, inplace=True)
data

Unnamed: 0,0,1,2
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [10]:
data.rename(columns={0:'col1', 1:'col2', 2:'col3', 3:'col4'}, inplace=True)
data

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


### 1-4. 轉置

In [11]:
data = np.arange(12).reshape(4, 3)
data = pd.DataFrame(data, index=['a', 'b', 'c', 'd'], columns=['col1', 'col2', 'col3'])
data

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [12]:
data.T

Unnamed: 0,a,b,c,d
col1,0,3,6,9
col2,1,4,7,10
col3,2,5,8,11


### 1-5. 特定行/列

In [13]:
data = np.arange(12).reshape(4, 3)
data = pd.DataFrame(data, index=['a', 'b', 'c', 'd'], columns=['col1', 'col2', 'col3'])
data

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [14]:
data[['col1']]

Unnamed: 0,col1
a,0
b,3
c,6
d,9


In [15]:
data.col1

a    0
b    3
c    6
d    9
Name: col1, dtype: int32

In [16]:
data[['col1', 'col3']]

Unnamed: 0,col1,col3
a,0,2
b,3,5
c,6,8
d,9,11


### 題外話. iloc

In [17]:
data = np.arange(12).reshape(4, 3)
data = pd.DataFrame(data, index=['a', 'b', 'c', 'd'], columns=['col1', 'col2', 'col3'])
data

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [18]:
data.iloc[1, 2]

5

In [19]:
data['col1']['a']

0

### 1-5. 過濾器

In [20]:
data = np.arange(12).reshape(4, 3)
data = pd.DataFrame(data, index=['a', 'b', 'c', 'd'], columns=['col1', 'col2', 'col3'])
data

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [21]:
data[data['col1']>3]

Unnamed: 0,col1,col2,col3
c,6,7,8
d,9,10,11


In [22]:
data['col1']>3

a    False
b    False
c     True
d     True
Name: col1, dtype: bool

In [23]:
data>3

Unnamed: 0,col1,col2,col3
a,False,False,False
b,False,True,True
c,True,True,True
d,True,True,True


In [24]:
data['col1']['a'] = np.nan
print(data.isnull())
print(data.isnull().sum())

    col1   col2   col3
a   True  False  False
b  False  False  False
c  False  False  False
d  False  False  False
col1    1
col2    0
col3    0
dtype: int64


### 1-6. 刪除行/列

In [25]:
data = np.arange(12).reshape(4, 3)
data = pd.DataFrame(data, index=['a', 'b', 'c', 'd'], columns=['col1', 'col2', 'col3'])
data

Unnamed: 0,col1,col2,col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


In [26]:
data.drop(columns=['col1', 'col3'], axis=1, inplace=True)
data

Unnamed: 0,col2
a,1
b,4
c,7
d,10


In [27]:
data.drop(['a', 'c'], inplace=True)
data

Unnamed: 0,col2
b,4
d,10


### 1-7. 合併

In [28]:
data1 = np.arange(12).reshape(4, 3)
data1 = pd.DataFrame(data1)
data1

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


In [29]:
data2 = np.arange(12).reshape(4, 3)
data2 = pd.DataFrame(data2+12)
data2

Unnamed: 0,0,1,2
0,12,13,14
1,15,16,17
2,18,19,20
3,21,22,23


In [30]:
pd.concat([data1, data2], axis=1)

Unnamed: 0,0,1,2,0.1,1.1,2.1
0,0,1,2,12,13,14
1,3,4,5,15,16,17
2,6,7,8,18,19,20
3,9,10,11,21,22,23


In [31]:
pd.concat([data1, data2], axis=0, ignore_index=True)

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11
4,12,13,14
5,15,16,17
6,18,19,20
7,21,22,23


### 1-8. 統計

In [32]:
data = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                    columns=['a', 'b', 'c'])
data

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [33]:
data['a'].max()

7

In [34]:
data.max()

a    7
b    8
c    9
dtype: int32

### 1-9. 排序

In [35]:
data = pd.DataFrame(np.random.rand(12).reshape(4, 3), columns=['col1', 'col2', 'col3'])
data

Unnamed: 0,col1,col2,col3
0,0.131552,0.403078,0.524958
1,0.362352,0.36142,0.38804
2,0.531777,0.754734,0.45994
3,0.627676,0.262086,0.323666


In [36]:
data.col1.sort_values()

0    0.131552
1    0.362352
2    0.531777
3    0.627676
Name: col1, dtype: float64

In [37]:
# 低到高
data['col1'].sort_values()

0    0.131552
1    0.362352
2    0.531777
3    0.627676
Name: col1, dtype: float64

In [38]:
# 高到低
data['col1'].sort_values(ascending=False)

3    0.627676
2    0.531777
1    0.362352
0    0.131552
Name: col1, dtype: float64