# pandas 入门

In [6]:
import numpy as np
import pandas as pd

In [7]:
# Installed pandas version; the outputs recorded in this notebook come from 1.3.0.
pd.__version__

'1.3.0'

# 根据dict创建df

In [11]:
# Build the sample table from a dict of equal-length columns.
# Unknown ages are stored as NaN; every row is labelled with one letter.
data = dict(
    animal=['cat', 'cat', 'snake', 'dog', 'dog',
            'cat', 'snake', 'cat', 'dog', 'dog'],
    age=[2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3],
    visits=[1, 3, 2, 3, 2, 3, 1, 1, 2, 1],
    priority=['yes', 'yes', 'no', 'yes', 'no',
              'no', 'no', 'yes', 'no', 'no'],
)
labels = list('abcdefghij')
df = pd.DataFrame(data=data, index=labels)

## 显示前三行（使用 head）

In [12]:
# Preview only the first three rows instead of dumping the whole frame.
df.head(n=3)

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no


## 显示前三行（使用 iloc 切片）

In [10]:
# Same three rows, selected by integer position (rows 0, 1 and 2).
df.iloc[0:3]

Unnamed: 0,animal,age,visits,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no


# 根据np.array创建df

In [13]:
# Wrap a 2-D NumPy array in a DataFrame: each inner list is one row, the
# columns are named explicitly and the rows get the default 0..2 index.
df2 = pd.DataFrame(
    data=np.array([
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]),
    columns=list('abc'),
)
df2

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [8]:
# Copy a DataFrame: select the columns, then take an independent copy so
# that later changes to df3 do not affect df2.
df3 = df2.loc[:, ['a', 'b', 'c']].copy()
df3

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


# 通过Series创建df

In [23]:
# Column-oriented sample records: one (name, values) pair per column,
# with NaN marking the unknown ages.
data = dict([
    ('animal', ['cat', 'cat', 'snake', 'dog', 'dog',
                'cat', 'snake', 'cat', 'dog', 'dog']),
    ('age', [2.5, 3, 0.5, np.nan, 5, 2, 4.5, np.nan, 7, 3]),
    ('visits', [1, 3, 2, 3, 2, 3, 1, 1, 2, 1]),
    ('priority', ['yes', 'yes', 'no', 'yes', 'no',
                  'no', 'no', 'yes', 'no', 'no']),
])

In [24]:
# Wrap each column list in its own Series (default integer index).
s_1, s_2, s_3, s_4 = (
    pd.Series(data[column])
    for column in ('animal', 'age', 'visits', 'priority')
)

# A list of Series is stacked row-wise: every Series becomes one row, so the
# result has 4 rows and 10 columns rather than one column per Series.
pd_2 = pd.DataFrame(data=[s_1, s_2, s_3, s_4])

pd_2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,cat,cat,snake,dog,dog,cat,snake,cat,dog,dog
1,2.5,3.0,0.5,,5.0,2.0,4.5,,7.0,3.0
2,1,3,2,3,2,3,1,1,2,1
3,yes,yes,no,yes,no,no,no,yes,no,no


# 创建 NumPy 数组（供下面的 concat 示例使用）

In [35]:
# 3x4 integer grid holding 0..11, filled row by row.
d1 = np.arange(0, 12).reshape((3, 4))
d1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [36]:
# 3x4 array of floating-point ones.
d2 = np.ones(shape=(3, 4))
d2

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

# concat

In [37]:
# Two frames that overlap only partially: they share columns B, C, D and
# row labels 1 and 2, which makes the effect of each concat option visible.
df1 = pd.DataFrame(data=d1, index=[0, 1, 2], columns=list('ABCD'))
df2 = pd.DataFrame(data=d2, index=[1, 2, 3], columns=list('BCDE'))

In [32]:
# Stack the rows of both frames. The columns are unioned (outer join), cells
# missing from one frame become NaN, and the row labels are renumbered.
pd.concat(objs=[df1, df2], axis=0, join='outer', ignore_index=True)

Unnamed: 0,A,B,C,D,E
0,0.0,1.0,2.0,3.0,
1,4.0,5.0,6.0,7.0,
2,8.0,9.0,10.0,11.0,
3,,1.0,1.0,1.0,1.0
4,,1.0,1.0,1.0,1.0
5,,1.0,1.0,1.0,1.0


In [38]:
# Place the frames side by side, aligning them on their row labels; rows
# present in only one frame are padded with NaN.
pd.concat(objs=[df1, df2], axis='columns')

Unnamed: 0,A,B,C,D,B.1,C.1,D.1,E
0,0.0,1.0,2.0,3.0,,,,
1,4.0,5.0,6.0,7.0,1.0,1.0,1.0,1.0
2,8.0,9.0,10.0,11.0,1.0,1.0,1.0,1.0
3,,,,,1.0,1.0,1.0,1.0


# 追加

In [40]:
# Append the rows of df2 to df1 and renumber the index.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so
# pd.concat is used instead; it produces the same frame on every version.
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,A,B,C,D,E
0,0.0,1.0,2.0,3.0,
1,4.0,5.0,6.0,7.0,
2,8.0,9.0,10.0,11.0,
3,,1.0,1.0,1.0,1.0
4,,1.0,1.0,1.0,1.0
5,,1.0,1.0,1.0,1.0


In [41]:
# Append one extra row [1, 2] to a 2x2 frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# new row is wrapped in its own DataFrame and combined with pd.concat.
# The appended row keeps its own label 0, hence the repeated index below.
df = pd.DataFrame([[1, 2], [3, 4]])
df = pd.concat([df, pd.DataFrame([[1, 2]])])
print(df)

   0  1
0  1  2
1  3  4
0  1  2


In [42]:
# Append all rows of a second frame that has the same columns.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat gives the same result. ignore_index is left off, so each
# frame keeps its original row labels (0, 1, 0, 1).
df = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
df2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))
df = pd.concat([df, df2])
print(df)

   A  B
0  1  2
1  3  4
0  5  6
1  7  8


In [44]:
# DataFrame.join matches rows by index, not by the 'key' column, so both
# 'key' columns survive and need suffixes to stay distinguishable.
caller = pd.DataFrame(
    data={
        'key': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5'],
        'B': ['B0', 'B1', 'B2', 'B3', 'B4', 'B5'],
    }
)
other = pd.DataFrame(
    data={
        'key': ['A0', 'A1', 'A2'],
        'C': ['C0', 'C1', 'C2'],
    }
)
# how='inner' keeps only the index labels present in both frames.
caller.join(other, how='inner', lsuffix='_caller', rsuffix='_other')

Unnamed: 0,key_caller,B,key_other,C
0,A0,B0,A0,C0
1,A1,B1,A1,C1
2,A2,B2,A2,C2


In [45]:
# Inner-join on the values of the shared 'key' column, so only rows whose
# key exists in both frames are kept and a single 'key' column remains.
df = caller.merge(other, how='inner', on='key')
df

Unnamed: 0,key,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2
