In [1]:
import pandas as pd
import numpy as np

# Series

In [2]:
s2 = pd.Series(np.arange(5),index=['a','b','c','d','e'])
s2

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [3]:
s2.values

array([0, 1, 2, 3, 4])

In [4]:
s2.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [5]:
s3 = pd.Series({"a":1,"b":2,"c":3})
s3

a    1
b    2
c    3
dtype: int64

In [6]:
s3 = pd.Series({"a":1,"b":2,"c":3},index=["a","b","c","d"])
s3

a    1.0
b    2.0
c    3.0
d    NaN
dtype: float64

In [7]:
s3 = pd.Series({"a":1,"b":2,"c":3},index=["a","b","c","d"]).shift(1)
s3

a    NaN
b    1.0
c    2.0
d    3.0
dtype: float64

In [8]:
s3.notnull()

a    False
b     True
c     True
d     True
dtype: bool

In [9]:
s3[~s3.isnull()]

b    1.0
c    2.0
d    3.0
dtype: float64

In [10]:
s3[1],s3["b"]

(1.0, 1.0)

In [11]:
s3[["a","b"]],s3[[1,2]]

(a    NaN
 b    1.0
 dtype: float64,
 b    1.0
 c    2.0
 dtype: float64)

In [12]:
s3.name = "letter"
s3.index.name="number"

# DataFrame

In [13]:
df = pd.DataFrame({
    "a":[1,2,3,4],
    "b":[2,3,4,5],
    "c":[np.nan,2,3,1],
    "d":0
})
df

Unnamed: 0,a,b,c,d
0,1,2,,0
1,2,3,2.0,0
2,3,4,3.0,0
3,4,5,1.0,0


In [14]:
df.values

array([[ 1.,  2., nan,  0.],
       [ 2.,  3.,  2.,  0.],
       [ 3.,  4.,  3.,  0.],
       [ 4.,  5.,  1.,  0.]])

In [15]:
df.to_numpy()

array([[ 1.,  2., nan,  0.],
       [ 2.,  3.,  2.,  0.],
       [ 3.,  4.,  3.,  0.],
       [ 4.,  5.,  1.,  0.]])

In [16]:
df.index

RangeIndex(start=0, stop=4, step=1)

In [17]:
df.index=["A","B","C","D"]

In [18]:
df.columns

Index(['a', 'b', 'c', 'd'], dtype='object')

In [19]:
df

Unnamed: 0,a,b,c,d
A,1,2,,0
B,2,3,2.0,0
C,3,4,3.0,0
D,4,5,1.0,0


In [20]:
df.name="number"
df.index.name="LETTER"
df.columns.name="letter"
df

letter,a,b,c,d
LETTER,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,1,2,,0
B,2,3,2.0,0
C,3,4,3.0,0
D,4,5,1.0,0


In [21]:
df = pd.DataFrame({
    "a":[1,2,3,4],
    "b":[2,3,4,5],
    "c":[np.nan,2,3,1],
},index=["A","B","C","D"],columns=["a","b","c","d"])
df


Unnamed: 0,a,b,c,d
A,1,2,,
B,2,3,2.0,
C,3,4,3.0,
D,4,5,1.0,


In [22]:
df = pd.DataFrame({
    "a":pd.Series([2]*len(df),index=["A","B","C","D"]),
    "b":[2,3,4,5],
    "c":[np.nan,2,3,1],
},index=["A","B","C","D"],columns=["a","b","c","d"])
df


Unnamed: 0,a,b,c,d
A,2,2,,
B,2,3,2.0,
C,2,4,3.0,
D,2,5,1.0,


In [23]:
df = pd.DataFrame({
    "a":pd.Series(np.random.randn(3)),
    "b":pd.Series(np.random.randn(2))
})
df

Unnamed: 0,a,b
0,-0.018228,-0.805749
1,-0.168204,-0.587687
2,0.863241,


In [24]:
df = pd.DataFrame(
{
    "a":{"A":1,"B":2},
    "b":{"A":2,"B":3,"C":3}
}
)
df

Unnamed: 0,a,b
A,1.0,2
B,2.0,3
C,,3


In [25]:
df.values

array([[ 1.,  2.],
       [ 2.,  3.],
       [nan,  3.]])

In [26]:
df1 = pd.DataFrame(df.values,index=df.index,columns=df.columns)
df

Unnamed: 0,a,b
A,1.0,2
B,2.0,3
C,,3


In [27]:
df = pd.DataFrame(
[
    {"A":1,"B":2},
    {"A":2,"B":3,"C":3}
]
)
df

Unnamed: 0,A,B,C
0,1,2,
1,2,3,3.0


In [28]:
df = df.T
df.columns=["a","b"]
df

Unnamed: 0,a,b
A,1.0,2.0
B,2.0,3.0
C,,3.0


In [29]:
df["c"]=[1,2,3]
df

Unnamed: 0,a,b,c
A,1.0,2.0,1
B,2.0,3.0,2
C,,3.0,3


In [30]:
df.loc["D",:] = [0,0,0]
df

Unnamed: 0,a,b,c
A,1.0,2.0,1.0
B,2.0,3.0,2.0
C,,3.0,3.0
D,0.0,0.0,0.0


In [31]:
df["A":"C"]

Unnamed: 0,a,b,c
A,1.0,2.0,1.0
B,2.0,3.0,2.0
C,,3.0,3.0


In [32]:
df["0":"3"]

Unnamed: 0,a,b,c


In [33]:
del(df["c"])


In [34]:
df

Unnamed: 0,a,b
A,1.0,2.0
B,2.0,3.0
C,,3.0
D,0.0,0.0


In [35]:
df.drop("D",axis=0)

Unnamed: 0,a,b
A,1.0,2.0
B,2.0,3.0
C,,3.0


In [36]:
df.drop("C",axis=0)

Unnamed: 0,a,b
A,1.0,2.0
B,2.0,3.0
D,0.0,0.0


In [37]:
df.drop("a",axis=1)

Unnamed: 0,b
A,2.0
B,3.0
C,3.0
D,0.0


# 索引操作

In [38]:
import numpy as np
import pandas as pd

In [39]:
ps1 = pd.Series(range(5),index=['a','b','c','d','e'])
print(type(ps1.index))
ps1

<class 'pandas.core.indexes.base.Index'>


a    0
b    1
c    2
d    3
e    4
dtype: int64

In [40]:
pd1 = pd.DataFrame(np.arange(9).reshape(3,3),index = ['a','b','c'],columns = ['A','B','C'])
print(type(pd1.index))
pd1

<class 'pandas.core.indexes.base.Index'>


Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [41]:
ps1.index[0]

'a'

In [42]:
pd1.columns[0]

'A'

In [43]:
ps1.reindex(['b','a','d','c','e','f'])

b    1.0
a    0.0
d    3.0
c    2.0
e    4.0
f    NaN
dtype: float64

In [44]:
pd1.reindex(["a","c","c"])

Unnamed: 0,A,B,C
a,0,1,2
c,6,7,8
c,6,7,8


In [45]:
pd1.reindex(columns=['B','C','A'])

Unnamed: 0,B,C,A
a,1,2,0
b,4,5,3
c,7,8,6


In [46]:
ps1["g"] = 10
ps1

a     0
b     1
c     2
d     3
e     4
g    10
dtype: int64

In [47]:
s1 = pd.Series({"f":999})
s1

f    999
dtype: int64

In [48]:
ps1.append(s1)

a      0
b      1
c      2
d      3
e      4
g     10
f    999
dtype: int64

In [49]:
pd1

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [50]:
pd1["D"] = [10,11,12]
pd1

Unnamed: 0,A,B,C,D
a,0,1,2,10
b,3,4,5,11
c,6,7,8,12


In [51]:
pd1.loc["d",:] = [4]*4
pd1

Unnamed: 0,A,B,C,D
a,0.0,1.0,2.0,10.0
b,3.0,4.0,5.0,11.0
c,6.0,7.0,8.0,12.0
d,4.0,4.0,4.0,4.0


In [52]:
pd1_cp = pd1.copy()
pd1_cp.insert(0,'E',np.random.rand(4))
pd1_cp

Unnamed: 0,E,A,B,C,D
a,0.630866,0.0,1.0,2.0,10.0
b,0.043775,3.0,4.0,5.0,11.0
c,0.828179,6.0,7.0,8.0,12.0
d,0.4376,4.0,4.0,4.0,4.0


In [61]:
pd1.append({"E":1,"A":1,"B":1,"C":1,"D":1},ignore_index=True)

Unnamed: 0,A,B,C,D,E
0,0.0,1.0,2.0,10.0,
1,3.0,4.0,5.0,11.0,
2,6.0,7.0,8.0,12.0,
3,4.0,4.0,4.0,4.0,
4,1.0,1.0,1.0,1.0,1.0


In [60]:
pd1

Unnamed: 0,A,B,C,D
a,0.0,1.0,2.0,10.0
b,3.0,4.0,5.0,11.0
c,6.0,7.0,8.0,12.0
d,4.0,4.0,4.0,4.0


In [71]:
ps1=ps1.append(pd.Series({"g":100}))
ps1

a      0
b      1
c      2
d      3
e      4
g    100
g    100
dtype: int64

In [72]:
del ps1["g"]

In [73]:
ps1

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [75]:
ps1["g"] = 100
ps1

a      0
b      1
c      2
d      3
e      4
g    100
dtype: int64

In [76]:
del(ps1["g"])

In [77]:
ps1

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [78]:
pd1

Unnamed: 0,A,B,C,D
a,0.0,1.0,2.0,10.0
b,3.0,4.0,5.0,11.0
c,6.0,7.0,8.0,12.0
d,4.0,4.0,4.0,4.0


In [79]:
del pd1["D"]

In [80]:
pd1

Unnamed: 0,A,B,C
a,0.0,1.0,2.0
b,3.0,4.0,5.0
c,6.0,7.0,8.0
d,4.0,4.0,4.0


In [85]:
ps1 = ps1.drop(["a","b"])
ps1

c    2
d    3
e    4
dtype: int64

In [86]:
pd1

Unnamed: 0,A,B,C
a,0.0,1.0,2.0
b,3.0,4.0,5.0
c,6.0,7.0,8.0
d,4.0,4.0,4.0


In [88]:
pd1.drop("a")

Unnamed: 0,A,B,C
b,3.0,4.0,5.0
c,6.0,7.0,8.0
d,4.0,4.0,4.0


In [89]:
ps1

c    2
d    3
e    4
dtype: int64

In [90]:
ps1[0] = 1

In [91]:
ps1.iloc[0] = 2

In [92]:
ps1

c    2
d    3
e    4
dtype: int64

In [95]:
ps1.loc["d"] = 2

In [96]:
ps1

c    2
d    2
e    4
0    2
a    2
dtype: int64

In [97]:
pd1

Unnamed: 0,A,B,C
a,0.0,1.0,2.0
b,3.0,4.0,5.0
c,6.0,7.0,8.0
d,4.0,4.0,4.0


In [103]:
pd1.C=3
pd1

Unnamed: 0,A,B,C
a,0.0,1.0,3
b,3.0,4.0,3
c,6.0,7.0,3
d,4.0,4.0,3


In [105]:
ps1

c    2
d    2
e    4
0    2
a    2
dtype: int64

In [106]:
ps1[["d","e"]]

d    2
e    4
dtype: int64

In [107]:
pd1

Unnamed: 0,A,B,C
a,0.0,1.0,3
b,3.0,4.0,3
c,6.0,7.0,3
d,4.0,4.0,3


In [108]:
pd1["C"]["a"]

3

In [109]:
pd1.loc["a"]

A    0.0
B    1.0
C    3.0
Name: a, dtype: float64

In [113]:
pd1.loc[["a","d"],["B","C"]]

Unnamed: 0,B,C
a,1.0,3
d,4.0,3
