# Series
- 带有标签的一维数组，可以保存任何数据类型（整数，字符串，浮点数，python对象等），轴标签统称为索引

In [1]:
import numpy as np
import pandas as pd

# Build a Series from five uniform random numbers and show it with its type.
s = pd.Series(np.random.rand(5))
print(s)
print(type(s))

# .index holds the Series labels; with no explicit index it is a RangeIndex.
# .values holds the Series data as a NumPy ndarray.
labels = s.index
data = s.values
print(labels, type(labels))
print(data, type(data))

0    0.862635
1    0.313600
2    0.579789
3    0.712433
4    0.312322
dtype: float64
<class 'pandas.core.series.Series'>
RangeIndex(start=0, stop=5, step=1) <class 'pandas.core.indexes.range.RangeIndex'>
[0.8626352  0.31360002 0.57978888 0.7124334  0.3123223 ] <class 'numpy.ndarray'>


# Series创建方法

In [2]:
# Method 1: build a Series from a dict.
# The dict keys become the index labels and the dict values become the data.
dic = dict(zip(['a', 'b', 'c', '4', '5'], range(1, 6)))
s = pd.Series(dic)
print(s)

a    1
b    2
c    3
4    4
5    5
dtype: int64


In [4]:
# Method 2: build a Series from a one-dimensional NumPy array.
# Without an explicit index the labels default to integers starting at 0
# with a step of 1.
arr = np.random.randn(5)
s = pd.Series(arr)
print(arr)
print(s)

# Set the index labels and the dtype explicitly.
# The builtin `object` is used here because the `np.object` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
s = pd.Series(arr, index=['a', 'b', 'c', 'd', 'e'], dtype=object)
print(s)

[-2.13484296 -0.74861614 -0.79320521  0.25209303 -0.60185881]
0   -2.134843
1   -0.748616
2   -0.793205
3    0.252093
4   -0.601859
dtype: float64
a    -2.13484
b   -0.748616
c   -0.793205
d    0.252093
e   -0.601859
dtype: object


In [5]:
# Method 3: build a Series from a scalar.
# A scalar needs an explicit index; the value is repeated once per label
# so that the data matches the length of the index.
s = pd.Series(data=10, index=range(4))
print(s)

0    10
1    10
2    10
3    10
dtype: int64


In [6]:
# The Series `name` attribute.
# `name` is a constructor argument that labels the Series.
# Reading `.name` gives that label as a str, or None when no name was set.
s1 = pd.Series(np.random.randn(5))
print(s1)
print('-'*10)
s2 = pd.Series(data=np.random.randn(5), name='test')
print(s2)
unnamed_label = s1.name
test_label = s2.name
print(unnamed_label, test_label, type(test_label))

# .rename() returns a new Series carrying the new name;
# the Series it was called on keeps its original name.
s3 = s2.rename('hehehe')
print(s3)
renamed_label = s3.name
print(renamed_label, s2.name)

0   -0.312242
1   -0.878823
2    1.251027
3   -0.603434
4   -0.239842
dtype: float64
----------
0   -0.577718
1   -1.400919
2   -0.166261
3   -0.274031
4   -0.639755
Name: test, dtype: float64
None test <class 'str'>
0   -0.577718
1   -1.400919
2   -0.166261
3   -0.274031
4   -0.639755
Name: hehehe, dtype: float64
hehehe test


In [7]:
# Series indexing: by position, by label, by slice, and by boolean mask.
# Integer subscript on a Series with the default 0..n-1 index.
s = pd.Series(np.random.rand(5))
print(s)
first = s[0]
print(first, type(first), first.dtype)
# float() converts the NumPy scalar into a plain Python float.
as_float = float(s[0])
print(as_float, type(as_float))

# Label-based indexing.
s = pd.Series(np.random.rand(5), index=list('abcde'))
print(s)
a_value = s['a']
print(a_value, type(a_value), a_value.dtype)

# A list of labels selects several entries at once; the result is a Series.
sci = s[['a', 'b', 'c']]
print(sci, type(sci))

0    0.983443
1    0.736079
2    0.933357
3    0.893932
4    0.025307
dtype: float64
0.9834433471356109 <class 'numpy.float64'> float64
0.9834433471356109 <class 'float'>
a    0.130533
b    0.064540
c    0.527872
d    0.869899
e    0.797566
dtype: float64
0.13053266270991903 <class 'numpy.float64'> float64
a    0.130533
b    0.064540
c    0.527872
dtype: float64 <class 'pandas.core.series.Series'>


In [8]:
# Slicing
s1 = pd.Series(np.random.rand(5))
s2 = pd.Series(np.random.rand(5), index=['a', 'b', 'c', 'd', 'e'])
# Integer slice on the default index: the end position is excluded.
print(s1[1:4], s1[4])
# Label slice: both end labels are included.
print(s2['a':'c'], s2['c'])
# Positional access on a label-indexed Series goes through .iloc.
# Plain s2[3] treats an integer key as a position, which pandas has
# deprecated (FutureWarning since 2.1); .iloc is explicit and gives
# the same result.
print(s2.iloc[0:3], s2.iloc[3])
print('-'*10)

# Drop the last element, then take every second element.
print(s2[:-1])
print(s2[::2])

1    0.481887
2    0.613715
3    0.877353
dtype: float64 0.761392416790313
a    0.647902
b    0.734657
c    0.456358
dtype: float64 0.4563575526368312
a    0.647902
b    0.734657
c    0.456358
dtype: float64 0.4348206606715873
----------
a    0.647902
b    0.734657
c    0.456358
d    0.434821
dtype: float64
a    0.647902
c    0.456358
e    0.347548
dtype: float64


In [9]:
# Boolean indexing
s = pd.Series(np.random.rand(3)*100)
s[4] = None  # adds a new label 4 that holds a missing value
print(s)
bs1 = s > 50
# .isnull() flags missing entries; both None and NaN count as missing.
bs2 = s.isnull()
# .notnull() is the inverse of .isnull().
bs3 = s.notnull()
for mask in (bs1, bs2, bs3):
    print(mask, type(mask), mask.dtype)
print('-'*10)

# Using a boolean Series as the subscript keeps only the True entries.
print(s[bs1])
print(s[bs3])

0    27.8466
1    88.0153
2    92.8496
4       None
dtype: object
0    False
1     True
2     True
4    False
dtype: bool <class 'pandas.core.series.Series'> bool
0    False
1    False
2    False
4     True
dtype: bool <class 'pandas.core.series.Series'> bool
0     True
1     True
2     True
4    False
dtype: bool <class 'pandas.core.series.Series'> bool
----------
1    88.0153
2    92.8496
dtype: object
0    27.8466
1    88.0153
2    92.8496
dtype: object


# Series基本技巧

In [10]:
# Inspecting data
# .head(n) returns the first n entries.
# .tail() returns the last entries (five when called without an argument).
s = pd.Series(np.random.rand(50))
leading = s.head(10)
trailing = s.tail()
print(leading)
print(trailing)

0    0.970346
1    0.845211
2    0.935872
3    0.958179
4    0.524690
5    0.877036
6    0.140658
7    0.564132
8    0.091039
9    0.254349
dtype: float64
45    0.911409
46    0.023389
47    0.930987
48    0.158886
49    0.616344
dtype: float64


In [None]:
# Re-indexing with .reindex
# The result follows the order of the requested labels; a label that is
# not in the current index is introduced with a missing value.
s = pd.Series(np.random.rand(3), index=list('abc'))
print(s)
new_labels = ['c', 'b', 'a', 'd']
s1 = s.reindex(new_labels)
print(s1)

# fill_value: the value used in place of the missing entries.
s2 = s.reindex(new_labels, fill_value=0)
print(s2)

In [13]:
# Deleting: .drop returns a copy without the given labels
# (inplace=False), so the original Series is unchanged.
s = pd.Series(np.random.rand(5), index=['n', 'g', 'j', 'u', 'r'])
print(s)
s1 = s.drop(labels='n')
s2 = s.drop(labels=['g', 'j'])
# s is printed again to show that it still has all five entries.
print(s)
print(s1)
print(s2)

n    0.727622
g    0.908325
j    0.391960
u    0.985895
r    0.119303
dtype: float64
n    0.727622
g    0.908325
j    0.391960
u    0.985895
r    0.119303
dtype: float64
g    0.908325
j    0.391960
u    0.985895
r    0.119303
dtype: float64
n    0.727622
u    0.985895
r    0.119303
dtype: float64


In [14]:
# Adding elements
# Assigning to a label that does not exist yet adds a new entry in place.
s1 = pd.Series(np.random.rand(5))
s2 = pd.Series(np.random.rand(5),index=list('ngjur'))
print(s1)
print(s2)
s1[5] = 100
s2['a'] = 100
print(s1)
print(s2)
print('-'*10)

# Concatenating produces a new Series; s1 and s2 are left unchanged.
# pd.concat is used because Series.append was deprecated in pandas 1.4
# and removed in pandas 2.0.
s3 = pd.concat([s1, s2])
print(s3)
print(s1)

0    0.731020
1    0.591488
2    0.672731
3    0.474557
4    0.968795
dtype: float64
n    0.217407
g    0.326707
j    0.010252
u    0.298753
r    0.951276
dtype: float64
0      0.731020
1      0.591488
2      0.672731
3      0.474557
4      0.968795
5    100.000000
dtype: float64
n      0.217407
g      0.326707
j      0.010252
u      0.298753
r      0.951276
a    100.000000
dtype: float64
----------
0      0.731020
1      0.591488
2      0.672731
3      0.474557
4      0.968795
5    100.000000
n      0.217407
g      0.326707
j      0.010252
u      0.298753
r      0.951276
a    100.000000
dtype: float64
0      0.731020
1      0.591488
2      0.672731
3      0.474557
4      0.968795
5    100.000000
dtype: float64


In [15]:
# Modifying: assign through a label (or a list of labels) to change
# the values in place.
s = pd.Series(np.random.rand(3), index=list('abc'))
print(s)
s.loc['a'] = 100
s.loc[['b', 'c']] = 200
print(s)

a    0.506596
b    0.357606
c    0.785010
dtype: float64
a    100.0
b    200.0
c    200.0
dtype: float64
