# 常量

In [2]:
import numpy as np
# 1. 两个numpy.nan不等 
print(np.nan == np.nan) # False
x = np.array([1, 2, 8, np.nan, 10])
print(x) # [ 1.  2.  8. nan 10.]
# 2. 使用np.isnan判断numpy矩阵中是否有nan值，返回一个boolean矩阵 
y = np.isnan(x) 
print(y) # [False False False  True False]
# 3. 计数numpy数组中的非零值 
z = np.count_nonzero(y) 
print(z) # 1

False
[ 1.  2.  8. nan 10.]
[False False False  True False]
1


# 数据类型

In [3]:
# 数据类型的创建
a = np.dtype('b1')
# 打印变量的数据类型
print(a.type) # <class 'numpy.bool_'> 
# 打印变量占据的内存大小
print(a.itemsize) # 1 

<class 'numpy.bool_'>
1


In [5]:
# 整数类型的数据类型信息
ii16 = np.iinfo(np.int16)
print(ii16.min) # -32768
print(ii16.max) # 32767
# 浮点数类型的数据类型信息
ff16 = np.finfo(np.float16)
print(ff16.bits) # 16
print(ff16.min) # -65500.0
print(ff16.max) # 65500.0
print(ff16.eps) # 0.000977

-32768
32767
16
-65500.0
65500.0
0.000977


# 时间日期和时间增量

In [12]:
# 从字符串创建 datetime64 类型，默认自动选择单位
a = np.datetime64('2020-03-01')
print(a, a.dtype)
a = np.datetime64('2020-03')
print(a, a.dtype)
a = np.datetime64('2020-03-08 20:00:05')
print(a, a.dtype)
a = np.datetime64('2020-03-08 20:00')
print(a, a.dtype)
a = np.datetime64('2020-03-08 20')
print(a, a.dtype)
# 从字符串创建 datetime64 类型，强制指定使用的单位
a = np.datetime64('2020-03', 'D')
print(a, a.dtype)
a = np.datetime64('2020-03', 'Y')
print(a, a.dtype)
# datetime64数组自动单位转换，一律转化为其中最小的单位
a = np.array(['2020-03', '2020-03-08', '2020-03-08 20:00'], dtype='datetime64')
print(a, a.dtype)
# 使用arange()chauchau创建datetime64数组，生成日期范围
# 递增单位为天
a = np.arange('2020-08-01', '2020-08-10', dtype=np.datetime64)
print(a)
print(a.dtype)
# 递增单位为分钟
a = np.arange('2020-08-01 20:00', '2020-08-10', dtype=np.datetime64)
print(a)
# 递增单位为月
a = np.arange('2020-05', '2020-12', dtype=np.datetime64)
print(a)

2020-03-01 datetime64[D]
2020-03 datetime64[M]
2020-03-08T20:00:05 datetime64[s]
2020-03-08T20:00 datetime64[m]
2020-03-08T20 datetime64[h]
2020-03-01 datetime64[D]
2020 datetime64[Y]
['2020-03-01T00:00' '2020-03-08T00:00' '2020-03-08T20:00'] datetime64[m]
['2020-08-01' '2020-08-02' '2020-08-03' '2020-08-04' '2020-08-05'
 '2020-08-06' '2020-08-07' '2020-08-08' '2020-08-09']
datetime64[D]
['2020-08-01T20:00' '2020-08-01T20:01' '2020-08-01T20:02' ...
 '2020-08-09T23:57' '2020-08-09T23:58' '2020-08-09T23:59']
['2020-05' '2020-06' '2020-07' '2020-08' '2020-09' '2020-10' '2020-11']


## datetime64和timedelta64运算

In [13]:
a = np.datetime64('2020-03-08') - np.datetime64('2020-03-07')
b = np.datetime64('2020-03-08') - np.datetime64('202-03-07 08:00')
c = np.datetime64('2020-03-08') - np.datetime64('2020-03-07 23:00', 'D')
print(a, a.dtype)
print(b, b.dtype)
print(c, c.dtype)

1 days timedelta64[D]
956178240 minutes timedelta64[m]
1 days timedelta64[D]


In [14]:
a = np.datetime64('2020-03') + np.timedelta64(20, 'D')
b = np.datetime64('2020-06-15 00:00') + np.timedelta64(12, 'h')
print(a, a.dtype)
print(b, b.dtype)

2020-03-21 datetime64[D]
2020-06-15T12:00 datetime64[m]


In [18]:
a = np.timedelta64(1, 'Y')
b = np.timedelta64(a, 'M')
print(a)
print(b)
c = np.timedelta64(1, 'h')
d = np.timedelta64(c, 'm')
print(c)
print(d)
# 月和天不能直接转化 因为每个月有多少天是不确定的
# print(np.timedelta64(a, 'D'))

1 years
12 months
1 hours
60 minutes


In [19]:
a = np.timedelta64(1, 'Y')
b = np.timedelta64(6, 'M')
c = np.timedelta64(1, 'W')
d = np.timedelta64(1, 'D')
e = np.timedelta64(10, 'D')

print(a)  # 1 years
print(b)  # 6 months
print(a + b)  # 18 months
print(a - b)  # 6 months
print(2 * a)  # 2 years
print(a / b)  # 2.0
print(c / d)  # 7.0
print(c % e)  # 7 days

1 years
6 months
18 months
6 months
2 years
2.0
7.0
7 days


In [21]:
# numpy.datetime64和datetime.datetime相互转换
import datetime
dt = datetime.datetime(year=2020, month=6, day=1, hour=20, minute=5, second=30)
dt64 = np.datetime64(dt, 's')
print(dt64, dt64.dtype)
dt2 = dt64.astype(datetime.datetime)
print(dt2, type(dt2))

2020-06-01T20:05:30 datetime64[s]
2020-06-01 20:05:30 <class 'datetime.datetime'>


## datetime64的应用

In [25]:
# 2020-07-10 星期五
a = np.busday_offset('2020-07-10', offsets=1)
print(a)
# a = np.busday_offset('2020-07-11', offsets=1)
# print(a)
a = np.busday_offset('2020-07-11', offsets=0, roll='forward')
b = np.busday_offset('2020-07-11', offsets=0, roll='backward')
print(a)  # 2020-07-13
print(b)  # 2020-07-10
a = np.busday_offset('2020-07-11', offsets=1, roll='forward')
b = np.busday_offset('2020-07-11', offsets=1, roll='backward')
print(a)  # 2020-07-14
print(b)  # 2020-07-13

2020-07-13
2020-07-13
2020-07-10
2020-07-14
2020-07-13


In [26]:
# 2020-07-10 星期五
a = np.is_busday('2020-07-10')
b = np.is_busday('2020-07-11')
print(a)  # True
print(b)  # False

True
False


In [27]:
# 计数指定日期区间内的工作日
begindates = np.datetime64('2020-07-10')
enddates = np.datetime64('2020-07-20')
a = np.arange(begindates, enddates, dtype='datetime64')
b = np.count_nonzero(np.is_busday(a))
print(a)
# ['2020-07-10' '2020-07-11' '2020-07-12' '2020-07-13' '2020-07-14'
#  '2020-07-15' '2020-07-16' '2020-07-17' '2020-07-18' '2020-07-19']
print(b)  # 6

['2020-07-10' '2020-07-11' '2020-07-12' '2020-07-13' '2020-07-14'
 '2020-07-15' '2020-07-16' '2020-07-17' '2020-07-18' '2020-07-19']
6


In [28]:
# 自定义周掩码值
a = np.is_busday('2020-07-10', weekmask=[1, 1, 1, 0, 0, 1, 1])
b = np.is_busday('2020-07-10', weekmask=[1, 1, 1, 0, 0, 1, 1])
print(a)  # True
print(b)  # False

False
False


In [29]:
# 使用np.busday_count计数指定日期区间内的工作日数量
begindates = np.datetime64('2020-07-10')
enddates = np.datetime64('2020-07-20')
a = np.busday_count(begindates, enddates)
b = np.busday_count(enddates, begindates)
print(a)  # 6
print(b)  # -6

6
-6


# 数组的创建

## 依据现有数据创建ndarray

In [31]:
# 使用array()函数创建数组
# 一维数组
a = np.array([0, 1, 2, 3, 4]) # 通过list创建
b = np.array((0, 1, 2, 3, 4)) # 通过tuple创建
print(a, type(a))
# [0 1 2 3 4] <class 'numpy.ndarray'>
print(b, type(b))
# [0 1 2 3 4] <class 'numpy.ndarray'>
# 二维数组
c = np.array([[11, 12, 13, 14, 15],
              [16, 17, 18, 19, 20],
              [21, 22, 23, 24, 25],
              [26, 27, 28, 29, 30],
              [31, 32, 33, 34, 35]])
print(c, type(c))
# 三维数组
d = np.array([[(1.5, 2, 3), (4, 5, 6)],
              [(3, 2, 1), (4, 5, 6)]])
print(d, type(d))

[0 1 2 3 4] <class 'numpy.ndarray'>
[0 1 2 3 4] <class 'numpy.ndarray'>
[[11 12 13 14 15]
 [16 17 18 19 20]
 [21 22 23 24 25]
 [26 27 28 29 30]
 [31 32 33 34 35]] <class 'numpy.ndarray'>
[[[1.5 2.  3. ]
  [4.  5.  6. ]]

 [[3.  2.  1. ]
  [4.  5.  6. ]]] <class 'numpy.ndarray'>


In [34]:
# 使用asarray()函数创建数组
x = [[1, 1, 1], [1, 1, 1], [1, 1, 1]]
y = np.array(x)
z = np.asarray(x)
x[1][2] = 2
print(x,type(x))
print(y,type(y))
print(z,type(z))
# 当数据源为ndarray时，对比array()函数和asarray()函数
x = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
y = np.array(x)
z = np.asarray(x)
w = np.asarray(x, dtype=np.int32)
x[1][2] = 2
print(x,type(x),x.dtype)
print(y,type(y),y.dtype)
print(z,type(z),z.dtype)
print(w,type(w),w.dtype)

[[1, 1, 1], [1, 1, 2], [1, 1, 1]] <class 'list'>
[[1 1 1]
 [1 1 1]
 [1 1 1]] <class 'numpy.ndarray'>
[[1 1 1]
 [1 1 1]
 [1 1 1]] <class 'numpy.ndarray'>
[[1 1 1]
 [1 1 2]
 [1 1 1]] <class 'numpy.ndarray'> int32
[[1 1 1]
 [1 1 1]
 [1 1 1]] <class 'numpy.ndarray'> int32
[[1 1 1]
 [1 1 2]
 [1 1 1]] <class 'numpy.ndarray'> int32
[[1 1 1]
 [1 1 2]
 [1 1 1]] <class 'numpy.ndarray'> int32


In [38]:
# ndarray中dtype的修改
x = np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]])
print(x, x.dtype)
# [[1 1 1]
#  [1 1 1]
#  [1 1 1]] int32
x.dtype = np.float64 # ValueError: When changing to a larger dtype, its size must be a divisor of the total size in bytes of the last axis of the array.

[[1 1 1]
 [1 1 1]
 [1 1 1]] int32


ValueError: When changing to a larger dtype, its size must be a divisor of the total size in bytes of the last axis of the array.

In [39]:
# fromfunction函数创建ndarray
def f(x, y):
    return 10 * x + y
x = np.fromfunction(f, (5, 4), dtype=int)
print(x)
x = np.fromfunction(lambda i, j: i == j, (3, 3), dtype=int)
print(x)
x = np.fromfunction(lambda i, j: i+j, (3, 3), dtype=int)
print(x)
# fromfunction中的第一个参数为ndarray中每个值的计算方法，这个计算方法为一个包含两个参数的函数，这两个参数分别是数组值的所在坐标

[[ 0  1  2  3]
 [10 11 12 13]
 [20 21 22 23]
 [30 31 32 33]
 [40 41 42 43]]
[[ True False False]
 [False  True False]
 [False False  True]]
[[0 1 2]
 [1 2 3]
 [2 3 4]]


## 依据ones和zeros填充ndarray

In [41]:
# zeros数组
x = np.zeros(5)
print(x)
x = np.zeros([2,3])
print(x)

x = np.array([[1,2,3], [4,5,6]])
y = np.zeros_like(x)
print(y)

[0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[[0 0 0]
 [0 0 0]]


In [42]:
x = np.ones(5)
print(x)  # [1. 1. 1. 1. 1.]
x = np.ones([2, 3])
print(x)

x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.ones_like(x)
print(y)

[1. 1. 1. 1. 1.]
[[1. 1. 1.]
 [1. 1. 1.]]
[[1 1 1]
 [1 1 1]]


In [44]:
x = np.empty(5)
print(x)
x = np.empty([3,2])
print(x)
x = np.array([[1,2,3], [4,5,6]])
y = np.empty_like(x)
print(y)

[1. 1. 1. 1. 1.]
[[0. 0.]
 [0. 0.]
 [0. 0.]]
[[-1329469632         703           0]
 [          0      131074           0]]


In [45]:
x = np.eye(4)
print(x)
x = np.eye(2,3) # eye()函数可以接受任意形状的矩阵
print(x)
x = np.identity(4)
print(x)

[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
[[1. 0. 0.]
 [0. 1. 0.]]
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


In [46]:
# 提取方阵的对角元素
x = np.arange(9).reshape((3,3))
print(x)
print(np.diag(x))
print(np.diag(x, k=1))
print(np.diag(x, k=-1))
# 构造对角矩阵
v = [1,3,5,7]
x = np.diag(v)
print(x)

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[0 4 8]
[1 5]
[3 7]
[[1 0 0 0]
 [0 3 0 0]
 [0 0 5 0]
 [0 0 0 7]]


In [47]:
# full函数相关
x = np.full((2,), 7)
print(x)
x = np.full(2, 7)
print(x)
x = np.full((2,7), 7)
print(x)
x = np.array([[1,2,3], [4,5,6]])
y = np.full_like(x,7)
print(y)

[7 7]
[7 7]
[[7 7 7 7 7 7 7]
 [7 7 7 7 7 7 7]]
[[7 7 7]
 [7 7 7]]


In [50]:
x = np.arange(5)
print(x)
x = np.arange(3,7,2)
print(x)
x = np.linspace(start=0, stop=2, num=9)
print(x)
x = np.logspace(0,1,5)
print(np.around(x,2)) # np.around()返回四舍五入后的值，可指定精度 around(a, decimals=0, out=None)
# 使用np.linspace实现np.logspace的效果
x = np.linspace(start=0, stop=1, num=5)
x = [10**i for i in x]
print(np.around(x,2))
x = np.random.random(5)
print(x)
x = np.random.random([2,3])
print(x)

[0 1 2 3 4]
[3 5]
[0.   0.25 0.5  0.75 1.   1.25 1.5  1.75 2.  ]
[ 1.    1.78  3.16  5.62 10.  ]
[ 1.    1.78  3.16  5.62 10.  ]
[0.30940028 0.22763538 0.30006168 0.95435816 0.45419465]
[[0.53596564 0.14881121 0.12304416]
 [0.48454338 0.34090522 0.56052073]]


In [52]:
# 利用字典自定义结构
personType = np.dtype({
    'names': ['name', 'age', 'weight'],
    'formats': ['U30', 'i8', 'f8']
})
a = np.array([('LiMing', 24, 63.9), ('Mike', 15, 67), ('Jan', 34, 45.9)], dtype=personType)
print(a, type(a))

[('LiMing', 24, 63.9) ('Mike', 15, 67. ) ('Jan', 34, 45.9)] <class 'numpy.ndarray'>


In [54]:
# 利用包含多个元组的列表定义结构
personType = np.dtype([('name', 'U30'), ('age', 'i8'), ('weight', 'f8')])
a = np.array([('LiMing', 24, 63.9), ('Mike', 15, 67), ('Jan', 34, 45.9)], dtype=personType)
print(a, type(a))
print(a[0])
print(a[-2:])
print(a['name'])
print(a['age'])
print(a['weight'])

[('LiMing', 24, 63.9) ('Mike', 15, 67. ) ('Jan', 34, 45.9)] <class 'numpy.ndarray'>
('LiMing', 24, 63.9)
[('Mike', 15, 67. ) ('Jan', 34, 45.9)]
['LiMing' 'Mike' 'Jan']
[24 15 34]
[63.9 67.  45.9]


In [55]:
# 数组属性
a = np.array([1,2,3,4,5])
print(a.shape)
print(a.dtype)
print(a.size)
print(a.ndim)
print(a.itemsize)

b = np.array([[1,2,3], [4,5,6.0]])
print(b.shape)
print(b.dtype)
print(b.size)
print(b.ndim)
print(b.itemsize)

a = np.array([1, 2, 3, 4, 5])
print(a)  # [1 2 3 4 5]
b = np.array([1, 2, 3, 4, '5'])
print(b)  # ['1' '2' '3' '4' '5']
c = np.array([1, 2, 3, 4, 5.0])
print(c)  # [1. 2. 3. 4. 5.]

(5,)
int32
5
1
4
(2, 3)
float64
6
2
8
[1 2 3 4 5]
['1' '2' '3' '4' '5']
[1. 2. 3. 4. 5.]


# 数组的创建练习

In [56]:
0*np.nan

nan

In [57]:
np.nan==np.nan

False

In [58]:
np.inf > np.nan

False

In [59]:
np.nan - np.nan

nan

In [60]:
0.3 == 3 * 0.1

False

1. 如何将numpy的datetime64对象转换为datetime的datetime对象？
2. 如何填写不规则系列的numpy日期中的缺失日期？
3. 如何得到昨天，今天，明天的的日期
4. 创建从0到9的一维数字数组
5. 创建一个元素全为True的 3×3 数组
6. 创建一个长度为10并且除了第五个值为1的空向量
7. 创建一个值域范围从10到49的向量
8. 创建一个 3x3x3的随机数组
9. 创建一个二维数组，其中边界值为1，其余值为0
10. 创建长度为10的numpy数组，从5开始，在连续的数字之间的步长为3
11. 将本地图像导入并将其转换为numpy数组

In [61]:
dt64 = np.datetime64('2020-02-25 22:10:10')
dt2 = dt64.astype(datetime.datetime)
print(dt2)

2020-02-25 22:10:10


In [71]:
dates = np.arange('2020-02-01', '2020-02-10', 2, np.datetime64)
print(dates)
begindate = dates[0]
enddate = dates[-1] + np.timedelta64(1, 'D')
dates = np.arange(begindate, enddate, 1, np.datetime64)
print(dates)

['2020-02-01' '2020-02-03' '2020-02-05' '2020-02-07' '2020-02-09']
['2020-02-01' '2020-02-02' '2020-02-03' '2020-02-04' '2020-02-05'
 '2020-02-06' '2020-02-07' '2020-02-08' '2020-02-09']


In [72]:
# 怎么得到今天的日期啊？
# 先随便搞一个日期吧
today = np.datetime64('2024-02-23')
yesterday = today - np.timedelta64(1, 'D')
tomorrow = today + np.timedelta64(1, 'D')
print(today)
print(yesterday)
print(tomorrow)

2024-02-23
2024-02-22
2024-02-24


In [73]:
# 0~9
a = np.arange(0,10)
print(a)

[0 1 2 3 4 5 6 7 8 9]


In [75]:
# 布尔数组
a = np.full((3,3), True)
print(a)

[[ True  True  True]
 [ True  True  True]
 [ True  True  True]]


In [87]:
# 长度为10， 除了第五个值为1的空向量？
a = np.zeros(10)
a[4] = 1
print(a)

[0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]


In [76]:
a = np.arange(10, 50)
print(a)

[10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49]


In [77]:
a = np.random.rand(3,3,3)
print(a)

[[[0.82419913 0.5999049  0.76874503]
  [0.56842444 0.76590315 0.20946612]
  [0.08763596 0.03717465 0.65043936]]

 [[0.87405804 0.37577745 0.26938512]
  [0.73974971 0.8916477  0.98448363]
  [0.41755428 0.1260998  0.27028164]]

 [[0.430904   0.58712705 0.08607508]
  [0.3511812  0.67492515 0.18733117]
  [0.55754083 0.97824122 0.43259657]]]


In [88]:
# 边界值为1，其余值为0的二维数组？
Z = np.ones((10,10))
Z[1:-1,1:-1] = 0 # 将排除掉边界后的元素设置为0
print(Z)

[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]


In [80]:
# 长度为10，从5开始，步长为3的数组？
a = np.arange(5, 5+10*3, 3)
print(a)

[ 5  8 11 14 17 20 23 26 29 32]


In [None]:
# 本地图像导入并转化为numpy数组？

# 数组的创建练习——答案

In [83]:
# 如何填充一个包含不规则间隔的numpy日期中的缺失日期？ # 这个处理常用于时间序列分析中
dates = np.arange('2020-02-01', '2020-02-10', 2, np.datetime64)
print(dates)
print()
out = []
for date, d in zip(dates, np.diff(dates)): # 这里的np.diff求得日期数组中后一个日期与当前日期的差值，可适用于间隔不规则的数组
    out.extend(np.arange(date, date + d))
print(out)
print()
fillin = np.array(out)
print(fillin)
print()
output = np.hstack([fillin, dates[-1]])
print(output)

['2020-02-01' '2020-02-03' '2020-02-05' '2020-02-07' '2020-02-09']

[numpy.datetime64('2020-02-01'), numpy.datetime64('2020-02-02'), numpy.datetime64('2020-02-03'), numpy.datetime64('2020-02-04'), numpy.datetime64('2020-02-05'), numpy.datetime64('2020-02-06'), numpy.datetime64('2020-02-07'), numpy.datetime64('2020-02-08')]

['2020-02-01' '2020-02-02' '2020-02-03' '2020-02-04' '2020-02-05'
 '2020-02-06' '2020-02-07' '2020-02-08']

['2020-02-01' '2020-02-02' '2020-02-03' '2020-02-04' '2020-02-05'
 '2020-02-06' '2020-02-07' '2020-02-08' '2020-02-09']


In [86]:
# 如何得到昨天，今天，明天的的日期?
yesterday = np.datetime64('today', 'D') - np.timedelta64(1, 'D')
today     = np.datetime64('today', 'D') # 通过这个方式获得今天的日期
tomorrow  = np.datetime64('today', 'D') + np.timedelta64(1, 'D')
print ("Yesterday is " + str(yesterday))
print ("Today is " + str(today))
print ("Tomorrow is "+ str(tomorrow))

Yesterday is 2024-02-22
Today is 2024-02-23
Tomorrow is 2024-02-24


In [2]:
# 如何将图像转换为numpy数组？
# 主要不知道怎么读取图片
import numpy as np
from PIL import Image

img1 = Image.open('test.jpg')
a = np.array(img1)

print(a.shape, a.dtype) # (673, 1050, 4) uint8 在通常的RGB通道之外还有一个alpha通道表示图像透明度

(673, 1050, 4) uint8
