In [1]:
# Time 1M multiplications performed on a builtin Python list.
# time.perf_counter() is used rather than time.time(): it is monotonic and
# uses the highest-resolution clock available, so short intervals are
# measured reliably and cannot be skewed by system clock adjustments.
# Note: the measured interval also includes building the list itself.
import numpy as np
import time
start = time.perf_counter()
lst = list(range(int(1e6)))
arr = [n*5 for n in lst]
end = time.perf_counter()
print(f"It takes {(end-start)*1000}ms to do 1M multiplications using the builtin list")

It takes 94.23065185546875ms to do 1M multiplications using the builtin list


In [2]:
# Time 1M multiplications performed on a NumPy array.
# time.perf_counter() is used rather than time.time(): it is monotonic and
# uses the highest-resolution clock available, so short intervals are
# measured reliably.
# np.arange is given an integer stop value: np.arange(1e6) would build a
# float64 array, which would make this a float benchmark while the builtin
# list benchmark multiplies integers.
# Note: the measured interval also includes building the array itself.
import numpy as np
import time
start = time.perf_counter()
arr = np.arange(int(1e6))
arr = arr*5
end = time.perf_counter()
print(f"It takes {(end-start)*1000}ms to do 1M multiplications using the numpy array")

It takes 21.153688430786133ms to do 1M multiplications using the numpy array


In [3]:
# Initialise an ndarray by passing a Python list to np.array(),
# then show the array followed by its type.
import numpy as np
data = np.array([1, 2, 3, 4, 5])
print(data, type(data), sep="\n")

[1 2 3 4 5]
<class 'numpy.ndarray'>


In [4]:
# Create a two-dimensional NumPy array from a nested list.
import numpy as np
data = np.array([[1, 2, 3], [4, 5, 6]])
# Print, in order: the array itself, its number of dimensions (ndim),
# and the length along each dimension (shape).
print(data, data.ndim, data.shape, sep="\n")

[[1 2 3]
 [4 5 6]]
2
(2, 3)


In [5]:
# Create an array filled with zeros (10 elements) using np.zeros.
data = np.zeros(shape=10)
print(data)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [6]:
# Create a 3x10 array filled with ones using np.ones.
data = np.ones(shape=(3, 10))
print(data)

[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]]


In [7]:
# Fetch a single element of the array by its index.
data = np.arange(0, 10)
print(data[5])

5


In [8]:
# Fetch several elements of the array with a slice.
# Slice syntax is start:end:step.
data = np.arange(0, 10)
print(data[3:6:2])

[3 5]


In [9]:
# Writing through a slice modifies the original array, because a basic
# slice of an ndarray is a view onto the same memory, not a copy.
data = np.arange(10)
# Take an actual slice here; plain assignment (data_slice = data) would only
# bind a second name to the same object and would not demonstrate slicing.
data_slice = data[0:5]
data_slice[2] = 100
print(data)

[  0   1 100   3   4   5   6   7   8   9]


In [10]:
# Work on a copy so that writes do not touch the original data.
data = np.arange(10)
data_slice = np.copy(data)
data_slice[2] = 100
# The original array is printed unchanged.
print(data)

[0 1 2 3 4 5 6 7 8 9]


In [11]:
# Change the dimensions of an array with reshape (10 elements -> 2 rows x 5 columns).
data = np.arange(10)
print(np.reshape(data, (2, 5)))

[[0 1 2 3 4]
 [5 6 7 8 9]]


In [12]:
# Transpose an array with the .T attribute.
data = np.arange(10)
data1 = np.reshape(data, (2, 5))
data2 = data1.T
# Show the 2x5 array first, then its 5x2 transpose.
print(data1, data2, sep="\n")

[[0 1 2 3 4]
 [5 6 7 8 9]]
[[0 5]
 [1 6]
 [2 7]
 [3 8]
 [4 9]]


In [13]:
# Apply element-wise mathematical functions to a NumPy array.
data = np.arange(10)
# Each function is applied to the whole array and its result printed,
# in the order listed.
elementwise_ops = (
    np.sqrt,
    np.abs,
    np.square,
    np.exp,
    np.sign,
    np.ceil,
    np.floor,
    np.round,
    np.isnan,
)
for op in elementwise_ops:
    print(op(data))

[0.         1.         1.41421356 1.73205081 2.         2.23606798
 2.44948974 2.64575131 2.82842712 3.        ]
[0 1 2 3 4 5 6 7 8 9]
[ 0  1  4  9 16 25 36 49 64 81]
[1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01
 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03
 2.98095799e+03 8.10308393e+03]
[0 1 1 1 1 1 1 1 1 1]
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
[0 1 2 3 4 5 6 7 8 9]
[False False False False False False False False False False]


In [14]:
# Apply element-wise binary operations to two NumPy arrays.
# The dtype is pinned to int64: np.power below reaches 9**10 = 3486784401,
# which does not fit in a 32-bit integer and would overflow silently on
# platforms where NumPy's default integer type is 32-bit.
data1 = np.array([1,3,5,7,9], dtype=np.int64)
data2 = np.array([2,4,6,8,10], dtype=np.int64)
print(data1+data2)                 # operator form of np.add
print(np.add(data1,data2))
print(np.subtract(data1,data2))
print(np.multiply(data1,data2))
print(np.divide(data1,data2))      # true division, yields floats
print(np.power(data1,data2))       # data1 ** data2
print(np.fmax(data1,data2))        # element-wise maximum
print(np.fmin(data1,data2))        # element-wise minimum

[ 3  7 11 15 19]
[ 3  7 11 15 19]
[-1 -1 -1 -1 -1]
[ 2 12 30 56 90]
[0.5        0.75       0.83333333 0.875      0.9       ]
[         1         81      15625    5764801 3486784401]
[ 2  4  6  8 10]
[1 3 5 7 9]


In [15]:
# Summary statistics of an array: sum, mean, standard deviation, minimum,
# maximum, and the indices of the maximum and minimum elements.
data = np.arange(10)
for reducer in (
    data.sum,
    data.mean,
    data.std,
    data.min,
    data.max,
    data.argmax,
    data.argmin,
):
    print(reducer())

45
4.5
2.8722813232690143
0
9
9
0


In [16]:
# Sort an array in place (ascending) with ndarray.sort().
data = np.array([1, 9, 3, 2, 7, 4, 5, 6, 8])
data.sort(axis=-1)  # axis=-1 is the default: sort along the last axis
print(data)

[1 2 3 4 5 6 7 8 9]


In [17]:
# Load comma-separated values from data.txt into an array, then print the
# loaded array followed by the same values converted to integers.
data = np.genfromtxt(fname='data.txt', delimiter=",")
print(data, data.astype(int), sep="\n")

[[ 1.  2.  3.  4.  5.]
 [ 6.  7.  8.  9. 10.]
 [11. 12. 13. 14. 15.]]
[[ 1  2  3  4  5]
 [ 6  7  8  9 10]
 [11 12 13 14 15]]


In [18]:
# Compute the average rating of every book from ./data/rating.txt and write
# one line per book (book id, average rating, number of ratings) to
# ./rating_average.txt.
import numpy as np

# Number of book slots in the accumulators; book ids are treated as 1-based.
NUM_BOOKS = 10000

data = np.genfromtxt("./data/rating.txt",delimiter=",")
data = data.astype(int)
print(data)

# Column 1 holds the book id and column 2 the score; column 0 is not used here.
book_index = data[:, 1] - 1
book_score = data[:, 2]

# np.add.at performs unbuffered in-place accumulation, so a book index that
# occurs many times is summed correctly (plain fancy-index "+=" would count
# each repeated index only once). This replaces the per-row Python loop.
rating_sum = np.zeros(NUM_BOOKS)
rating_count = np.zeros(NUM_BOOKS)
np.add.at(rating_sum, book_index, book_score)
np.add.at(rating_count, book_index, 1)

# Books without any rating keep NaN as their average; restricting the
# division to rated books avoids the 0/0 "invalid value" RuntimeWarning.
rating_average = np.divide(
    rating_sum,
    rating_count,
    out=np.full(NUM_BOOKS, np.nan),
    where=rating_count > 0,
)

book_ids = np.arange(1, NUM_BOOKS + 1)
np.savetxt(
    "./rating_average.txt",
    np.column_stack((book_ids, rating_average, rating_count)),
    fmt="%8d%8.3f%8d",
)
print(rating_average)

[[    1   258     5]
 [    2  4081     4]
 [    2   260     5]
 ...
 [38411  5559     4]
 [38411  6254     3]
 [12322  2205     5]]
[4.27998767 4.35006465 3.21572006 ... 4.32352941 3.71875    4.00900901]


  del sys.path[0]
