# 1.numpy
- N维数组（矩阵），快速高效，矢量运算
- 高效的Index，不需要循环
- 开源免费跨平台，运行效率媲美C/Matlab

# 2.Scipy
- 依赖于Numpy
- 专为科学和工程设计
- 实现了多种常用科学计算：傅里叶变换，信号和图像处理

# 3.pandas
- 结构化数据分析利器（依赖numpy）
- 提供多种高级数据结构：Series, DataFrame, Panel（注意：Panel已在pandas 0.25.0中移除）
- 强大的数据索引和处理能力

# 4.matplotlib
- Python 2D绘图领域使用最广泛的组件
- 基本能取代Matlab的绘图功能
- 通过mplot3d可以绘制3D图

# 5.Scikit-learn
- 机器学习模块
- 建立在Scipy之上，提供了常用的机器学习算法
- 简单易学的API接口

# Numpy
- 基本概念：
    - 矩阵：二维数组
    - 向量：1\*n或者n\*1的矩阵
    - 标量：1\*1的矩阵
    - 数组：多维数组，矩阵（二维数组）的延伸
- 加减运算：
    - 相加减的两个矩阵必须要有相同的行数和列数
    - 行和列对应元素相加减
- 乘法运算：
    - 数组乘法（逐元素相乘，即Matlab中的“.\*”点乘）：对应位置的元素相乘
    - 矩阵乘法：前行乘后列，要求前一个矩阵的列数等于后一个矩阵的行数

# 1.numpy介绍

## 数组的创建和访问

In [1]:
import numpy as np

In [2]:
list_1 = [1,2,3,4]
list_1

[1, 2, 3, 4]

In [3]:
array_1 = np.array(list_1)
array_1

array([1, 2, 3, 4])

In [4]:
list_2 = [5,6,7,8]
array_2 = np.array([list_1, list_2])
array_2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [5]:
array_2.shape

(2, 4)

In [6]:
array_2.size

8

In [7]:
array_2.dtype

dtype('int32')

In [8]:
array_3 = np.array([[1.0,2,3],[4.0,5,6]])

In [9]:
array_3.dtype

dtype('float64')

In [10]:
array_4 = np.arange(1,10,2)
array_4

array([1, 3, 5, 7, 9])

In [11]:
np.zeros(5)

array([0., 0., 0., 0., 0.])

In [13]:
np.zeros([2,3])

array([[0., 0., 0.],
       [0., 0., 0.]])

In [14]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [15]:
np.eye(5).dtype

dtype('float64')

In [16]:
a = np.arange(1,10)
a

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [17]:
a[1]

2

In [18]:
a[1:5]

array([2, 3, 4, 5])

In [19]:
b = np.array([[1,2,3],[4,5,6]])
b

array([[1, 2, 3],
       [4, 5, 6]])

In [20]:
b[1][0]

4

In [21]:
b[1,0]

4

In [22]:
c = np.array([[1,2,3],[4,5,6],[7,8,9]])
c

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [23]:
c[:2,1:]

array([[2, 3],
       [5, 6]])

## 数组与矩阵运算

In [24]:
np.random.randn(10)

array([ 1.76414617, -0.06764337, -0.72971012,  0.4751042 ,  1.13291889,
        1.28794153,  0.39036675, -0.78125575,  0.25007364,  0.51393357])

In [25]:
np.random.randint(10)

1

In [26]:
np.random.randint(10,size=(4,5))

array([[5, 1, 0, 8, 4],
       [7, 1, 2, 6, 4],
       [7, 2, 1, 9, 4],
       [8, 2, 4, 4, 8]])

In [27]:
np.random.randint(10,size=20).reshape(4,5)

array([[9, 2, 9, 3, 7],
       [9, 5, 0, 3, 1],
       [0, 3, 3, 4, 2],
       [5, 7, 1, 8, 2]])

In [28]:
a = np.random.randint(10,size=20).reshape(4,5)
a

array([[0, 7, 2, 2, 7],
       [1, 5, 4, 5, 4],
       [7, 7, 9, 6, 0],
       [2, 5, 2, 0, 1]])

In [29]:
b = np.random.randint(10,size=20).reshape(4,5)
b

array([[8, 8, 9, 3, 2],
       [5, 1, 8, 6, 5],
       [2, 5, 7, 5, 7],
       [9, 1, 7, 0, 5]])

In [30]:
a + b

array([[ 8, 15, 11,  5,  9],
       [ 6,  6, 12, 11,  9],
       [ 9, 12, 16, 11,  7],
       [11,  6,  9,  0,  6]])

In [31]:
a - b

array([[-8, -1, -7, -1,  5],
       [-4,  4, -4, -1, -1],
       [ 5,  2,  2,  1, -7],
       [-7,  4, -5,  0, -4]])

In [32]:
a * b

array([[ 0, 56, 18,  6, 14],
       [ 5,  5, 32, 30, 20],
       [14, 35, 63, 30,  0],
       [18,  5, 14,  0,  5]])

In [33]:
a / b

RuntimeWarning: invalid value encountered in true_divide
  a / b


array([[0.        , 0.875     , 0.22222222, 0.66666667, 3.5       ],
       [0.2       , 5.        , 0.5       , 0.83333333, 0.8       ],
       [3.5       , 1.4       , 1.28571429, 1.2       , 0.        ],
       [0.22222222, 5.        , 0.28571429,        nan, 0.2       ]])

In [34]:
np.mat([[1,2,3],[4,5,6]])

matrix([[1, 2, 3],
        [4, 5, 6]])

In [35]:
a

array([[0, 7, 2, 2, 7],
       [1, 5, 4, 5, 4],
       [7, 7, 9, 6, 0],
       [2, 5, 2, 0, 1]])

In [36]:
np.mat(a)

matrix([[0, 7, 2, 2, 7],
        [1, 5, 4, 5, 4],
        [7, 7, 9, 6, 0],
        [2, 5, 2, 0, 1]])

In [37]:
A = np.mat(a)
B = np.mat(b)

In [38]:
A, B

(matrix([[0, 7, 2, 2, 7],
         [1, 5, 4, 5, 4],
         [7, 7, 9, 6, 0],
         [2, 5, 2, 0, 1]]),
 matrix([[8, 8, 9, 3, 2],
         [5, 1, 8, 6, 5],
         [2, 5, 7, 5, 7],
         [9, 1, 7, 0, 5]]))

In [39]:
A + B

matrix([[ 8, 15, 11,  5,  9],
        [ 6,  6, 12, 11,  9],
        [ 9, 12, 16, 11,  7],
        [11,  6,  9,  0,  6]])

In [40]:
A - B

matrix([[-8, -1, -7, -1,  5],
        [-4,  4, -4, -1, -1],
        [ 5,  2,  2,  1, -7],
        [-7,  4, -5,  0, -4]])

In [41]:
A * B

ValueError: shapes (4,5) and (4,5) not aligned: 5 (dim 1) != 4 (dim 0)

In [42]:
a = np.mat(np.random.randint(10, size=20).reshape(4,5))
b = np.mat(np.random.randint(10, size=20).reshape(5,4))

In [43]:
a

matrix([[6, 4, 9, 1, 4],
        [0, 0, 0, 8, 3],
        [3, 0, 7, 5, 8],
        [6, 3, 9, 0, 9]])

In [44]:
b

matrix([[9, 2, 1, 0],
        [1, 8, 4, 7],
        [4, 7, 0, 4],
        [0, 8, 4, 4],
        [7, 9, 0, 2]])

In [45]:
a * b

matrix([[122, 151,  26,  76],
        [ 21,  91,  32,  38],
        [111, 167,  23,  64],
        [156, 180,  18,  75]])

In [46]:
 a = np.random.randint(10,size=20).reshape(4,5)

In [47]:
a

array([[5, 6, 9, 6, 4],
       [1, 2, 1, 4, 2],
       [1, 3, 2, 5, 7],
       [3, 3, 2, 6, 6]])

In [48]:
np.unique(a)

array([1, 2, 3, 4, 5, 6, 7, 9])

In [49]:
sum(a)

array([10, 14, 14, 21, 19])

In [50]:
sum(a[0])

30

In [51]:
sum(a[:, 0])

10

In [52]:
a.max()

9

In [53]:
max(a[0])

9

In [54]:
max(a[:, 0])

5

## Array的保存与加载

In [60]:
np.save("one_array.npy", a)

In [62]:
np.load("one_array.npy")

array([[5, 6, 9, 6, 4],
       [1, 2, 1, 4, 2],
       [1, 3, 2, 5, 7],
       [3, 3, 2, 6, 6]])

# 2.pandas入门

## Series介绍

In [67]:
import pandas as pd

In [68]:
s1 = pd.Series([1,2,3,4])
s1

0    1
1    2
2    3
3    4
dtype: int64

In [69]:
s1.values

array([1, 2, 3, 4], dtype=int64)

In [70]:
s1.index 

RangeIndex(start=0, stop=4, step=1)

In [71]:
s2 = pd.Series(np.arange(10))
s2

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int32

In [72]:
s3 = pd.Series({'a':1, 'b':2, 'c':3})
s3

a    1
b    2
c    3
dtype: int64

In [73]:
s3.index

Index(['a', 'b', 'c'], dtype='object')

In [74]:
s4 = pd.Series([1,2,3,], index=['A', 'B', 'C'])
s4

A    1
B    2
C    3
dtype: int64

In [75]:
s4["A"]

1

In [77]:
s4[s4>2]

C    3
dtype: int64

In [78]:
s4.to_dict()

{'A': 1, 'B': 2, 'C': 3}

In [82]:
s5 = pd.Series(s4.to_dict())
s5.values

array([1, 2, 3], dtype=int64)

In [91]:
s6 = pd.Series(s5, index=['A','B','C','D'])
s6

A    1.0
B    2.0
C    3.0
D    NaN
dtype: float64

In [92]:
pd.isnull(s6)

A    False
B    False
C    False
D     True
dtype: bool

In [93]:
pd.notnull(s6)

A     True
B     True
C     True
D    False
dtype: bool

In [94]:
s6.name = '总体名字'
s6

A    1.0
B    2.0
C    3.0
D    NaN
Name: 总体名字, dtype: float64

In [95]:
s6.index.name = '索引名字'
s6

索引名字
A    1.0
B    2.0
C    3.0
D    NaN
Name: 总体名字, dtype: float64

In [96]:
s6.index

Index(['A', 'B', 'C', 'D'], dtype='object', name='索引名字')

## dataframe入门

In [114]:
from pandas import Series, DataFrame
import webbrowser
import pandas as pd

In [100]:
url = 'https://www.tiobe.com/tiobe-index/'
webbrowser.open(url)

True

In [104]:
!cd

C:\Users\DELL\PycharmProjects\StageTwo\virtual_environment\2021_10_AI\Scripts\LiWenXiang


In [117]:
df = pd.read_clipboard()
df

Unnamed: 0,x1,x2,y
0,0.0323,0.0244,1
1,0.0887,0.0244,1
2,0.169,0.0163,1
3,0.242,0.0,1
4,0.242,0.0488,1
5,0.153,0.0569,1
6,0.113,0.0976,1
7,0.0484,0.13,1


In [116]:
df.columns

Index(['x1', 'x2', 'y'], dtype='object')

In [119]:
df.x1

0    0.0323
1    0.0887
2    0.1690
3    0.2420
4    0.2420
5    0.1530
6    0.1130
7    0.0484
Name: x1, dtype: float64

In [122]:
df['x1']

0    0.0323
1    0.0887
2    0.1690
3    0.2420
4    0.2420
5    0.1530
6    0.1130
7    0.0484
Name: x1, dtype: float64

In [123]:
df_new = DataFrame(df, columns=['y', '不存在'])
df_new

Unnamed: 0,y,不存在
0,1,NaN
1,1,NaN
2,1,NaN
3,1,NaN
4,1,NaN
5,1,NaN
6,1,NaN
7,1,NaN


In [127]:
df_new['不存在'] = [i for i in range(8)]
df_new

Unnamed: 0,y,不存在
0,1,0
1,1,1
2,1,2
3,1,3
4,1,4
5,1,5
6,1,6
7,1,7


In [128]:
ss1 = pd.Series([666, 999], index=[5, 6])
df_new['不存在'] = ss1
df_new

Unnamed: 0,y,不存在
0,1,NaN
1,1,NaN
2,1,NaN
3,1,NaN
4,1,NaN
5,1,666.0
6,1,999.0
7,1,NaN


In [134]:
data = {"Capital":["Brussels", "New Delhi", "Brasilia"],
       "Country":["Belgium", "India", "Brazil"], 
       "Population": [11190846, 1303171035, 207847528]}

In [135]:
s1 = pd.Series(data['Country'])
s1

0    Belgium
1      India
2     Brazil
dtype: object

In [136]:
df1 = pd.DataFrame(data)
df1

Unnamed: 0,Capital,Country,Population
0,Brussels,Belgium,11190846
1,New Delhi,India,1303171035
2,Brasilia,Brazil,207847528


<div class="alert alert-block alert-info"> 
    创建三个Series，并将它们合并成一个与原DataFrame相同的DataFrame
</div>

In [156]:
ss1 = pd.Series(data['Capital'])
ss2 = pd.Series(data['Country'])
ss3 = pd.Series(data['Population'])

In [157]:
ss2

0    Belgium
1      India
2     Brazil
dtype: object

In [163]:
df_new = pd.DataFrame([ss1, ss2, ss3], index=['Capital','Country','Population']).T
df_new

Unnamed: 0,Capital,Country,Population
0,Brussels,Belgium,11190846
1,New Delhi,India,1303171035
2,Brasilia,Brazil,207847528


In [145]:
s3

a    1
b    2
c    3
dtype: int64