# 构造双半月分类数据集，并使用KNN和MLP进行分类

In [None]:
import tensorflow as tf

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons
from plotly import express as px
import os

from tensorflow_estimator.python.estimator.keras_lib import model_to_estimator

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Fixed seed: without random_state the noisy two-moons sample (and every
# score and plot derived from it) changes on each Restart & Run All.
X, y = make_moons(n_samples=411, noise=0.3, random_state=42)

# One tidy frame with columns x1, x2, label for plotly express.
moons = pd.DataFrame(X, columns=['x1', 'x2']).assign(label=y)

fig = px.scatter(
    moons,
    x='x1',
    y='x2',
    color='label',
    color_continuous_scale=['blue', 'red'],
    title='散点图 - 不同标签用不同颜色表示',
)

fig.show()



## 1、使用KNN进行分类

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from my_ML_tool import plot_decision_boundary


# Fit a 20-nearest-neighbour classifier on the full moons sample.
# fit() returns the estimator itself, so construction and fitting are chained.
KNN = KNeighborsClassifier(n_neighbors=20).fit(X, y)

# Predicted class for the very same points the model was fitted on.
result = KNN.predict(X)

# NOTE: this score is measured on the training data, not on held-out data.
print("KNN 预测准确度：", KNN.score(X, y))

import plotly.graph_objects as go

# Bounding box of the data, padded by 1 on every side so the boundary
# stays visible beyond the outermost points.
x_min, y_min = X.min(axis=0) - 1
x_max, y_max = X.max(axis=0) + 1

# Grid axes are computed once and reused for both the mesh and the contour.
grid_x = np.arange(x_min, x_max, 0.02)
grid_y = np.arange(y_min, y_max, 0.02)
xx, yy = np.meshgrid(grid_x, grid_y)

# Classify every grid node, then fold the flat predictions back into the
# mesh shape so they can be drawn as a surface.
Z = KNN.predict(np.column_stack((xx.ravel(), yy.ravel()))).reshape(xx.shape)

# Background layer: predicted class over the grid, drawn semi-transparent.
boundary_trace = go.Contour(
    x=grid_x,
    y=grid_y,
    z=Z,
    showscale=False,
    opacity=0.4,
    colorscale='Viridis',
)

# Foreground layer: the original samples, coloured by their true label.
points_trace = go.Scatter(
    x=X[:, 0],
    y=X[:, 1],
    mode='markers',
    marker={'color': y, 'colorscale': 'Viridis', 'showscale': False},
    showlegend=False,
)

fig = go.Figure(data=[boundary_trace, points_trace])
fig.update_layout(
    title='KNN Decision Boundary',
    xaxis_title='Feature 1',
    yaxis_title='Feature 2',
)
fig.show()

# Same boundary drawn with the project-local helper.
plot_decision_boundary(KNN, X, y)



## 2、使用MLP进行分类
### 2.1 检查GPU是否可用

In [171]:
import os
# 设置环境变量，指向 Conda CUDA DLL 所在目录
# os.environ["PATH"] += os.pathsep + r"E:\Porject\python\conda_virtual_env\ML_GPU_309\Library\bin"

import tensorflow as tf
# Ask TensorFlow for the GPU devices it can see and print the resulting list.
gpus = tf.config.list_physical_devices(device_type='GPU')
print("GPU 是否可用：", gpus)

GPU 是否可用： [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [None]:
# Hold out 30% of the moons sample; the split itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Three hidden layers of 10 sigmoid units each, followed by a single
# sigmoid output unit for the binary label.
mlp = Sequential([
    Dense(units=10, input_dim=2, activation="sigmoid"),
    Dense(units=10, activation="sigmoid"),
    Dense(units=10, activation="sigmoid"),
    Dense(units=1, activation="sigmoid"),
])
mlp.summary()

mlp.compile(optimizer="adam", loss="binary_crossentropy")

# Train silently (verbose=0) for 3000 epochs on the first GPU.
with tf.device('/GPU:0'):
    mlp.fit(X_train, y_train, epochs=3000, verbose=0)

## 3、查看训练后网络各层的参数


In [None]:
from sklearn.metrics import accuracy_score
from my_ML_tool import plot_decision_boundary

mlp.summary()

# Weights and biases of the first two layers; each layer's get_weights()
# list is unpacked once instead of being fetched separately per print.
first_weights, first_bias = mlp.layers[0].get_weights()
second_weights, second_bias = mlp.layers[1].get_weights()
print("第一层权重：", first_weights)
print("第一层偏置：", first_bias)
print("第二层权重：", second_weights)
print("第二层偏置：", second_bias)

# Model outputs on the training split, thresholded at 0.5 into 0/1 labels.
y_predict = mlp.predict(X_train)
predicted_labels = np.where(y_predict > 0.5, 1, 0)

print("准确度：", accuracy_score(y_train, predicted_labels))

train_loss = mlp.evaluate(X_train, y_train, verbose=1)
print("训练集损失值：", train_loss)
test_loss = mlp.evaluate(X_test, y_test, verbose=1)
print("测试集损失值：", test_loss)

# Training points coloured by the raw (un-thresholded) model output.
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_predict)

plot_decision_boundary(mlp, X_train, y_train)


## 绘制MLP可视化

In [None]:
from my_ML_tool import plot_mlp_structure



# MNIST 数据集，进行手写数字识别

In [None]:
from tensorflow.keras.datasets import mnist

# Load the MNIST train/test pairs. NOTE: this rebinds X_train, y_train,
# X_test and y_test, which held the two-moons split earlier in the notebook.
mnist_train, mnist_test = mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test


## 展示部分数据

In [None]:
# Display one training digit together with its own label. A single index
# drives both the image and the title so the two cannot drift apart
# (showing X_train[56] under the label y_train[0] would be misleading).
sample_index = 56
imag1 = X_train[sample_index]

fig1 = plt.figure(figsize=(3,3))
plt.imshow(imag1,cmap="gray")
plt.title(f"标签：{y_train[sample_index]}")
plt.show()


## 将这些二维的数据进行预处理

In [None]:
# Flatten every 28x28 image into a 784-element row and divide by 255.
format_X_train = X_train.reshape(-1,28*28) / 255.0
format_X_test = X_test.reshape(-1,28*28) / 255.0

from keras.utils import to_categorical  # one-hot encoding

# Pin the one-hot width. Without num_classes, to_categorical infers it from
# the largest label in each array separately, so train and test encodings
# could end up with different widths (e.g. on a subset missing digit 9).
NUM_CLASSES = 10
format_y_train = to_categorical(y_train, num_classes=NUM_CLASSES)
format_y_test = to_categorical(y_test, num_classes=NUM_CLASSES)




## 建立模型

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation

# Fully connected classifier for the flattened digits: two sigmoid hidden
# layers of 392 units and a 10-way softmax output.
mlp = Sequential([
    Dense(units=392, input_dim=28*28, activation="sigmoid"),
    Dense(units=392, activation="sigmoid"),
    Dense(units=10, activation="softmax"),
])
mlp.summary()

# Categorical cross-entropy pairs with the one-hot encoded labels.
mlp.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

## 训练模型

In [None]:
# Train on the flattened images and one-hot labels for 10 epochs,
# printing progress for each epoch.
mlp.fit(x=format_X_train, y=format_y_train, epochs=10, verbose=1)

# 卷积神经网络

In [195]:
from keras.preprocessing.image import ImageDataGenerator
# Rescale pixel values by 1/255 as images are loaded.
train_datagen = ImageDataGenerator(rescale=1./255)

# The training images live outside the notebook's folder. Set the
# CAT_DOG_TRAIN_DIR environment variable to run on another machine; the
# original local path remains the default.
TRAIN_DIR = os.environ.get(
    "CAT_DOG_TRAIN_DIR",
    r"E:\Porject\python\ML\dataset_self\cat_and_dog\8year_before\training_set\training_set",
)

training_set = train_datagen.flow_from_directory(
    TRAIN_DIR,
    target_size=(50,50),   # every image is resized to 50x50 pixels before entering the network
    batch_size=32,         # 32 images are read from the folder per batch
    class_mode="binary",
)

Found 8005 images belonging to 2 classes.


In [196]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten,Dense


# Small CNN for 50x50 RGB images: two conv + max-pool stages, then a dense
# head that ends in a single sigmoid unit for the binary label.
model = Sequential([
    # First convolution / pooling stage
    Conv2D(32, (3,3), input_shape=(50,50,3), activation="relu"),
    MaxPooling2D(pool_size=(2,2)),
    # Second convolution / pooling stage
    Conv2D(32, (3,3), activation="relu"),
    MaxPooling2D(pool_size=(2,2)),
    # Flatten the feature maps into a vector for the dense layers
    Flatten(),
    Dense(units=128, activation="relu"),
    Dense(units=1, activation="sigmoid"),
])

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

model.summary()

Model: "sequential_28"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_10 (Conv2D)          (None, 48, 48, 32)        896       
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 24, 24, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 22, 22, 32)        9248      
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 11, 11, 32)       0         
 g2D)                                                            
                                                                 
 flatten_5 (Flatten)         (None, 3872)              0         
                                                                 
 dense_78 (Dense)            (None, 128)             

## 训练模型

In [198]:
# Run the 10 training epochs on the first GPU, drawing batches from the
# directory generator.
with tf.device('/GPU:0'):
    model.fit(x=training_set, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:

# Keras models expose `evaluate`; the misspelled `evalate` does not exist
# and raises AttributeError. NOTE: this evaluates on the training
# generator, so it is not a held-out estimate.
model.evaluate(training_set)


## 预测
### 待预测图片预处理

In [217]:
from tensorflow.keras.preprocessing import image

img_path = "image/img_1.png"

# Load the picture at the same 50x50 size used for the training generator.
img = image.load_img(img_path, target_size=(50, 50))

# Convert to an array, prepend a batch axis so the shape becomes
# (1, 50, 50, 3), and apply the same 1/255 scaling as in training.
img_array = image.img_to_array(img)[np.newaxis, ...] / 255.0

### 正式预测

In [218]:
# Class-name -> index mapping used by the training generator; needed to
# interpret the model output printed below.
print(training_set.class_indices)

# Model output for the single preprocessed image.
prediction = model.predict(x=img_array)
print(prediction)


{'cats': 0, 'dogs': 1}
[[0.9981013]]


In [132]:
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import DataLoader

from torchvision import models
from torchvision import datasets, transforms

from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer; event files go to the "logs" directory.
logger = SummaryWriter(log_dir="logs")

AttributeError: partially initialized module 'torch' has no attribute 'version' (most likely due to a circular import)

In [138]:
import os
# Print the kernel's current working directory.
current_dir = os.getcwd()
print("工作目录:", current_dir)



工作目录: E:\Porject\python\ML\ML_learn_test


In [131]:
# Write 100 steps of random placeholder values under two scalar tags.
# Requires `logger` (the SummaryWriter) to exist, so the cell that creates
# it must have been run first.
for n_iter in range(100):
    for scalar_tag in ('Loss/train', 'Loss/test'):
        logger.add_scalar(
            tag=scalar_tag,
            scalar_value=np.random.random(),
            global_step=n_iter,
        )

NameError: name 'logger' is not defined