<a href="https://www.kaggle.com/code/eupphh/eupph-facial-keypoints-detection-cnn?scriptVersionId=292738410" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
import torch.nn as nn
import torch.optim as optim

In [None]:
# Load the training table; pandas reads the zipped CSV directly.
train_data = pd.read_csv('/kaggle/input/facial-keypoints-detection/training.zip')
# Summary statistics for the table's columns.
train_data.describe()

In [None]:
# Show the column labels of the training table.
train_data.columns

In [None]:
# Peek at the raw 'Image' column.
train_data['Image']

In [None]:
# Original note: 7049 samples, 30 features.
# Count the missing values in each column.
train_data.isnull().sum()

In [None]:
# Same column labels, as a plain Python list.
train_data.columns.tolist()

In [None]:
# Separate the target columns (everything except 'Image') from the raw
# image column, then print both shapes as a quick check.
key = train_data.drop('Image',axis=1)
image = train_data['Image'].values
print(key.shape,image.shape)

In [None]:
# Handle missing values: fill each NaN with its column mean, then keep the
# result as a NumPy array.
key = key.fillna(key.mean()).values

In [None]:
# The images are stored as strings; convert them to arrays.
def string_to_image(x, shape=(96, 96)):
    """Decode a whitespace-separated pixel string into a 2-D float32 image.

    Args:
        x: String of integer pixel values separated by any amount of
            whitespace.
        shape: ``(height, width)`` of the decoded image. Defaults to
            ``(96, 96)``, the size used throughout this notebook.

    Returns:
        ``np.ndarray`` of dtype ``float32`` with the requested shape.

    Raises:
        ValueError: If a token is not an integer, or the number of pixels
            does not match ``shape``.
    """
    # split() without an argument collapses runs of whitespace; split(" ")
    # would produce empty tokens for consecutive spaces and break int().
    pixels = np.fromiter((int(tok) for tok in x.split()), dtype=np.float32)
    expected = shape[0] * shape[1]
    if pixels.size != expected:
        raise ValueError(
            f"expected {expected} pixel values for shape {tuple(shape)}, "
            f"got {pixels.size}"
        )
    return pixels.reshape(shape)
# Decode every pixel string and collect the results in one NumPy array.
images = np.array([string_to_image(i) for i in image])
images.shape

In [None]:
# Show the first decoded image in grayscale.
plt.imshow(images[0],cmap='gray')

In [None]:
def show_picture(image,key):
    """Plot one image with its keypoints overlaid.

    Args:
        image: 2-D array of pixel intensities, drawn in grayscale.
        key: Flat sequence of coordinates laid out as x0, y0, x1, y1, ...
    """
    # Even positions hold the x coordinates, odd positions the matching y.
    x = key[0::2]
    y = key[1::2]
    plt.figure(figsize=(6,6))
    # Draw the image that was passed in, not always the global images[0].
    plt.imshow(image,cmap='gray')
    plt.scatter(x, y, c='#47FF31', marker='o', s=15)
    plt.show()


# `image` holds the raw pixel strings; the decoded arrays live in `images`.
show_picture(images[0],key[0])

In [None]:
# Normalisation: the images are 96x96, so coordinates are at most 96.
key_n = key/96.0
# Scale the pixel values by 255.
images_n = images/255.0

In [None]:
# Inspect the first normalised image.
images_n[0]

In [None]:
# Split images and targets into training and validation sets.
X_train, X_val, y_train, y_val = train_test_split(
    images_n,          # image data
    key_n,             # keypoint data
    test_size=0.2,     # hold out 20% for validation
    random_state=42,   # fixed seed for reproducibility
    shuffle=True       # shuffle before splitting
)

# Report the size of each split.
print(f"训练集: {len(X_train)} 个样本")
print(f"验证集: {len(X_val)} 个样本")

In [None]:
# Convert the NumPy splits to tensors.
# Important: the dtype must be float32.
# Images gain an explicit single-channel axis: (N, 1, 96, 96).
X_train_tensor, X_val_tensor = (
    torch.tensor(split, dtype=torch.float32).reshape(-1, 1, 96, 96)
    for split in (X_train, X_val)
)
# Targets keep their original shape.
y_train_tensor, y_val_tensor = (
    torch.tensor(split, dtype=torch.float32)
    for split in (y_train, y_val)
)

In [None]:
# Check the shape of the validation targets.
y_val_tensor.shape

In [None]:
# Build the PyTorch datasets and data loaders.
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Reuse batch_size (instead of a second literal 64) so both loaders change
# together when the setting is tuned.
val_loader = DataLoader(val_dataset, batch_size=batch_size)

In [None]:
# Quick test: fetch a single batch and print its shapes.
# The loop variables have their own names so the notebook-level `image` and
# `key` arrays are not overwritten by a batch of tensors.
for batch_images, batch_keys in train_loader:
    print("一个batch：",batch_images.shape,batch_keys.shape)
    break

* 输入: (batch, 1, 96, 96)

* 卷积层1: Conv2d(1→32, 3×3, padding=1) + BatchNorm + ReLU
* 池化: MaxPool2d(2×2) → (32, 48, 48)

* 卷积层2: Conv2d(32→64, 3×3, padding=1) + BatchNorm + ReLU
* 池化: MaxPool2d(2×2) → (64, 24, 24)

* 卷积层3: Conv2d(64→128, 3×3, padding=1) + BatchNorm + ReLU
* 池化: MaxPool2d(2×2) → (128, 12, 12)

* 卷积层4: Conv2d(128→256, 3×3, padding=1) + BatchNorm + ReLU
* 池化: MaxPool2d(2×2) → (256, 6, 6)

* 展平: 256×6×6 = 9216

* 全连接1: 9216 → 512 + Dropout(0.3) + ReLU
* 全连接2: 512 → 256 + Dropout(0.2) + ReLU
* 输出层: 256 → 30 (15个关键点×2坐标)

In [None]:
class CNN(nn.Module):
    """Four-stage convolutional regressor: (N, 1, 96, 96) images -> 30 values.

    Every stage is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2). Channels grow 1 -> 32 -> 64 -> 128 -> 256 while the spatial
    size halves 96 -> 48 -> 24 -> 12 -> 6. The flattened 256*6*6 features are
    fed to a three-layer fully connected head.
    """

    @staticmethod
    def _stage(in_channels, out_channels):
        """Build one conv / batch-norm / ReLU / max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def __init__(self):
        super().__init__()
        self.conv1 = self._stage(1, 32)     # (1,96,96)   -> (32,48,48)
        self.conv2 = self._stage(32, 64)    # (32,48,48)  -> (64,24,24)
        self.conv3 = self._stage(64, 128)   # (64,24,24)  -> (128,12,12)
        self.conv4 = self._stage(128, 256)  # (128,12,12) -> (256,6,6)
        # Regression head: 9216 -> 512 -> 256 -> 30.
        self.out = nn.Sequential(
            nn.Linear(256 * 6 * 6, 512),
            nn.Dropout(0.3),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.Dropout(0.2),
            nn.ReLU(),
            nn.Linear(256, 30),
        )

    def forward(self, x):
        """Run the four stages, flatten per sample, and apply the head."""
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = stage(x)
        flat = x.view(x.size(0), -1)
        return self.out(flat)


In [None]:
# Model, loss function and optimizer.
model= CNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
def fit(epochs,model,cri,opt,train_loader,val_loader):
    """Train ``model`` and report the validation loss after every epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        model: Module to optimise; called as ``model(x)``.
        cri: Loss callable, used as ``cri(pred, y)``.
        opt: Optimizer that updates ``model``'s parameters.
        train_loader: Iterable of ``(x, y)`` training batches.
        val_loader: Iterable of ``(x, y)`` validation batches.

    Returns:
        A list with one float per epoch: the mean of the per-batch
        validation losses, or ``nan`` if ``val_loader`` yielded no batches.
    """
    history = []
    for epoch in range(epochs):
        model.train()
        for x,y in train_loader:
            # Clear gradients first so stale ones left over from before the
            # call cannot leak into the first update.
            opt.zero_grad()
            pred = model(x)      # forward pass
            loss = cri(pred,y)   # loss
            loss.backward()      # backward pass
            opt.step()           # parameter update

        # All training batches done; evaluate on the validation set.
        model.eval()
        with torch.no_grad():  # no gradients needed for validation
            batch_losses = [cri(model(x), y).item() for x, y in val_loader]
        # Guard against an empty validation loader (would divide by zero).
        if batch_losses:
            val_loss = sum(batch_losses)/len(batch_losses)
        else:
            val_loss = float("nan")
        history.append(val_loss)
        print("当前epoch:",epoch,"  |  平均验证集的损失:",val_loss)
        print("========epoch",epoch,"完成=========")
    return history
            

In [None]:
# Train for 30 epochs.
fit(30,model,criterion,optimizer,train_loader,val_loader)

In [None]:
# Test set: load the table and take its raw 'Image' strings.
test_data = pd.read_csv('/kaggle/input/facial-keypoints-detection/test.zip')
test_images = test_data['Image'].values

In [None]:
# Decode the image strings into arrays.
test_images_np = np.array(list(map(string_to_image, test_images)))

# Normalise the pixel values.
test_images_norm = np.divide(test_images_np, 255.0)

# Convert to a float32 tensor and reshape to (N, 1, 96, 96).
test_tensor = torch.tensor(
    test_images_norm,
    dtype=torch.float32,
).reshape(-1, 1, 96, 96)

In [None]:
model.eval()  # switch to evaluation mode
with torch.no_grad():  # no gradients needed for inference
    # Predict in chunks rather than one giant forward pass, so peak
    # activation memory stays bounded regardless of the test-set size.
    pred = torch.cat([model(chunk) for chunk in test_tensor.split(256)])
    # .cpu() is a no-op on CPU and keeps .numpy() valid on other devices.
    pred = pred.cpu().numpy()

In [None]:
# Check the shape of the prediction array.
pred.shape

In [None]:
# Multiply by 96 to undo the division applied to the training targets.
pred_original = pred * 96.0

In [None]:
# Load the submission template and the lookup table.
sample = pd.read_csv('/kaggle/input/facial-keypoints-detection/SampleSubmission.csv')
lookup = pd.read_csv('/kaggle/input/facial-keypoints-detection/IdLookupTable.csv')

# Make the column float so it can hold the predictions.
sample['Location'] = sample['Location'].astype(float)

# Resolve every requested feature name to its column position in one call,
# instead of rebuilding and searching the column list on each row.
col_idx = train_data.columns.get_indexer(lookup['FeatureName'])
if (col_idx < 0).any():
    missing = sorted(set(lookup['FeatureName'][col_idx < 0]))
    raise ValueError(f"FeatureName values not found in train_data: {missing}")

# ImageId is shifted by one to index rows of the prediction array.
image_idx = lookup['ImageId'].to_numpy() - 1

# One vectorised lookup replaces the per-row scan of `sample`.
locations = pd.Series(
    pred_original[image_idx, col_idx].astype(float),
    index=lookup['RowId'].to_numpy(),
)
# If a RowId were listed twice, the last entry wins, as it would with
# sequential assignment.
locations = locations[~locations.index.duplicated(keep='last')]

# Fill only the rows that have a lookup entry; others keep the template value.
matched = sample['RowId'].isin(locations.index)
sample.loc[matched, 'Location'] = sample.loc[matched, 'RowId'].map(locations).to_numpy()

# Save the submission file.
sample.to_csv('submission.csv', index=False)