In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim
from torch.nn.init import kaiming_normal_
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from imblearn.combine import SMOTETomek

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28, 50)
        self.fc2 = nn.Linear(50, 40)
        self.fc3 = nn.Linear(40, 30)
        self.fc4 = nn.Linear(30, 20)
        self.fc5 = nn.Linear(20, 1)

    def forward(self, x):
        out = self.fc1(x)
        out = F.relu(out)
        out = self.fc2(out)
        out = F.relu(out)
        out = self.fc3(out)
        out = F.relu(out)
        out = self.fc4(out)
        out = F.relu(out)
        out = self.fc5(out)
        return out


def test(predict, label):
    t = predict == label
    accu = np.mean(t)
    return accu

In [3]:
table = pd.read_csv(r'D:\repos\py\temp\online_shoppers_intention.csv')
table = shuffle(table, random_state=3)
y = table.iloc[:, -1].apply(lambda x: 1 if x is True else 0)
table = table.drop(columns=['Revenue'])
X = pd.get_dummies(table)
mm = MinMaxScaler(feature_range=(0, 1))
X = mm.fit_transform(X)
train_data, test_data, train_label, test_label = train_test_split(X, y, test_size=0.3, random_state=3)

smote_tomek = SMOTETomek(random_state=3)
train_data, train_label = smote_tomek.fit_resample(train_data, train_label)
print('over-sampling done\n')

test_label = np.array(test_label).ravel()
train_label = np.array(train_label).ravel()

over-sampling done



In [4]:
myNet = Net()
criterion = nn.MSELoss()  # 损失函数
optimzer = torch.optim.ASGD(myNet.parameters(), lr=0.15)
epochs = 10000  # 训练次数

In [5]:
def check(epoch):
    with torch.no_grad():
        test_in = torch.from_numpy(test_data).float()
        test_out = myNet(test_in).squeeze().numpy()
        for i in range(len(test_out)):
            if test_out[i] < 0.5:
                test_out[i] = 0
            else:
                test_out[i] = 1
        accu = test(test_out, test_label)
        f1 = f1_score(test_label, test_out)
        print("Epoch:{}\tLoss:{:.10f}\tAccuracy:{:.10f}\tF1-score:{:.10f}".format(epoch + 1, loss.item(), accu, f1))

In [6]:
for i in range(epochs):
    x = torch.from_numpy(train_data).float()
    y = torch.from_numpy(train_label).float()
    out = myNet(x).squeeze()
    loss = criterion(out, y)
    optimzer.zero_grad()
    loss.backward()
    optimzer.step()
    if (i+1) % 500 == 0:  # 每100次输出相关的信息
        check(i)

Epoch:500	Loss:0.1927117407	Accuracy:0.4963503650	F1-score:0.3661109221
Epoch:1000	Loss:0.1674987674	Accuracy:0.5717761557	F1-score:0.4022641509
Epoch:1500	Loss:0.1537401825	Accuracy:0.6304406596	F1-score:0.4344228382
Epoch:2000	Loss:0.1447963566	Accuracy:0.6828872668	F1-score:0.4709066306
Epoch:2500	Loss:0.1368924379	Accuracy:0.7164098405	F1-score:0.4959154253
Epoch:3000	Loss:0.1319444478	Accuracy:0.7391186807	F1-score:0.5143432310
Epoch:3500	Loss:0.1366590559	Accuracy:0.7391186807	F1-score:0.5167751627
Epoch:4000	Loss:0.1089606211	Accuracy:0.7812922411	F1-score:0.5527915976
Epoch:4500	Loss:0.1133034825	Accuracy:0.8067045147	F1-score:0.5801526718
Epoch:5000	Loss:0.1000460014	Accuracy:0.8099486348	F1-score:0.5847607797
Epoch:5500	Loss:0.0845231712	Accuracy:0.8732089754	F1-score:0.6543846721
Epoch:6000	Loss:0.0873288512	Accuracy:0.8869964855	F1-score:0.6666666667
Epoch:6500	Loss:0.0915681645	Accuracy:0.8934847256	F1-score:0.6661016949
Epoch:7000	Loss:0.0973141715	Accuracy:0.8123817248	F