In [12]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.neural_network import MLPClassifier

# 1. Load the dataset
data = pd.read_csv("./datasets/3d.csv")
print(data.head())
# Tally how many times each winning number appears in the "开奖号码" column.
counter = Counter(data["开奖号码"])
print("top 15: \n", counter.most_common(15))
# (value, count) pairs, most frequent first; its length is the number of
# distinct winning numbers seen so far.
most_list = counter.most_common()
print("已中奖组合个数：", len(most_list))
# Numbers in 0..999 that never occur as a winning number, in ascending order.
# Counter is a dict subclass, so `in` is a hash lookup on its keys instead of
# a linear scan over most_list for every candidate.
results = [number for number in range(1000) if number not in counter]
print("从未中过奖的：", results)
# 2. Split the dataset
def _make_windows(series, n_step):
    """Build sliding-window samples from a 1-D sequence.

    Each sample is ``n_step`` consecutive values; its target is the value
    that immediately follows the window.

    Args:
        series: Indexable, sliceable 1-D sequence of values.
        n_step: Number of consecutive values per input window.

    Returns:
        A ``(windows, targets)`` pair of equally long lists. Both are empty
        when ``series`` has ``n_step`` or fewer elements.
    """
    windows, targets = [], []
    # The last usable window must leave one element after it for the target.
    for start in range(len(series) - n_step):
        stop = start + n_step
        windows.append(series[start:stop])
        targets.append(series[stop])
    return windows, targets


size = len(data["期号"])
index = int(size/2)
# Reverse the row order once and reuse it for both halves.
# NOTE(review): data.head() shows the newest draw in row 0, so the reversed
# column is presumably oldest-first -- confirm for the whole file.
reversed_numbers = data[::-1]["开奖号码"]
# First half of the reversed column is used for training, the rest for testing.
train_list, test_list = reversed_numbers[:index].values, reversed_numbers[index:].values
n_step = 5
X_train, y_train = _make_windows(train_list, n_step)
X_test, y_test = _make_windows(test_list, n_step)
# 3. Standardize
# Fit the scaler on the training windows only, then apply the same scaling to
# the test windows.
transfer = StandardScaler()
X_train = transfer.fit_transform(X_train)
X_test = transfer.transform(X_test)
# 4. Ridge regression estimator
estimator = Ridge(alpha=0.01, max_iter=10000)
estimator.fit(X_train, y_train)
# 5. Model evaluation
y_predict = estimator.predict(X_test)
# Cast the float predictions to integers so they can be compared with the
# integer targets. The builtin `int` replaces `np.int`, which was only an
# alias for it and was removed in NumPy 1.24 (AttributeError on newer NumPy).
y_predict = y_predict.astype(int)
print(y_predict.shape)
print("岭回归--预测值：", y_predict)
error = mean_squared_error(y_test, y_predict)
print("岭回归--均方误差为：", error)
# accuracy_score counts exact matches between the integer-cast predictions
# and the targets.
accuracy = accuracy_score(y_test, y_predict)
print("岭回归--精确度为：", accuracy)
# 6. Switch to a multi-layer perceptron
# Each distinct target value in y_train is treated as a class label.
# fit() returns the estimator itself, so construction and training are chained.
estimator = MLPClassifier(
    solver="adam",
    hidden_layer_sizes=128,
    batch_size=64,
    max_iter=1000,
).fit(X_train, y_train)
y_predict = estimator.predict(X_test)
print("多层感知器--预测值：", y_predict)
# Same two metrics as for the ridge model, so the results are comparable.
error = mean_squared_error(y_test, y_predict)
print("多层感知器--均方误差为：", error)
accuracy = accuracy_score(y_test, y_predict)
print("多层感知器--精确度为：", accuracy)


        期号        开奖日期  开奖号码  试机号
0  2021009  2021-01-09   626  237
1  2021008  2021-01-08   382  212
2  2021007  2021-01-07   290  431
3  2021006  2021-01-06   733  620
4  2021005  2021-01-05   839  882
top 15: 
 [(369, 14), (827, 14), (746, 14), (20, 14), (667, 13), (669, 13), (612, 13), (573, 13), (960, 12), (784, 12), (899, 12), (209, 12), (393, 12), (139, 12), (626, 11)]
已中奖组合个数： 996
从未中过奖的： [25, 89, 92, 445]
(2838,)
岭回归--预测值： [489 503 495 ... 516 488 498]
岭回归--均方误差为： 82222.34848484848
岭回归--精确度为： 0.00035236081747709656
多层感知器--预测值： [128 951 950 ... 129 818 301]
多层感知器--均方误差为： 161962.2255109232
多层感知器--精确度为： 0.0010570824524312897
