<a href="https://colab.research.google.com/github/cxyxk/project/blob/master/xk_com.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install every runtime dependency in one call. %pip (unlike !pip) always
# targets the environment of the running kernel; -q keeps the log short.
%pip install -q tensorflow numpy pandas scikit-learn matplotlib requests



In [None]:
# Explicit %pip magic: a bare `pip install` only works while IPython's
# automagic is enabled.
%pip install -q pandas scikit-learn tensorflow requests



In [None]:
# %pip installs into the kernel's own environment; -q suppresses the download log.
%pip install -q optuna

Collecting optuna
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.14.0-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.8-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.1.0-py3-none-any.whl (364 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m364.4/364.4 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.14.0-py3-none-any.whl (233 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Downloading Mako-1.3.8-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [None]:

import requests
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Dropout, MultiHeadAttention, LayerNormalization, Input, Concatenate, GlobalAveragePooling1D, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import LearningRateScheduler
from sklearn.preprocessing import OneHotEncoder

# 1. 从 API 获取数据
def fetch_data_from_api(year, timeout=10):
    """Download one year of draw history from the Macau Mark Six history API.

    Args:
        year: Year appended to the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The value stored under the ``data`` key of the JSON payload when the
        payload's ``result`` field is truthy, otherwise an empty list.
        Network errors, non-200 responses and malformed payloads are reported
        with ``print`` and also produce an empty list.
    """
    url = f"https://history.macaumarksix.com/history/macaujc2/y/{year}"
    try:
        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"请求异常: {exc}")
        return []

    if response.status_code != 200:
        print(f"请求失败，HTTP 状态码: {response.status_code}")
        return []

    try:
        data = response.json()
    except ValueError as exc:
        # The JSON decode errors raised by requests derive from ValueError.
        print(f"响应不是有效的 JSON: {exc}")
        return []

    if not isinstance(data, dict) or not data.get('result'):
        message = data.get('message') if isinstance(data, dict) else data
        print(f"API 返回失败信息: {message}")
        return []

    # A missing or null payload is treated like "no data" rather than crashing.
    return data.get('data') or []

# 2. 转换 API 数据为 DataFrame 格式并调整顺序
def transform_api_data(api_data):
    """Convert raw API records into a DataFrame ordered by issue number.

    Args:
        api_data: Iterable of dicts providing the keys ``expect``,
            ``openCode``, ``openTime``, ``wave`` and ``zodiac``. ``openCode``,
            ``wave`` and ``zodiac`` are comma-separated strings.

    Returns:
        A DataFrame with the columns 期号, 特别号码, 正码, 开盘时间, 波色 and
        生肖, sorted ascending by 期号 with a fresh 0..n-1 index. The last
        number of ``openCode`` becomes 特别号码 and the preceding numbers
        become 正码. Empty input yields an empty frame with the same columns.
    """
    columns = ["期号", "特别号码", "正码", "开盘时间", "波色", "生肖"]
    records = []
    for item in api_data:
        open_code = [int(code) for code in item['openCode'].split(',')]
        records.append({
            "期号": item["expect"],
            "特别号码": open_code[-1],
            "正码": open_code[:-1],
            "开盘时间": item["openTime"],
            "波色": item["wave"].split(','),
            "生肖": item["zodiac"].split(',')
        })

    # Passing the columns explicitly keeps the schema stable for empty input,
    # where sorting by a non-existent column would raise KeyError.
    df = pd.DataFrame(records, columns=columns)
    if df.empty:
        return df

    # Issue numbers arrive as strings; sorting them as text puts "10" before
    # "9". Sort numerically whenever every value parses as a number.
    issue_numbers = pd.to_numeric(df["期号"], errors="coerce")
    if issue_numbers.notna().all():
        order = np.argsort(issue_numbers.to_numpy(), kind="stable")
        df = df.iloc[order]
    else:
        df = df.sort_values(by="期号", ascending=True)
    return df.reset_index(drop=True)

# 3. 数据增强
def enhance_features(data, window=10):
    """Return a copy of ``data`` extended with engineered feature columns.

    Args:
        data: DataFrame with a numeric 特别号码 column and a 波色 column whose
            cells are lists of colour names.
        window: Number of consecutive rows (current row included) used for
            the rolling hot/cold counts.

    Returns:
        A new DataFrame with a 0..n-1 index containing, in order: the input
        columns, 特别号码尾数 (last digit), 红波数/蓝波数/绿波数 (occurrences of
        'red'/'blue'/'green' in 波色), 贝叶斯_1..49, 冷热号_1..49 and the five
        区间_* indicator columns. The input frame is not modified.
    """
    # Work on a re-indexed copy: the caller's frame stays untouched and every
    # derived frame below shares the same 0..n-1 index, so the final concat
    # cannot misalign rows when the input index is not the default one.
    enhanced = data.reset_index(drop=True)
    special = enhanced['特别号码']

    enhanced['特别号码尾数'] = special % 10
    for column, colour in (('红波数', 'red'), ('蓝波数', 'blue'), ('绿波数', 'green')):
        enhanced[column] = enhanced['波色'].apply(
            lambda waves, colour=colour: waves.count(colour)
        )

    # Boolean matrix: row r, column k is True when row r drew number k + 1.
    is_number = special.to_numpy()[:, None] == np.arange(1, 50)

    # Hot/cold features: how often each number was drawn within the last
    # `window` rows. Rows without a full window get 0.
    hot_df = (
        pd.DataFrame(
            is_number.astype(float),
            columns=[f'冷热号_{i}' for i in range(1, 50)],
        )
        .rolling(window)
        .sum()
        .fillna(0)
    )

    # Range features: one indicator column per block of numbers.
    range_df = pd.DataFrame({
        f'区间_{low}_{high}': special.between(low, high).astype('int64')
        for low, high in ((1, 10), (11, 20), (21, 30), (31, 40), (41, 49))
    })

    # "Bayes" features: overall relative frequency of each number scaled by
    # 1/49; the value is constant down each column.
    total_count = len(enhanced)
    counts = is_number.sum(axis=0)
    bayes_df = pd.DataFrame(
        {
            f'贝叶斯_{i}': (
                (counts[i - 1] / total_count) * (1 / 49) if total_count else float('nan')
            )
            for i in range(1, 50)
        },
        index=enhanced.index,
    )

    # Merge all feature groups column-wise.
    return pd.concat([enhanced, bayes_df, hot_df, range_df], axis=1)

# 4. 准备数据
def prepare_data(data, time_steps=10, validation_periods=100):
    """Build sliding-window training and validation arrays from 特别号码.

    Every sample is ``time_steps`` consecutive one-hot encoded special
    numbers; its target is the one-hot encoding of the number that follows.

    Args:
        data: DataFrame with the columns 特别号码 (integers 1..49) and 期号,
            ordered oldest first.
        time_steps: Length of each input window.
        validation_periods: Number of most recent samples held out for
            validation. Values larger than the number of samples hold out
            everything; 0 holds out nothing.

    Returns:
        ``(X_train, y_train, X_val, y_val, periods_val)`` where the X arrays
        have shape (samples, time_steps, 49), the y arrays have shape
        (samples, 49) and ``periods_val`` is the list of 期号 values of the
        validation targets.

    Raises:
        ValueError: If a special number lies outside 1..49.
    """
    num_classes = 49
    special = data['特别号码'].to_numpy(dtype=int)
    if special.size and (special.min() < 1 or special.max() > num_classes):
        raise ValueError("特别号码 values must be between 1 and 49")

    # Fixed-width encoding: column k always stands for number k + 1, even when
    # some numbers never occur in the data. An encoder that infers categories
    # would silently produce fewer columns and shift the mapping.
    onehot_encoded = np.eye(num_classes)[special - 1]

    n_samples = max(len(onehot_encoded) - time_steps, 0)
    # Pre-allocating keeps the 3-D shape even when there are no samples.
    X = np.zeros((n_samples, time_steps, num_classes))
    for i in range(n_samples):
        X[i] = onehot_encoded[i:i + time_steps]
    y = onehot_encoded[time_steps:time_steps + n_samples]
    periods = list(data["期号"].iloc[time_steps:time_steps + n_samples])

    # Split by an explicit index: negative slicing (`[:-0]`) would return an
    # empty training set when validation_periods is 0.
    n_val = min(max(validation_periods, 0), n_samples)
    split = n_samples - n_val
    return X[:split], y[:split], X[split:], y[split:], periods[split:]

# 5. 构建混合模型
def build_model(input_shape):
    """Create and compile the two-branch sequence classifier.

    One branch stacks two LSTM layers, each followed by batch normalisation;
    the other applies multi-head self-attention to the raw input and averages
    it over the time axis. Both branch outputs are concatenated and passed to
    a 49-unit softmax layer with L2 kernel regularisation.

    Args:
        input_shape: Shape of a single input sample, without the batch axis.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer, categorical
        cross-entropy loss and the accuracy metric.
    """
    sequence_in = Input(shape=input_shape)

    # Recurrent branch: 512-unit LSTM over the full sequence, then a 256-unit
    # LSTM that keeps only its final state.
    recurrent = LSTM(512, return_sequences=True, dropout=0.3)(sequence_in)
    recurrent = BatchNormalization()(recurrent)
    recurrent = LSTM(256, return_sequences=False, dropout=0.3)(recurrent)
    recurrent = BatchNormalization()(recurrent)

    # Attention branch: self-attention (query and value are both the input),
    # pooled to one vector per sample.
    attended = MultiHeadAttention(num_heads=8, key_dim=128)(sequence_in, sequence_in)
    attended = GlobalAveragePooling1D()(attended)

    combined = Concatenate()([recurrent, attended])
    class_probabilities = Dense(
        49,
        activation='softmax',
        kernel_regularizer=l2(0.01),
    )(combined)

    network = Model(inputs=sequence_in, outputs=class_probabilities)
    network.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return network

# 6. 动态学习率
def lr_schedule(epoch):
    """Return the learning rate to use for the given epoch index.

    The rate starts at 0.001, is multiplied by 0.1 once ``epoch`` exceeds 50,
    and is additionally multiplied by 0.01 once ``epoch`` exceeds 100.
    """
    rate = 0.001
    # (epoch threshold, decay factor) pairs; the factors stack as each
    # threshold is passed.
    for threshold, factor in ((50, 0.1), (100, 0.01)):
        if epoch > threshold:
            rate *= factor
    return rate

# 7. 验证逻辑
def calculate_accuracy(predicted, actual, periods, n=16):
    """Compute the top-n hit rate of predicted class scores.

    Args:
        predicted: Indexable of per-sample score vectors; position k of a
            vector is the score of number k + 1.
        actual: Sequence of the numbers that were really drawn (1-based).
        periods: Sequence of issue identifiers, parallel to ``actual``.
        n: How many of the highest-scoring numbers count as the prediction.

    Returns:
        ``(accuracy, results)``. ``accuracy`` is the fraction of samples whose
        actual number is among the n highest-scoring numbers (0.0 when
        ``actual`` is empty). ``results`` holds one dict per sample with the
        keys 期号, "预测前 {n} 特别号码" (best first), 实际特别号码 and 命中
        ("是" for a hit, "否" otherwise).
    """
    # The label follows n so the report is not mislabelled when n != 16.
    top_key = f"预测前 {n} 特别号码"
    n_top = max(int(n), 0)

    correct = 0
    results = []
    for i in range(len(actual)):
        # Sort descending first, then cut: slicing with `[-n:]` would select
        # every class when n is 0.
        top_n = np.argsort(predicted[i])[::-1][:n_top] + 1
        is_hit = bool(actual[i] in top_n)
        if is_hit:
            correct += 1
        results.append({
            "期号": periods[i],
            top_key: list(top_n),
            "实际特别号码": actual[i],
            "命中": "是" if is_hit else "否"
        })

    # Guard against division by zero for an empty validation set.
    accuracy = correct / len(actual) if len(actual) else 0.0
    return accuracy, results

# 8. 下一期预测
def predict_next_issue(model, data, time_steps=10, n=16):
    """Predict the most likely special numbers for the issue after the last row.

    Args:
        model: Object with a ``predict`` method that accepts an array of shape
            (1, time_steps, 49) and returns per-class scores of shape (1, 49).
        data: DataFrame with the columns 特别号码 (integers 1..49) and 期号,
            ordered oldest first.
        time_steps: Number of most recent draws fed to the model.
        n: How many of the highest-scoring numbers to return.

    Returns:
        ``(next_issue, top_n)``: the last 期号 converted to int plus one, and
        an array of the n highest-scoring numbers (1-based), best first.

    Raises:
        ValueError: If ``data`` has fewer than ``time_steps`` rows or a
            special number lies outside 1..49.
    """
    num_classes = 49
    special = data['特别号码'].to_numpy(dtype=int)
    if len(special) < time_steps:
        raise ValueError(
            f"need at least {time_steps} rows to build the input window, got {len(special)}"
        )
    if special.size and (special.min() < 1 or special.max() > num_classes):
        raise ValueError("特别号码 values must be between 1 and 49")

    # Fixed-width encoding: column k always stands for number k + 1, so the
    # window is 49 wide even when some numbers never occur in `data`.
    recent = special[len(special) - time_steps:]
    latest_data = np.eye(num_classes)[recent - 1].reshape(1, time_steps, num_classes)

    predicted = model.predict(latest_data)
    top_n = np.argsort(predicted[0])[::-1][:max(int(n), 0)] + 1

    # NOTE(review): assumes issue numbers simply increment; this looks wrong
    # across a year boundary if 期号 embeds the year — confirm with the API.
    next_issue = int(data.iloc[-1]['期号']) + 1
    return next_issue, top_n

# 9. 主程序
if __name__ == "__main__":
    import tensorflow as tf

    # Seed the Python, NumPy and TensorFlow generators so weight
    # initialisation and batch shuffling repeat from run to run.
    tf.keras.utils.set_random_seed(42)

    year = 2024
    api_data = fetch_data_from_api(year)
    if not api_data:
        print("未获取到有效数据，退出程序")
        # `exit()` is an interactive helper and shuts down a notebook kernel;
        # raising SystemExit stops the run without doing so.
        raise SystemExit

    data = transform_api_data(api_data)
    data = enhance_features(data)

    # Prepare the sliding-window training and validation sets.
    time_steps = 10
    validation_periods = 100
    X_train, y_train, X_val, y_val, periods_val = prepare_data(data, time_steps, validation_periods)
    if len(X_train) == 0:
        # Too few draws to leave anything for training after the hold-out.
        print("训练样本不足，退出程序")
        raise SystemExit

    # Build and train the model. Keras holds out the last 20% of the training
    # samples to report val_* metrics during fitting.
    model = build_model(X_train.shape[1:])
    lr_scheduler = LearningRateScheduler(lr_schedule)
    model.fit(X_train, y_train, epochs=360, batch_size=64, validation_split=0.2, callbacks=[lr_scheduler])

    # Measure the top-16 hit rate on the held-out validation periods.
    y_val_actual = np.argmax(y_val, axis=1) + 1
    y_val_predicted = model.predict(X_val)
    accuracy, results = calculate_accuracy(y_val_predicted, y_val_actual, periods_val, n=16)

    print(f"\n验证集命中率：{accuracy * 100:.2f}%")
    for result in results:
        print(result)

    # Predict the next issue from the most recent draws.
    next_issue, next_issue_prediction = predict_next_issue(model, data, time_steps)
    print(f"\n预测下一期期号：{next_issue}")
    print(f"预测的前 16 个特别号码：{next_issue_prediction}")

Epoch 1/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 466ms/step - accuracy: 0.0076 - loss: 5.4067 - val_accuracy: 0.0000e+00 - val_loss: 4.6989 - learning_rate: 0.0010
Epoch 2/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 272ms/step - accuracy: 0.2271 - loss: 3.9432 - val_accuracy: 0.0000e+00 - val_loss: 4.6779 - learning_rate: 0.0010
Epoch 3/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 269ms/step - accuracy: 0.1803 - loss: 3.6793 - val_accuracy: 0.0000e+00 - val_loss: 4.6621 - learning_rate: 0.0010
Epoch 4/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 265ms/step - accuracy: 0.2157 - loss: 3.4955 - val_accuracy: 0.0000e+00 - val_loss: 4.6511 - learning_rate: 0.0010
Epoch 5/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 268ms/step - accuracy: 0.3067 - loss: 3.2220 - val_accuracy: 0.0000e+00 - val_loss: 4.6416 - learning_rate: 0.0010
Epoch 6/300
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[