<a href="https://colab.research.google.com/github/cu7th0n/ssq/blob/master/3D.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
import requests
from bs4 import BeautifulSoup
import xlwt
import time

#获取第一页的内容
def get_one_page(url):
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'
    }
    response = requests.get(url,headers=headers)
    if response.status_code == 200:
        return response.text
    return 

#解析第一页内容，数据结构化
def parse_one_page(html):

    soup = BeautifulSoup(html,'lxml')
    i = 0
    for item in soup.select('tr')[2:-1]:

        yield{
            'time':item.select('td')[i].text,
            'digit_1':item.select('td em')[0].text,
            'digit_2':item.select('td em')[1].text,
            'digit_3':item.select('td em')[2].text
        }

#将数据写入Excel表格中
def write_to_excel():
    f = xlwt.Workbook()                             
    sheet1 = f.add_sheet('ssq',cell_overwrite_ok=True)
    row0 = ["date","digit_1","digit_2","digit_3"]
    #写入第一行
    for j in range(0,len(row0)):
        sheet1.write(0,j,row0[j])

    #依次爬取每一页内容的每一期信息，并将其依次写入Excel
    i=0
    for k in range(1,257):
        url = 'http://kaijiang.zhcw.com/zhcw/html/3d/list_%s.html' %(str(k))
        html = get_one_page(url)
        
        #写入每一期的信息
        for item in parse_one_page(html):
            sheet1.write(i+1,0,item['time'])
            sheet1.write(i+1,1,item['digit_1'])
            sheet1.write(i+1,2,item['digit_2'])
            sheet1.write(i+1,3,item['digit_3'])
            i+=1
    
    f.save('3d.xls')
    print('%d页已保存。'%k)
    
def main():
    write_to_excel()

if __name__ == '__main__':
    main()

256页已保存。


In [0]:
import pandas as pd
import numpy as np
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.models import load_model


data = pd.read_excel('3d.xls')

data['date'] = pd.to_datetime(data['date'])
data = data.sort_values(by = 'date')
data.reset_index(inplace=True)
del data['index']
del data['date']

D_1 = data['digit_1']
D_2 = data['digit_2']
D_3 = data['digit_3']

models = ['M1_model.h5', 'M2_model.h5', 'M3_model.h5']
tdatas = [D_1, D_2, D_3]



def create_interval_dataset(dataset, look_back):
    """
    :param dataset: input array of time intervals
    :param look_back: each training set feature length
    :return: convert an array of values into a dataset matrix.
    """
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:i+look_back])
        dataY.append(dataset[i+look_back])
    return np.asarray(dataX), np.asarray(dataY)

  
def train_model(train_set,mname,look_back = 200,data_dim = 11,batch_size = 1):

  timesteps = look_back
 
  # Expected input batch shape: (batch_size, timesteps, data_dim)
  # Note that we have to provide the full batch_input_shape since the network is stateful.
  # the sample of index i in batch k is the follow-up for the sample i in batch k-1.
  model = Sequential()
  model.add(LSTM(data_dim, return_sequences=True, stateful=True,
               batch_input_shape=(batch_size, timesteps, data_dim)))
  model.add(LSTM(data_dim*2, return_sequences=True, stateful=True))
  model.add(Dropout(0.3))
  
  model.add(LSTM(data_dim*4, return_sequences=True, stateful=True))
  model.add(Dropout(0.3))
  
  model.add(LSTM(data_dim*8, return_sequences=True, stateful=True))
  model.add(Dropout(0.3))

  model.add(LSTM(data_dim*8, stateful=True))
  model.add(Dense(data_dim, activation='softmax'))

  model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

  df = to_categorical(train_set,data_dim)
  
  dataX, dataY = create_interval_dataset(df, look_back) 
  
  total = len(train_set)
  split = total*8//10

  X_train = dataX[:split]
  y_train = dataY[:split]

  X_val = dataX[split+1:total-1]
  y_val = dataY[split+1:total-1]
  
  model.fit(X_train, y_train,batch_size=batch_size, epochs=1,
            shuffle=False,validation_data=(X_val, y_val))
  model.save(mname)


In [9]:
look_back=300
for (model,tdata) in zip(models,tdatas):
  train_model(tdata,model,look_back)

Train on 4096 samples, validate on 723 samples
Epoch 1/1
Train on 4096 samples, validate on 723 samples
Epoch 1/1
Train on 4096 samples, validate on 723 samples
Epoch 1/1


In [12]:
for (model,tdata) in zip(models,tdatas):
    
    print(model)
    M_ssq = load_model(model)

    test_X = to_categorical(np.asarray([tdata[-look_back:]]),num_classes=11)

    pred = M_ssq.predict(test_X)
    ranks = np.argsort(pred[0])
    
    for i in range(1,11):
        print(str(ranks[-i]) + ' : %.2f%%' %(pred[0][ranks[-i]]*100))
    print('\n')

M1_model.h5
8 : 13.20%
3 : 12.86%
1 : 12.39%
2 : 11.55%
6 : 9.82%
7 : 9.04%
0 : 8.93%
4 : 8.74%
9 : 7.99%
5 : 5.48%


M2_model.h5
5 : 11.47%
8 : 10.76%
9 : 10.72%
0 : 10.42%
1 : 10.25%
4 : 10.00%
7 : 9.99%
6 : 9.15%
2 : 9.00%
3 : 8.20%


M3_model.h5
9 : 15.28%
3 : 11.97%
8 : 11.30%
0 : 10.96%
4 : 10.65%
7 : 9.85%
5 : 8.50%
6 : 8.04%
1 : 7.09%
2 : 6.36%


