In [35]:
import time
import word_segmentation as ws
import news_similarity_class as ns
import extract_stock_code_class as sc


class NewsLabel():
    '''
    Labels a piece of news text.

    The class is intended to carry four labelling features:
        1. duplicate-news flag (is_recur)
        2. stock names / stock codes mentioned in the news (in_stock)
        3. inferred related sectors (infer_plate)
        4. inferred related stocks (infer_stock)

    Only in_stock is actually computed by the code below; is_recur is a
    fixed placeholder and the two inference labels are not produced.

    Attributes:

    seg: ws.WordSegmentation instance; its word_segmentation() is applied
        to the raw news text
    stock: sc.ExtractStockCode instance; its extract_stock_code() is applied
        to the segmented text

    NOTE(review): an earlier revision of this docstring also listed news_1h,
    stock_id_dict, id_stock_dict, LDA_model, RF_model and plate_stock_dict.
    None of them is set anywhere in this class -- presumably planned work.
    '''

    def __init__(self):
        '''
        Build the helper objects and print how long each one took to create.
        '''
        # perf_counter() is monotonic, so the reported duration cannot be
        # skewed by a system clock adjustment the way time.time() can.
        started = time.perf_counter()
        self.seg = ws.WordSegmentation()
        print("segmentation initialized in", time.perf_counter() - started)

        # TODO: recurrence detection is disabled. Re-enabling it means creating
        # ns.NewsSimilarity() here (as self.sim, timed like the others) and
        # calling self.sim.news_similarity() / self.sim.add_news() in news_label().

        started = time.perf_counter()
        self.stock = sc.ExtractStockCode()
        print("extract stockcode initialized in", time.perf_counter() - started)

    def news_label(self, news_id, news_text):
        '''
        Compute the labels for one news item.

        input:
            news_id: identifier of the news item, echoed back unchanged
            news_text: news text as string
        output: dictionary with the keys "id", "is_recur" and "in_stock"
        '''
        segmented = self.seg.word_segmentation(news_text)
        stock_hits = self.stock.extract_stock_code(segmented)

        # NOTE(review): "is_recur" is hard-coded to True while the similarity
        # check is disabled -- confirm consumers expect True as the placeholder.
        return {"id": news_id, "is_recur": True, "in_stock": stock_hits}


In [36]:
from flask import Flask
from flask import request
import json

# Build the shared labeller once at start-up; the request handlers below
# reuse this single instance instead of constructing one per request.
print("Initializing...")
analyzer = NewsLabel()
print("Now let's post!")

# WSGI application object that the @app.route decorators register against.
app = Flask(__name__)

@app.route('/')
def hello_world():
    """Root endpoint: reply with a fixed greeting string."""
    greeting = 'Hello World!'
    return greeting

@app.route('/data/v1.0/getLabels/', methods=['POST'])
def getLabels():
    """Label one news item posted as form data.

    Expected form fields: 'id', 'title' and 'content'. Title and content are
    concatenated (no separator) and handed to analyzer.news_label() together
    with the id.

    Returns:
        A response whose body is the JSON-encoded label dictionary, served
        with the application/json content type.

    Note: a missing form field fails at the request.form lookup; Flask
    normally reports that to the client as 400 Bad Request.
    """
    form = request.form
    news_id = form['id']
    news_text = form['title'] + form['content']
    ret_labels = analyzer.news_label(news_id, news_text)
    # A bare str return value would be sent as text/html; wrap the payload so
    # clients see the correct JSON content type. The body itself is unchanged.
    return app.response_class(json.dumps(ret_labels), mimetype='application/json')
    

if __name__ == '__main__':
    # Bind to every network interface so other machines can reach the service
    # (the captured output below shows it listening on port 5000).
    app.run(host="0.0.0.0")

 * Running on http://0.0.0.0:5000/ (Press CTRL+C to quit)


Initializing...
segmentation initialized in 0.003386259078979492
extract stockcode initialized in 0.003414630889892578
Now let's post!


10.200.44.212 - - [08/May/2018 16:51:17] "POST /data/v1.0/getLabels/ HTTP/1.1" 200 -


text: 新浪财经讯 3月12日消息海航基础（600515）3月12日晚间公告，孙公司海航地产拟与海南融创昌晟签订《股权转让协议》，出售海航地产所持有的海岛物流100%的股权，转让价款约7.97亿元；同时，海航地产拟出售所持有的海南高和房地产开发有限公司100%的股权至海南融创昌晟，转让价款约11.36亿元。责任编辑：张恒"


In [9]:
import json
# Mock-up of a complete label payload, used to check how json.dumps renders
# it (non-ASCII text comes out as \uXXXX escapes by default).
# NOTE(review): the keys here ("repeated", "plate", "ref_stock") differ from
# what NewsLabel.news_label returns ("is_recur", "in_stock") -- confirm which
# naming the API should use.
ret_labels = {
    "id": 123,
    "repeated": True,
    "in_stock": "01002,01003,01004",
    "plate": "雄安，电子商务，房地产",
    "ref_stock": "01002,01005,01006",
}
json.dumps(ret_labels)

'{"id": 123, "in_stock": "01002,01003,01004", "plate": "\\u96c4\\u5b89\\uff0c\\u7535\\u5b50\\u5546\\u52a1\\uff0c\\u623f\\u5730\\u4ea7", "ref_stock": "01002,01005,01006", "repeated": true}'