In [1]:
import synonyms as syn
import json
import operator
import socket
import sys
import threading
import urlparse

In [18]:
class DataSet:
    """Holds the retrieval corpus plus the key names and thresholds used with it.

    Class attributes name the keys of the incoming dataset dict
    (``Document_Key``, ``Conversation_Key``), the keys of a request
    (``ProductID_Key``, ``Query_Key``) and the only accepted request path.
    ``__answer_sheet`` maps a document/conversation id to its answer text.

    Instance attributes:
        docs, convs    -- set by ``setDataset`` only when the corresponding key
                          is present in the supplied dict; absent otherwise.
        household_doc  -- value of ``household_d`` (Retrieval.evalQuery compares
                          the best document score against it).
        household_conv -- value of ``household_c`` (same, for conversations).
    """
    Document_Key = 'd'
    Conversation_Key = 'c'
    ProductID_Key  = 'pid'
    Query_Key = 'q'
    Request_Path = '/'
    __answer_sheet = {
        'd0': '进水解决方案 ... 买过',
        'd1': 'some stories',
        'c0': '你好',
        'c1': '早上好'
    }

    def __init__(self, dataset=None, household_d=0.9, household_c=0.9):
        """Create a data set, optionally loading ``dataset`` straight away.

        Raises:
            TypeError: if ``dataset`` is neither None nor exactly a dict.
        """
        # setDataset performs the dict-type validation and raises before any
        # attribute is assigned, so no separate check is needed here.
        self.setDataset(dataset)
        self.household_doc = household_d
        self.household_conv = household_c

    def setDataset(self, dataset):
        """Load documents and/or conversations from ``dataset``.

        ``None`` is a no-op. Only the keys that are present are copied, so a
        dict holding just one of the two keys leaves the other attribute as is.

        Raises:
            TypeError: if ``dataset`` is not None and not exactly a dict.
        """
        if dataset is None:
            return
        if type(dataset) is not dict:
            raise TypeError("must be initialized by a object of dict type")
        for source_key, attr_name in ((self.Document_Key, 'docs'),
                                      (self.Conversation_Key, 'convs')):
            if source_key in dataset:
                setattr(self, attr_name, dataset[source_key])

In [3]:
class Retrieval:
    """Matches free-text queries against a data set's documents and conversations.

    Similarity is delegated to ``syn.compare(a, b, seg=False)`` and the query is
    tokenised with ``syn.seg(q)[0]`` (presumably the word list of the
    segmentation result -- confirm against the ``synonyms`` package).

    After ``evalQuery`` runs, ``doc_matches`` / ``conv_matches`` hold the raw
    similarity values of the last evaluated query, keyed by document /
    conversation id.
    """

    # Types accepted as a single query string: ``str`` plus ``unicode`` when the
    # interpreter has it.
    try:
        _string_types = (str, unicode)
    except NameError:
        _string_types = (str,)

    def __mean(self, numbers):
        """Return the arithmetic mean of ``numbers`` (0.0 for an empty sequence)."""
        return float(sum(numbers)) / max(len(numbers), 1)

    def __max_id(self, items):
        """Return the key holding the largest value of the non-empty dict ``items``."""
        return max(items.items(), key=operator.itemgetter(1))[0]

    def evalQuery(self, q, dataset):
        """Find the best matching document and/or conversation for one query.

        Args:
            q: the query string.
            dataset: object exposing ``docs`` (id -> sequence of keys), ``convs``
                (id -> sentence), ``household_doc`` / ``household_conv``
                thresholds and the ``Document_Key`` / ``Conversation_Key`` names.

        Returns:
            A dict with ``dataset.Document_Key`` mapped to the best document id
            when its score exceeds ``household_doc``, and/or
            ``dataset.Conversation_Key`` mapped to the best conversation id when
            its score exceeds ``household_conv``. Empty when nothing qualifies,
            when the query yields no words, or when the data set is empty.

        Raises:
            ValueError: if ``dataset`` has no ``docs`` or ``convs`` attribute.
            TypeError: if ``q`` is not a string.
        """
        if not hasattr(dataset, 'docs') or not hasattr(dataset, 'convs'):
            raise ValueError("setDataset shall be called first")
        if not isinstance(q, self._string_types):
            raise TypeError("query shall be type string")
        query_words = syn.seg(q)[0]

        self.doc_matches = {}
        self.conv_matches = {}
        # Nothing to compare against: max() over an empty row would raise.
        if not query_words:
            return {}

        # A document scores the mean, over its keys, of each key's best
        # similarity to any query word.
        doc_scores = {}
        for doc_id, keys in dataset.docs.items():
            matches = [[syn.compare(key, word, seg=False) for word in query_words]
                       for key in keys]
            self.doc_matches[doc_id] = matches
            doc_scores[doc_id] = self.__mean([max(row) for row in matches])

        # A conversation scores its best similarity to any query word.
        conv_scores = {}
        for conv_id, conv_s in dataset.convs.items():
            matches = [syn.compare(conv_s, word, seg=False) for word in query_words]
            self.conv_matches[conv_id] = matches
            conv_scores[conv_id] = max(matches)

        result = {}
        # Guard each side separately so an empty docs/convs dict simply
        # contributes nothing instead of failing in max().
        if doc_scores:
            doc_id_best = self.__max_id(doc_scores)
            if doc_scores[doc_id_best] > dataset.household_doc:
                result[dataset.Document_Key] = doc_id_best
        if conv_scores:
            conv_id_best = self.__max_id(conv_scores)
            if conv_scores[conv_id_best] > dataset.household_conv:
                result[dataset.Conversation_Key] = conv_id_best
        return result

    def evalQuerys(self, qs, dataset):
        """Evaluate one or several queries.

        Args:
            qs: a string, a list/tuple of strings, or a dict carrying either of
                those under ``dataset.Query_Key``.
            dataset: see ``evalQuery``.

        Returns:
            A list with one ``evalQuery`` result per query, in order.

        Raises:
            TypeError: if ``qs`` is of any other type, or is a dict without
                ``dataset.Query_Key``.
        """
        if isinstance(qs, dict):
            if dataset.Query_Key not in qs:
                raise TypeError("the argument shall be a list object constituted by strings or a string")
            return self.evalQuerys(qs[dataset.Query_Key], dataset)
        if isinstance(qs, self._string_types):
            qs = [qs]
        elif not isinstance(qs, (list, tuple)):
            raise TypeError("the argument shall be a list object constituted by strings or a string")
        return [self.evalQuery(query, dataset) for query in qs]

    def __ans(self, qs, dataset):
        """Turn queries into answer sentences.

        For each query the conversation answer (if any) comes first, followed by
        either the document answer or a fixed apology when no document matched;
        non-empty parts are separated by ','.

        Returns:
            ``{'a': [answer_string, ...]}`` with one entry per evaluated query.
        """
        ans_ss = []
        # Iterate over the results, not over ``qs``: for a single string or a
        # dict, len(qs) is not the number of queries.
        for ans in self.evalQuerys(qs, dataset):
            ans_s = ''
            if dataset.Conversation_Key in ans:
                ans_s += dataset._DataSet__answer_sheet[ans[dataset.Conversation_Key]]
            if ans_s != '':
                ans_s += ','
            if dataset.Document_Key in ans:
                ans_s += dataset._DataSet__answer_sheet[ans[dataset.Document_Key]]
            else:
                ans_s += 'There is nothing i can help you, Sorry'
            ans_ss.append(ans_s)
        return {'a': ans_ss}

In [4]:
class HttpRequestParser:
    """Incremental parser for one HTTP request fed in arbitrary fragments.

    Attributes (all cleared by ``reset``):
        method, request_url, http_version -- the three parts of the request line.
        header -- dict of header name -> value, both stripped of whitespace.
        data   -- body received so far.
        wait   -- unterminated tail of the header section awaiting more input.
        got    -- True once the blank line ending the header section was seen.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget everything parsed so far so the instance can be reused."""
        self.header = {}
        self.data = ''
        self.wait = ''
        self.method = ''
        self.request_url = ''
        self.http_version = ''
        self.got = False

    def check(self):
        """Tell whether the body is complete according to Content-Length.

        A missing Content-Length header counts as a length of 0. The header name
        is matched case-insensitively.

        Returns:
            True when exactly Content-Length characters of body were received,
            False when more are still expected.

        Raises:
            ValueError: if more body than announced was received, or the
                Content-Length value is not an integer.
        """
        CL = 0
        for name, value in self.header.items():
            if name.lower() == 'content-length':
                CL = int(value)
                break
        if len(self.data) == CL:
            return True
        if len(self.data) > CL:
            raise ValueError('unexpected data')
        return False

    def feed(self, data):
        """Consume the next fragment of the request.

        Returns:
            True once the header section and the announced body are complete,
            False when more input is needed.

        Raises:
            ValueError: on a malformed request line or header line, or when the
                body exceeds Content-Length (see ``check``).
        """
        if self.got:
            self.data += data
            return self.check()

        data = self.wait + data
        while True:
            pos = data.find('\r\n')
            if pos < 0:
                # Incomplete line: keep it until the next fragment arrives.
                self.wait = data
                return False
            if pos == 0:
                # Blank line: the header section ends, the rest is body.
                self.got = True
                self.wait = ''
                self.data = data[2:]
                break
            line = data[:pos]
            data = data[pos + 2:]
            if not self.method:
                # The first line is always the request line, even when the
                # target contains ':' (absolute URL, "host:port", "?q=a:b").
                parts = line.split(' ')
                if len(parts) != 3 or not all(parts):
                    raise ValueError('malformed request line: ' + repr(line))
                (self.method, self.request_url, self.http_version) = parts
            else:
                colon = line.find(':')
                if colon < 0:
                    raise ValueError('malformed header line: ' + repr(line))
                self.header[line[:colon].strip()] = line[colon + 1:].strip()
        return self.check()

In [5]:
class Server:
    """Thin wrapper around a bound TCP socket that hands control to a handler.

    Usage: construct, ``setHandler(fn)``, then ``listen(backlog)``; ``listen``
    calls ``fn(listening_socket, timeout)`` and returns when ``fn`` returns.
    """

    def __init__(self, host='', port=80, timeout=None):
        """Create an IPv4 stream socket and bind it to ``(host, port)``.

        Args:
            host: address to bind to.
            port: TCP port to bind to.
            timeout: value stored as ``self.timeout`` and later passed to the
                handler unchanged.

        On a bind failure the error is printed, the socket is closed and
        ``sys.exit()`` is called.
        """
        self.socket = socket.socket(
            socket.AF_INET, socket.SOCK_STREAM
        )
        try:
            # Bind to the constructor arguments, not to module-level globals.
            self.socket.bind((host, port))
        except socket.error as msg:
            self.socket.close()
            print('Bind failed. Error Code: %s Message %s' % (msg.errno, msg.strerror))
            sys.exit()
        self.timeout = timeout

    def listen(self, number):
        """Start listening with a backlog of ``number`` and run the handler.

        ``setHandler`` must have been called first.
        """
        self.socket.listen(number)
        print('Socket listening up to ' + str(number))
        self.handler(self.socket, self.timeout)

    def setHandler(self, handler):
        """Register ``handler(listening_socket, timeout)`` for ``listen``."""
        self.handler = handler

In [6]:
def AutoAns(socket, timeout):
    """Accept connections forever, serving each one on its own thread.

    Args:
        socket: a listening socket (note: shadows the ``socket`` module inside
            this function).
        timeout: forwarded unchanged to ``Serve`` for every connection.

    One ``DataSet`` instance is created up front and shared by all workers.
    """
    shared_dataset = DataSet()
    while True:
        client, _peer_addr = socket.accept()
        worker = threading.Thread(target=Serve,
                                  args=(client, timeout, shared_dataset))
        worker.start()

In [20]:
def Serve(conn, timeout, dataset):
    """Read one HTTP request from ``conn``, validate it and send a reply.

    A request is accepted when its path equals ``dataset.Request_Path`` and its
    query string carries both ``dataset.Query_Key`` and
    ``dataset.ProductID_Key``; the reply is then ``200 OK`` / ``Success``.
    Anything else, including any exception while reading or parsing, is
    answered with status ``404 Not Found`` and a short explanatory body.

    Args:
        conn: connected socket; always closed before returning.
        timeout: passed to ``conn.settimeout``.
        dataset: object providing ``Query_Key``, ``ProductID_Key`` and
            ``Request_Path``.
    """
    parser = HttpRequestParser()
    status_code = '404 Not Found'
    try:
        try:
            conn.settimeout(timeout)
            while True:
                print('Collecting fragment')
                frag = conn.recv(4096)
                if not frag:
                    # recv() returning nothing means the peer closed the
                    # connection; without this check the loop never ends.
                    raise ValueError('connection closed before the request was complete')
                if parser.feed(frag):
                    break
            parse_res = urlparse.urlparse(parser.request_url)
            path = parse_res[2]
            queries = urlparse.parse_qs(parse_res[4])
            # Compare the path by value; identity of two equal strings is an
            # interpreter detail.
            if (dataset.Query_Key not in queries
                    or dataset.ProductID_Key not in queries
                    or path != dataset.Request_Path):
                res_data = 'Bad Query Format'
                print(queries)
                print(path)
            else:
                q = queries[dataset.Query_Key][0]
                pid = queries[dataset.ProductID_Key][0]
                print('%s %s' % (q, pid))
                # TODO: answer from the retrieval engine instead of a fixed
                # string once it is wired in.
                res_data = 'Success'
                status_code = '200 OK'
        except Exception as e:
            res_data = e.__class__.__name__ + ': ' + str(e)
        res_header = 'HTTP/1.1 ' + status_code + '\r\nContent-Length: ' + str(len(res_data)) + '\r\nConnection: close\r\n'
        res = res_header + '\r\n' + res_data
        conn.sendall(res)
    finally:
        # Release the connection even when sending the reply fails.
        conn.close()

In [8]:
# Server configuration.
HOST = ''
PORT = 44442
# Per-connection timeout in seconds, handed to conn.settimeout() by the
# connection handler. A value of 0 would switch each connection to
# non-blocking mode, making recv() fail whenever the request bytes have not
# arrived yet; use None to block without a limit.
TIMEOUT = 10
server = Server(HOST, PORT, TIMEOUT)
server.setHandler(AutoAns)

In [21]:
# Start listening with a backlog of 5 and run the registered handler; this call
# does not return until the handler does.
server.listen(5)

KeyboardInterrupt: 