In [1]:

class Document:
    """A titled, authored text whose body is kept in a private attribute."""

    def __init__(self, title, author, context):
        """Store the public metadata and the private body text.

        Args:
            title: Title of the document.
            author: Author of the document.
            context: Body of the document.
        """
        print('init function called')
        self.title, self.author = title, author
        # A leading double underscore triggers name mangling, which makes
        # the attribute private to this class.
        self.__context = context

    def get_context_length(self):
        """Return ``len()`` of the stored body."""
        body = self.__context
        return len(body)

    def intercept_context(self, length):
        """Replace the stored body with its ``[:length]`` slice."""
        truncated = self.__context[:length]
        self.__context = truncated

In [3]:
# Build a document; the constructor prints its trace message as a side effect.
harry_potter_book = Document('Harry Potter', 'J. K. Rowling', '... Forever Do not believe any thing is capable of thinking independently ...')

# Public attributes can be read directly.
print(harry_potter_book.title)
print(harry_potter_book.author)
# The body is stored privately, so its length is read through the accessor.
print(harry_potter_book.get_context_length())

# Truncate the body with a [:10] slice and confirm the new length.
harry_potter_book.intercept_context(10)
print(harry_potter_book.get_context_length())

init function called
Harry Potter
J. K. Rowling
77
10


In [4]:

class Document:
    """Shows a class method, an instance method and a static method together."""

    # Template filled in by get_welcome().
    WELCOME_STR = 'Welcome! The context for this book is {}.'

    def __init__(self, title, author, context):
        """Store the public metadata and the private body text."""
        print('init function called')
        self.title, self.author = title, author
        self.__context = context

    # Class method
    @classmethod
    def create_empty_book(cls, title, author):
        """Alternative constructor: build an instance whose body is 'nothing'."""
        return cls(
            title=title,
            author=author,
            context='nothing',
        )

    # Instance method
    def get_context_length(self):
        """Return ``len()`` of the stored body."""
        body = self.__context
        return len(body)

    # Static method
    @staticmethod
    def get_welcome(context):
        """Return the welcome template with ``context`` substituted in."""
        template = Document.WELCOME_STR
        return template.format(context)

In [5]:

# Use the class-method factory instead of calling the constructor directly.
empty_book = Document.create_empty_book('What Every Man Thinks About Apart from Sex', 'Professor Sheridan Simove')

# The factory sets the body to 'nothing', hence a length of 7.
print(empty_book.get_context_length())
# A static method can also be called through an instance.
print(empty_book.get_welcome('indeed nothing'))

init function called
7
Welcome! The context for this book is indeed nothing.


# 继承

In [6]:

class Entity:
    """Base class for content objects that carry a type tag.

    Subclasses are expected to set ``self.title`` (read by ``print_title``)
    and to override ``get_context_length``.
    """

    def __init__(self, object_type):
        """Record the kind of object this entity represents.

        Args:
            object_type: Label stored on the instance as ``object_type``.
        """
        print('parent class init called')
        self.object_type = object_type

    def get_context_length(self):
        """Placeholder that subclasses must override.

        Raises:
            NotImplementedError: Always. This is an ``Exception`` subclass,
                so existing ``except Exception`` handlers keep working.
        """
        raise NotImplementedError('get_context_length not implemented')

    def print_title(self):
        """Print ``self.title``, which the subclass must have set."""
        print(self.title)

class Document(Entity):
    """Entity of type 'document' whose body text is stored privately."""

    def __init__(self, title, author, context):
        """Initialise the parent as a 'document', then store the fields.

        Args:
            title: Title of the document.
            author: Author of the document.
            context: Body of the document.
        """
        print('Document class init called')
        # super() resolves the parent through the MRO instead of relying on
        # a hard-coded class name.
        super().__init__('document')
        self.title = title
        self.author = author
        self.__context = context

    def get_context_length(self):
        """Return ``len()`` of the stored body."""
        return len(self.__context)
    
class Video(Entity):
    """Entity of type 'video' whose length is stored privately."""

    def __init__(self, title, author, video_length):
        """Initialise the parent as a 'video', then store the fields.

        Args:
            title: Title of the video.
            author: Author of the video.
            video_length: Length of the video, returned as-is by
                ``get_context_length``.
        """
        print('Video class init called')
        # super() resolves the parent through the MRO instead of relying on
        # a hard-coded class name.
        super().__init__('video')
        self.title = title
        self.author = author
        self.__video_length = video_length

    def get_context_length(self):
        """Return the stored video length."""
        return self.__video_length


In [7]:

# Each constructor prints its own trace message and then the parent's.
harry_potter_book = Document('Harry Potter(Book)', 'J. K. Rowling', '... Forever Do not believe any thing is capable of thinking independently ...')
harry_potter_movie = Video('Harry Potter(Movie)', 'J. K. Rowling', 120)

# object_type is set by the parent class initialiser.
print(harry_potter_book.object_type)
print(harry_potter_movie.object_type)

# print_title() is inherited from Entity and reads the subclass's title.
harry_potter_book.print_title()
harry_potter_movie.print_title()

# Same method name, different implementation per subclass.
print(harry_potter_book.get_context_length())
print(harry_potter_movie.get_context_length())

Document class init called
parent class init called
Video class init called
parent class init called
document
video
Harry Potter(Book)
Harry Potter(Movie)
77
120


# 抽象类
* 抽象类是一种特殊的类，它生来就是作为父类存在的，一旦实例化就会报错。同样，抽象函数定义在抽象类之中，子类必须重写该函数才能使用。抽象函数使用装饰器 @abstractmethod 来标记。

In [8]:

from abc import ABCMeta, abstractmethod

class Entity(metaclass=ABCMeta):
    """Abstract interface for objects with a readable and writable title."""

    @abstractmethod
    def get_title(self):
        """Return the title; concrete subclasses must override this."""

    @abstractmethod
    def set_title(self, title):
        """Store ``title``; concrete subclasses must override this."""

class Document(Entity):
    """Concrete entity that keeps its title in a plain instance attribute."""

    def get_title(self):
        """Return the value stored in ``self.title``."""
        current_title = self.title
        return current_title

    def set_title(self, title):
        """Store ``title`` on the instance as ``self.title``."""
        setattr(self, 'title', title)

In [10]:
# Document overrides both abstract methods, so it can be instantiated.
document = Document()
# Round-trip a title through the setter and the getter.
document.set_title('Harry Potter')
print(document.get_title())

Harry Potter


In [11]:
# Entity is abstract, so it cannot be instantiated directly; it is only
# usable through a concrete subclass such as Document. The expected error is
# caught and displayed so the notebook still runs from top to bottom.
try:
    entity = Entity()
except TypeError as error:
    print('TypeError: {}'.format(error))

TypeError: Can't instantiate abstract class Entity with abstract methods get_title, set_title

# 一个搜索引擎由搜索器、索引器、检索器和用户接口四个部分组成
* 搜索器，通俗来讲就是我们常提到的爬虫（crawler），它能在互联网上大量爬取各类网站的内容，送给索引器。
* 索引器拿到网页和内容后，会对内容进行处理，形成索引（index），存储于内部的数据库等待检索。
* 用户接口是指网页和 App 前端界面，例如百度和谷歌的搜索页面。用户通过用户接口，向搜索引擎发出询问（query），询问解析后送达检索器；
* 检索器高效检索后，再将结果返回给用户。

In [12]:
class SearchEngineBase(object):
    """Skeleton of a search engine: corpus ingestion plus query lookup.

    Subclasses must override ``process_corpus`` and ``search``.
    """

    def __init__(self):
        pass

    def add_corpus(self, file_path):
        """Read a file and pass its content to ``process_corpus``.

        The file path doubles as the document ID.

        Args:
            file_path: Path of the text file to ingest.
        """
        # NOTE(review): no encoding is passed, so the file is decoded with the
        # platform default. Confirm the corpus encoding and pass it explicitly.
        with open(file_path, 'r') as fin:
            text = fin.read()
        self.process_corpus(file_path, text)

    def process_corpus(self, id, text):
        """Process ``text`` and store the result (the index) under ``id``.

        Raises:
            NotImplementedError: Always; subclasses must override this.
        """
        raise NotImplementedError('process_corpus not implemented.')

    def search(self, query):
        """Process ``query``, look it up in the index and return the matches.

        Raises:
            NotImplementedError: Always; subclasses must override this.
        """
        raise NotImplementedError('search not implemented.')

# main() provides the searcher (corpus loading) and the user interface.
def main(search_engine, file_paths=None):
    """Load the corpus into ``search_engine`` and answer queries from stdin.

    Args:
        search_engine: Object providing ``add_corpus(file_path)`` and
            ``search(query)``; the value returned by ``search`` must support
            ``len()`` and iteration.
        file_paths: Optional iterable of corpus file paths. When omitted, the
            five sample files under ``./08检索/`` are loaded.

    The query loop ends when input is exhausted (EOF) or interrupted.
    """
    if file_paths is None:
        file_paths = ['./08检索/1.txt', './08检索/2.txt', './08检索/3.txt', './08检索/4.txt', './08检索/5.txt']
    for file_path in file_paths:
        search_engine.add_corpus(file_path)
    while True:
        try:
            query = input()
        except (EOFError, KeyboardInterrupt):
            # No more queries, or the user interrupted: leave the loop
            # cleanly instead of surfacing a traceback.
            break
        results = search_engine.search(query)
        print('found {} result(s):'.format(len(results)))
        for result in results:
            print(result)

In [13]:
class SimpleEngine(SearchEngineBase):
    """Naive engine: stores each text unchanged and does substring matching."""

    def __init__(self):
        super().__init__()
        # Maps document ID -> full, unprocessed text.
        self.__id_to_texts = {}

    def process_corpus(self, id, text):
        """Store ``text`` as-is under ``id``."""
        self.__id_to_texts.update({id: text})

    def search(self, query):
        """Return the IDs of all stored texts that contain ``query``."""
        return [
            doc_id
            for doc_id, body in self.__id_to_texts.items()
            if query in body
        ]

In [14]:
# Instantiate the substring-matching engine; no corpus is loaded yet.
search_engine = SimpleEngine()

In [11]:
# main(search_engine)  # Uncomment to start the interactive query loop; it blocks waiting on input().

found 0 result(s):
