### 装饰器测试

In [1]:
original_price = [2, 5, 8, 11, 15, 20, 21, 23, 25, 26]

In [2]:
from collections import defaultdict

In [8]:
price_lookup = defaultdict(int)
for i in range(1,11,1):
    price_lookup[i] = original_price[i-1]
price_lookup

defaultdict(int,
            {1: 2,
             2: 5,
             3: 8,
             4: 11,
             5: 15,
             6: 20,
             7: 21,
             8: 23,
             9: 25,
             10: 26})

In [78]:
import numpy as np
import time
from functools import wraps

In [86]:
def print_time(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        '''count running time'''
        t1 = time.process_time()
        func(*args, **kwargs)
        t2 = time.process_time()
        print('函数运行{}秒。'.format(t2-t1))
    return wrapper    

In [91]:
@print_time
def get_best(lenth):
    '''获取最大切分利润。'''
    group = [(i, lenth-i) for i in range(0,lenth//2+1,1)]
    print(group)
    num = len(group)
    prices = np.arange(0,num,1)
    for i in range(num):
        prices[i] = price_lookup[group[i][0]] + price_lookup[group[i][1]]
    max_idx = np.argmax(prices)
    print('共有{}种切分方案，最佳方案为{}，最大利润为{}。'.format(num, group[max_idx],prices[max_idx]))      
    return prices[max_idx]

In [92]:
get_best(10)

[(0, 10), (1, 9), (2, 8), (3, 7), (4, 6), (5, 5)]
共有6种切分方案，最佳方案为(4, 6)，最大利润为31。
函数运行0.0秒。


In [85]:
# 装饰器中不加@wraps(func)时，打印出的函数信息：
get_best.__name__, get_best.__doc__

('wrapper', 'count running time')

In [88]:
# 装饰器中加@wraps(func)时，打印出的函数信息：
get_best.__name__, get_best.__doc__

('get_best', '获取最大切分利润。')

### 编辑距离

In [95]:
from collections.abc import Iterable

In [96]:
isinstance('asdf', Iterable)

True

In [102]:
c = {1:2, 3:4}
c

{1: 2, 3: 4}

In [103]:
c[5] = 7

In [104]:
c

{1: 2, 3: 4, 5: 7}

In [105]:
c[-1] = -2

In [106]:
c

{1: 2, 3: 4, 5: 7, -1: -2}

In [108]:
's'.join('dsd')

'dsssd'

In [109]:
's'+'jj'

'sjj'

In [111]:
'sjj'.replace('j','h',1)

'shj'

In [112]:
ss = ('sdd','sadf')

In [114]:
ss[0][:-1]

'sd'

### 拼音

In [123]:
chinese_dataset = 'input/article_9k.txt'

In [126]:
CHINESE_CHARATERS = open(chinese_dataset, encoding='utf-8').read()

In [127]:
CHINESE_CHARATERS[:40]

'此外自本周6月12日起除小米手机6等15款机型外其余机型已暂停更新发布含开发版体'

In [134]:
import pinyin
import re

In [131]:
pinyin.get('EOS')

'EOS'

In [136]:
f = open(chinese_dataset, encoding='utf-8').readlines()
f[:3]

['此外自本周6月12日起除小米手机6等15款机型外其余机型已暂停更新发布含开发版体验版内测稳定版暂不受影响以确保工程师可以集中全部精力进行系统优化工作有人猜测这也是将精力主要用到MIUI9的研发之中MIUI8去年5月发布距今已有一年有余也是时候更新换代了当然关于MIUI9的确切信息我们还是等待官方消息\n',
 '骁龙835作为唯一通过Windows10桌面平台认证的ARM处理器高通强调不会因为只考虑性能而去屏蔽掉小核心相反他们正联手微软找到一种适合桌面平台的兼顾性能和功耗的完美方案报道称微软已经拿到了一些新的源码以便Windows10更好地理解biglittle架构资料显示骁龙835作为一款集成了CPUGPU基带蓝牙WiFi的SoC比传统的Wintel方案可以节省至少30的PCB空间按计划今年Q4华硕惠普联想将首发骁龙835Win10电脑预计均是二合一形态的产品当然高通骁龙只是个开始未来也许还能见到三星Exynos联发科华为麒麟小米澎湃等进入Windows10桌面平台\n',
 '此前的一加3T搭载的是3400mAh电池DashCharge快充规格为5V4A至于电池缩水可能与刘作虎所说一加手机5要做市面最轻薄大屏旗舰的设定有关按照目前掌握的资料一加手机5拥有55寸1080P三星AMOLED显示屏6G8GBRAM64GB128GBROM双1600万摄像头备货量惊喜根据京东泄露的信息一加5起售价是xx99元应该是在279928992999中的某个\n']

In [163]:
tokens = [] # 2d的list，存放每个段落的拼音
for line in f[:2]:
    text = pinyin.get(line, format='strip', delimiter=' ')
    token_list = re.findall('[a-z]+', text.lower())
    token_list = [token for token in token_list if len(token)>1]
    tokens.extend(token_list)
print(tokens)

['ci', 'wai', 'zi', 'ben', 'zhou', 'yue', 'ri', 'qi', 'chu', 'xiao', 'mi', 'shou', 'ji', 'deng', 'kuan', 'ji', 'xing', 'wai', 'qi', 'yu', 'ji', 'xing', 'yi', 'zan', 'ting', 'geng', 'xin', 'fa', 'bu', 'han', 'kai', 'fa', 'ban', 'ti', 'yan', 'ban', 'nei', 'ce', 'wen', 'ding', 'ban', 'zan', 'bu', 'shou', 'ying', 'xiang', 'yi', 'que', 'bao', 'gong', 'cheng', 'shi', 'ke', 'yi', 'ji', 'zhong', 'quan', 'bu', 'jing', 'li', 'jin', 'xing', 'xi', 'tong', 'you', 'hua', 'gong', 'zuo', 'you', 'ren', 'cai', 'ce', 'zhe', 'ye', 'shi', 'jiang', 'jing', 'li', 'zhu', 'yao', 'yong', 'dao', 'de', 'yan', 'fa', 'zhi', 'zhong', 'qu', 'nian', 'yue', 'fa', 'bu', 'ju', 'jin', 'yi', 'you', 'yi', 'nian', 'you', 'yu', 'ye', 'shi', 'shi', 'hou', 'geng', 'xin', 'huan', 'dai', 'le', 'dang', 'ran', 'guan', 'yu', 'de', 'que', 'qie', 'xin', 'xi', 'wo', 'men', 'huan', 'shi', 'deng', 'dai', 'guan', 'fang', 'xiao', 'xi', 'xiao', 'long', 'zuo', 'wei', 'wei', 'yi', 'tong', 'guo', 'zhuo', 'mian', 'ping', 'tai', 'ren', 'zheng', 

In [158]:
from collections import defaultdict, Counter

In [164]:
cnt = Counter(tokens)

In [166]:
cnt['yi']

13

In [174]:
file = open('input/pinyin.dict','w')
for key,val in cnt.items():
    file.write('{} {}\n'.format(key,val))
file.close()

In [167]:
import jieba

In [176]:
jieba.load_userdict('input/pinyin.dict')

In [178]:
jieba.lcut('cagiyunyici')

['cagi', 'yun', 'yi', 'ci']

In [24]:
import sys
sys.path.append('..')

In [25]:
from SentenceGeneration.twogram import TwoGrams

In [26]:
sys.path

['C:\\Users\\Administrator\\PycharmProjects\\spell_correction',
 'C:\\ProgramData\\Anaconda3\\envs\\Py37\\python37.zip',
 'C:\\ProgramData\\Anaconda3\\envs\\Py37\\DLLs',
 'C:\\ProgramData\\Anaconda3\\envs\\Py37\\lib',
 'C:\\ProgramData\\Anaconda3\\envs\\Py37',
 '',
 'C:\\ProgramData\\Anaconda3\\envs\\Py37\\lib\\site-packages',
 'C:\\ProgramData\\Anaconda3\\envs\\Py37\\lib\\site-packages\\win32',
 'C:\\ProgramData\\Anaconda3\\envs\\Py37\\lib\\site-packages\\win32\\lib',
 'C:\\ProgramData\\Anaconda3\\envs\\Py37\\lib\\site-packages\\Pythonwin',
 'C:\\ProgramData\\Anaconda3\\envs\\Py37\\lib\\site-packages\\IPython\\extensions',
 'C:\\Users\\Administrator\\.ipython',
 '..']

In [28]:
from edit_distance import decorator

In [185]:
a = TwoGrams()

In [186]:
type(a)

SentenceGeneration.twogram.TwoGrams

In [1]:
import pandas as pd

In [2]:
pd.Series([[1],[2],[3]])

0    [1]
1    [2]
2    [3]
dtype: object

In [19]:
t = ['s','sd']

In [20]:
t.insert(0,'aa')

In [21]:
t

['aa', 's', 'sd']

In [22]:
t.append('eos')
t

['aa', 's', 'sd', 'eos']

In [23]:
''.join(['s','d'])

'sd'