In [1]:
import nltk
import random
import numpy as np

from bs4 import BeautifulSoup


# Load the positive electronics reviews.
# The file is read inside a context manager so the handle is closed as soon as
# its contents are in memory (the bare open(...).read() left it open).
with open('sorted_data_acl/electronics/positive.review', encoding='ISO-8859-1') as review_file:
    reviews_soup = BeautifulSoup(review_file.read(), "lxml")
# Keep only the <review_text> elements; each one exposes its body via `.text`.
positive_reviews = reviews_soup.find_all('review_text')

In [2]:
# Extract every trigram and store it in a dictionary:
# the (first word, third word) pair is the key and the value is the list of
# all middle words seen between that pair.
trigrams = {}
for review in positive_reviews:
    tokens = nltk.tokenize.word_tokenize(review.text.lower())
    # Three staggered views of the token list line up as (w1, w2, w3) triples;
    # reviews shorter than three tokens simply produce nothing.
    for left, middle, right in zip(tokens, tokens[1:], tokens[2:]):
        trigrams.setdefault((left, right), []).append(middle)

# Turn each list of middle words into a word -> probability mapping.
trigram_probabilities = {}
for context, middle_words in trigrams.items():
    # A context with a single distinct middle word offers no alternative to
    # swap in, so it is left out of the table.
    if len(set(middle_words)) <= 1:
        continue
    # Count the occurrences of each middle word.
    counts = {}
    for word in middle_words:
        counts[word] = counts.get(word, 0) + 1
    total = len(middle_words)
    # Normalise the counts so they become relative frequencies.
    trigram_probabilities[context] = {
        word: float(count) / total for word, count in counts.items()
    }

In [3]:
def random_sample(d):
    """Draw one key from `d` at random, weighted by its value.

    Parameters
    ----------
    d : dict
        Mapping of word -> probability. The values are accumulated in the
        dictionary's iteration order and compared with a uniform draw from
        ``random.random()``.

    Returns
    -------
    The first key whose cumulative probability exceeds the draw. If no key
    qualifies (the probabilities are floats, so their sum can fall slightly
    short of 1.0), the last key is returned instead. ``None`` is returned
    only when `d` is empty.
    """
    r = random.random()
    cumulative = 0
    w = None
    for w, p in d.items():
        cumulative += p
        if r < cumulative:
            return w
    # Reached only when r >= the accumulated total: fall back to the last word
    # rather than implicitly returning None, which would later break the
    # " ".join(...) over the tokens.
    return w

In [4]:
def test_spinner(replace_prob=0.2):
    """Print a randomly chosen positive review and a "spun" variant of it.

    The review text is lower-cased and tokenised. For each interior token,
    with probability `replace_prob`, the (previous token, next token) pair is
    looked up in `trigram_probabilities`; when present, the middle token is
    replaced by a word drawn with `random_sample`. Replacements are made in
    place, so a swapped-in word is part of the context for later positions.

    Parameters
    ----------
    replace_prob : float, optional
        Chance of attempting a replacement at each position. Defaults to 0.2,
        the value that was previously hard-coded.

    Returns
    -------
    None. The original and spun texts are written to stdout.
    """
    review = random.choice(positive_reviews)
    s = review.text.lower()
    print("Original:", s)
    tokens = nltk.tokenize.word_tokenize(s)
    for i in range(len(tokens) - 2):
        if random.random() < replace_prob:
            k = (tokens[i], tokens[i+2])
            if k in trigram_probabilities:
                w = random_sample(trigram_probabilities[k])
                # Keep the original token if the sampler produced no word;
                # a None in the list would make the join below raise TypeError.
                if w is not None:
                    tokens[i+1] = w
    print("Spun:")
    # Rough detokenisation: drop the space the tokenizer put before common
    # punctuation and after the dollar sign.
    print(" ".join(tokens).replace(" .", ".").replace(" '", "'").replace(" ,", ",").replace("$ ", "$").replace(" !", "!"))

In [5]:
test_spinner()

Original: 
this phone has excellent voice quality, including in conference mode. the only reasons it doesn't get 5 stars from me is that :
a) it is unable to operate if there is a power outage - as opposed to perhaps just losing some of its functionality . just make sure it isn't the only phone in your home.
b) there is a bug which occasionnally renders it unable to operate. i can't figure out what causes it. it may be my cats that walk on it at night and press a weird sequence wit their paws :). the only cure i have found is to pull the power plug, which reboots it

Spun:
this system has excellent product quality, including in raw mode. the only reasons it does n't get 5 stars from me is that : a ) it is unable to operate if there is a power outage - as opposed to perhaps just losing some of its functionality. just be because it is n't the only phone in your home. b ) there is a bug which occasionnally renders it unable to operate. you ca n't figure out what causes it. it may be my ca