# In [2]:
import numpy as np
#Import Flask modules
from flask import Flask, request, render_template

# Libraries needed for loading the models and preprocessing text
import pickle
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer,PorterStemmer
from nltk.corpus import stopwords
import re

# In [None]:
# Create the Flask application object; HTML templates are looked up in the
# "template" directory instead of Flask's default "templates".
app = Flask(
    import_name=__name__,
    template_folder='template',
)

# Pipeline class: load a per-category model, clean a review text, score it
class Data_Process:
    """Score a single review text with the model of one product category.

    Typical use is ``Data_Process().process(text, category)``, which loads
    the pickled model and transformer for ``category``, cleans ``text``,
    and returns the rounded probability stored in ``fake_prob``.
    """

    def __init__(self):
        """Set up the list of categories for which artefacts may be loaded."""
        self.categories = ['Kitchens', 'Electronics', 'Sports', 'Cloths', 'Movies']

    def process(self, text, category='Kitchens'):
        """Run the full pipeline and return the predicted probability.

        Args:
            text: The review text to score (converted with ``str`` later on).
            category: Name of the product category; must be one of
                ``self.categories``.

        Returns:
            The value of ``self.fake_prob`` computed by ``make_prediction``.

        Raises:
            ValueError: If ``category`` is not a supported category.
        """
        self.text = text
        self.category = category
        self.load_model()
        self.transform_data()
        self.make_prediction()
        return self.fake_prob

    def load_model(self):
        """Load the pickled model and transformer for ``self.category``.

        Raises:
            ValueError: If ``self.category`` is not in ``self.categories``.
        """
        # The category is spliced into a file path and the file is then
        # unpickled, so restrict it to the known names. This blocks path
        # traversal (e.g. "../../x") when the value comes from a web form.
        if self.category not in self.categories:
            raise ValueError(
                "Unsupported category {!r}; expected one of {}".format(
                    self.category, self.categories
                )
            )

        # NOTE(review): pickle.load can execute arbitrary code contained in
        # the file - only ship trusted artefacts in ./models and ./transformers.
        model_path = './models/' + self.category + ".pkl"
        with open(model_path, 'rb') as model_file:
            self.model = pickle.load(model_file)

        transformer_path = './transformers/' + self.category + ".pickle"
        with open(transformer_path, 'rb') as transformer_file:
            self.transformer = pickle.load(transformer_file)

    def transform_data(self):
        """Clean ``self.text`` and store its transformed form.

        The text is lower-cased, stripped of ``{html}`` markers, tags, URLs
        and digits, tokenised into word characters, filtered (tokens longer
        than two characters that are not English stopwords), stemmed and then
        lemmatised. The cleaned sentence is passed to
        ``self.transformer.transform`` and the result is stored in
        ``self.transformed_data``.
        """
        sentence = str(self.text).lower()

        # Remove markup, URLs and digits
        sentence = sentence.replace('{html}', "")
        sentence = re.sub(r'<.*?>', '', sentence)
        sentence = re.sub(r'http\S+', '', sentence)
        sentence = re.sub(r'[0-9]+', '', sentence)

        # Tokenisation: keep runs of word characters only
        tokens = RegexpTokenizer(r'\w+').tokenize(sentence)

        # Build the stopword set once instead of re-reading the corpus list
        # for every token; set membership is also O(1).
        stop_words = set(stopwords.words('english'))
        filtered_words = [w for w in tokens if len(w) > 2 and w not in stop_words]

        # Stemming first, then lemmatisation of the stems. The order is kept
        # as-is; presumably it matches how the transformer was fitted - confirm.
        stemmer = PorterStemmer()
        lemmatizer = WordNetLemmatizer()
        stem_words = [stemmer.stem(w) for w in filtered_words]
        lemma_words = [lemmatizer.lemmatize(w) for w in stem_words]

        cleansed_sentence = " ".join(lemma_words)
        self.transformed_data = self.transformer.transform([cleansed_sentence])

    def make_prediction(self):
        """Store the rounded fake-review probability in ``self.fake_prob``.

        Takes the second column of ``predict_proba`` for the single
        transformed sample and rounds it to two decimals.
        """
        # assumes index 1 of model.classes_ is the "fake" label - TODO confirm
        self.fake_prob = round(self.model.predict_proba(self.transformed_data)[0][1], 2)


# Landing page: serve "index.html" at the site root
@app.route('/')
def home():
    """Render and return the index.html template for the root URL."""
    landing_page = render_template('index.html')
    return landing_page


#Set a post method to yield predictions on page
@app.route('/', methods = ['POST'])
def predict():
    """Score the submitted review and re-render the page with the result.

    The posted form values are read positionally: the first value is the
    product category and the second is the review text. The text is scored
    with ``Data_Process.process`` and the probability is shown as a
    percentage in ``index.html``.

    Returns:
        The rendered ``index.html`` template with ``prediction_text`` set.
    """
    # Collect every form value as a string, in form order
    form_values = [str(value) for value in request.form.to_dict().values()]

    # Guard against incomplete submissions instead of failing with IndexError
    if len(form_values) < 2:
        return render_template('index.html', prediction_text = "Please provide both a category and a review text")

    category, user_text = form_values[0], form_values[1]
    d = Data_Process()
    output = d.process(user_text, category)

    #If the output is negative, the values entered are unreasonable to the context of the application
    if output < 0:
        return render_template('index.html', prediction_text = "Predicted probability is negative, values entered not reasonable")

    # Every other value is rendered as a percentage, so the view always
    # returns a response (the former if/elif chain could fall through and
    # return None, e.g. for NaN).
    return render_template('index.html', prediction_text = '{:.0%}'.format(output))

    
# Run the app: start the Flask server only when this file is executed directly
if __name__ == "__main__":
    # NOTE(review): debug=True turns on Flask's debug mode; confirm it is
    # switched off for anything other than local development.
    app.run(debug=True)