In [2]:

from bs4 import BeautifulSoup
from os.path import expanduser, isfile

from gensim import models
import warnings
import numpy as np
import re,math
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.layers.core import Flatten,Reshape,Dropout,Dense,Activation
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.models import Sequential, Model
from keras.engine import Input
from keras.engine.topology import Merge
from keras.regularizers import l2
from keras.layers.embeddings import Embedding
from keras.callbacks import TensorBoard, ModelCheckpoint
import pandas as pd
import dateparser
import pickle
warnings.filterwarnings('ignore')

# Emotion label -> emoji shown in replies. The insertion order matters: it is
# the order of the label columns of Y and of the model's output units.
emotion_map = {
    "anger": "😾",
    "enjoyment": "😺",
    "fear": "🙀",
    "disgust": "😼",
    "sadness": "😿",
}

# Files used to persist the trained word2vec model and the fitted tokenizer.
word2vec_file = 'word2vec'
tokenizer_file = 'tokenizer.pickle'

from nltk.stem.snowball import SnowballStemmer 
# Single shared Snowball stemmer instance configured for Russian.
stemmer = SnowballStemmer("russian")


def stem(word):
    """Return the stem of ``word`` as produced by the shared Russian stemmer."""
    return stemmer.stem(word)

def split_sentence(sentence):
    """Normalise a raw message and split it into stemmed tokens.

    The text is stripped of URL-like runs, ``?``, ``!`` and ``)`` are turned
    into word placeholders, everything is lowercased, split on any character
    that is not a letter, and each kept word is passed through ``stem``.

    Args:
        sentence: raw message text, or ``None``.

    Returns:
        list of str tokens, or ``None`` when ``sentence`` is ``None``.
    """
    if sentence is None:
        return None

    # NOTE(review): the scheme group is optional, so this pattern removes every
    # run of two or more ASCII letters/digits (all Latin words), not only
    # URLs. Left unchanged because the stored models were built with it —
    # confirm whether that is intended before tightening the pattern.
    url_regex = r"(\bhttps?://)?[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]"
    sentence = re.sub(url_regex, "", sentence)

    # Punctuation that carries emotion is kept as dedicated word tokens.
    sentence = sentence.replace("?", " questionmark ")
    sentence = sentence.replace("!", " exclamationmark ")
    sentence = re.sub(r"\){2,}", " megasmilemoji ", sentence)
    sentence = sentence.replace(")", " smilemoji ")
    sentence = sentence.lower()

    # Raw string: "\W" / "\d" are invalid escapes in a normal string literal.
    words = re.split(r"[\W\d_\"']", sentence)

    # The placeholders inserted above are longer than the 2..9 character
    # window (12, 15 and 13 characters), so the length filter used to discard
    # them right after they were added. They are exempted from it here.
    placeholders = ("questionmark", "exclamationmark", "megasmilemoji", "smilemoji")
    return [
        stem(word)
        for word in words
        if word in placeholders or 10 > len(word) > 1
    ]




Using TensorFlow backend.


In [3]:

# Build the corpus, the word2vec model and the tokenizer only when no fitted
# tokenizer has been saved yet; otherwise this whole cell is skipped.
if not isfile(tokenizer_file):
    file_path = expanduser("~")+"/Downloads/facebook-chroneus/html/messages.htm"
    # Close the export file as soon as it has been parsed.
    with open(file_path) as html_file:
        soup = BeautifulSoup(html_file, 'html.parser')
    sentences=[]
    names =["Сергей Левченко","1145575406@facebook.com"]
    with open('text.csv', 'w') as messages:
        # Every message feeds the word2vec/tokenizer corpus; only messages
        # whose author is listed in `names` are exported for labelling.
        for val in reversed(soup.select('div.message_header span.user')):
            text=val.parent.parent.next_sibling.text.replace('\n', ' ')
            if len(text)>1:
                # Drop characters that cannot be encoded as UTF-8. 'ignore'
                # must be on the encode step: decoding bytes that were just
                # produced by a UTF-8 encode can never fail.
                text=text.encode('utf-8', 'ignore').decode('utf-8')
                sentences.append(" ".join(split_sentence(text)))
                if val.text in names:
                    date=dateparser.parse(val.next_sibling.text.split("UTC")[0])
                    # Double embedded quotes so that a message containing '"'
                    # does not break the quoted CSV field.
                    quoted_text='"'+text.replace('"', '""')+'"'
                    # The trailing comma leaves an empty third column that the
                    # labelling cell fills with the emotions.
                    messages.write(quoted_text+','+str(date)+',\n')

    w2v_model = models.Word2Vec([sentence.split() for sentence in sentences],
                         min_count=1, workers=4, size=200, iter=10, sg=1, window=10)
    w2v_model.save(word2vec_file)
    # Quick sanity check of the learned vectors.
    print (w2v_model.most_similar(positive=[stem('счастье')], negative=[stem('жопа')]))

    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(sentences)
    word_index = tokenizer.word_index
    print('Found %s unique tokens.' % len(word_index))
    with open(tokenizer_file, 'wb') as f:
        pickle.dump(tokenizer, f, pickle.HIGHEST_PROTOCOL)


In [4]:
from IPython.display import clear_output
import time

# Interactive labelling: show each exported message, ask for the emotion(s)
# and write the answer as the quoted third column of text_marked.csv.
question="what emotion are you feeling about it?("+" ".join(emotion_map.keys())+")"

# Context managers make sure both files are flushed and closed even when the
# session is aborted half-way (e.g. by interrupting the kernel).
with open('text_marked.csv','w') as marked_file:
    marked_file.write("text,date,emotions\n")
    with open('text.csv', 'r') as messages:
        for line in messages:
            print(line.split(",")[0])
            emotion = input(question).strip("\n")
            marked_file.write(line.strip("\n")+"\""+emotion+"\"\n")
            # Short pause before wiping the cell output for the next message.
            time.sleep(1)
            clear_output()

"хаха отличнот"


KeyboardInterrupt: 

In [1]:
# Load the manually labelled messages; rows with a missing field are dropped.
df=pd.read_csv('text_marked.csv').dropna()

# Keep the RAW message text here. prepare_text() already runs
# split_sentence() on every item, so tokenising at this point as well sent the
# training data through normalisation and stemming twice, whereas reply()
# only normalises a message once.
texts=df.text

def prepare_text(texts):
    """Turn an iterable of raw texts into padded integer id sequences.

    Every text is normalised with ``split_sentence``, converted to word ids
    by the tokenizer unpickled from ``tokenizer_file`` and then passed to
    ``sequence.pad_sequences`` with ``maxlen=32``.
    """
    with open(tokenizer_file, 'rb') as tokenizer_fh:
        fitted_tokenizer = pickle.load(tokenizer_fh)

    normalised = [" ".join(split_sentence(raw_text)) for raw_text in texts]
    id_sequences = fitted_tokenizer.texts_to_sequences(normalised)
    return sequence.pad_sequences(id_sequences, maxlen=32)
                                    
# Feature matrix: one padded id sequence per labelled message.
X=prepare_text(texts)


# One 0/1 indicator column per emotion: 1 when the emotion name occurs
# (case-insensitively, as a substring) in the free-text "emotions" label.
for emotion in emotion_map.keys():
    df[emotion]=df.apply(lambda x: 1 if emotion in x["emotions"].lower() else 0, axis=1)

# DataFrame.as_matrix() is deprecated and removed from current pandas;
# selecting the columns and taking .values gives the same array, with the
# columns in emotion_map order.
Y=df[list(emotion_map.keys())].values
print (Y)

NameError: name 'pd' is not defined

In [None]:
def ngram_cnn( max_length=32, embedding_size=200, ngram_filters=[1,2,3,7], n_feature_maps=32, dropout=0.5, n_hidden=16):
    """Build and compile the n-gram CNN used to tag messages with emotions.

    The network embeds token ids with the saved word2vec vectors, runs one
    convolution per n-gram width over the (max_length x embedding_size)
    matrix, max-pools each feature map over the whole sequence, concatenates
    the pooled features and feeds them to a small dense head with one sigmoid
    unit per entry of ``emotion_map``.

    Args:
        max_length: number of token ids per input sequence.
        embedding_size: width of the word vectors; it is used for the Reshape
            and the convolution width, so it has to match the vector size of
            the saved word2vec model.
        ngram_filters: heights (in tokens) of the convolution windows.
        n_feature_maps: number of filters per n-gram width.
        dropout: dropout rate applied after the embedding and in the head.
        n_hidden: units of the hidden dense layer.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    convs = []
    functional_input = Input(shape=(1,max_length, embedding_size))

    for n_gram in ngram_filters:
        conv = Convolution2D(nb_filter=n_feature_maps, nb_row=n_gram, nb_col=embedding_size,
                             activation='relu', dim_ordering='th')(functional_input)
        # Pool over every window position so each filter yields one value.
        pool = MaxPooling2D(pool_size=(max_length - n_gram + 1, 1), dim_ordering='th')(conv)
        flatten=Flatten()(pool)
        convs.append(flatten)
    merged=Merge(mode='concat')(convs)
    functionalModel = Model(input=functional_input, output=merged)

    w2v_model=models.Word2Vec.load(word2vec_file)
    with open(tokenizer_file, 'rb') as f:
        tokenizer = pickle.load(f)

    # The network is fed Keras Tokenizer ids (1-based, 0 is the padding id),
    # not word2vec indices. Using w2v_model.syn0 directly therefore attached
    # the wrong vector to every word and left the largest id without a row.
    # Build a matrix whose row i is the vector of the word with tokenizer id
    # i; row 0 (padding) and words unknown to word2vec stay all-zero.
    vectors = w2v_model.syn0
    weights = np.zeros((len(tokenizer.word_index) + 1, vectors.shape[1]), dtype=vectors.dtype)
    for word, token_id in tokenizer.word_index.items():
        if word in w2v_model:
            weights[token_id] = w2v_model[word]

    model = Sequential()

    embedding_layer = Embedding(input_dim=weights.shape[0],input_length=max_length,
                                output_dim=weights.shape[1], weights=[weights],
                                name='embedding')
    model.add(embedding_layer)
    # Add the single "channel" axis expected by the 'th'-ordered convolutions.
    model.add(Reshape((1, max_length, embedding_size)))
    model.add(Dropout(dropout))
    model.add(functionalModel)
    model.add(Dense(n_hidden))
    model.add(Dropout(dropout))
    model.add(Activation('relu'))
    model.add(Dense(len(emotion_map.keys()),))
    model.add(Activation('sigmoid'))
    # The targets are independent 0/1 flags (a message may carry several
    # emotions) and the outputs are sigmoids, so the matching loss is binary
    # cross-entropy; categorical cross-entropy assumes one-hot targets.
    model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    return model

# Instantiate the network with the default hyper-parameters of ngram_cnn().
model=ngram_cnn()

In [None]:
# Checkpoint the model to "checkpoints" and emit TensorBoard logs while training.
callbacks_list=[ModelCheckpoint("checkpoints"),TensorBoard()]
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

# `show_accuracy` is no longer a fit() argument (Keras 1.x only warns about
# it, later versions reject it). Accuracy is already requested through
# metrics=['accuracy'] at compile time, so the flag is dropped.
model.fit(X, Y,
          batch_size=64,
          nb_epoch=42,
          validation_split=0.2, shuffle=True,
          verbose=0,callbacks=callbacks_list)
model.save("model.h5")

In [None]:
from keras.models import load_model
# Reload the trained network from the file written by the training cell.
model = load_model('model.h5')
def reply(message):
    """Predict the emotions of one message and render them as emoji.

    Returns:
        tuple: a string with the emoji of every emotion whose score is above
        0.6 (each emoji followed by a space), and the score vector predicted
        for the message.
    """
    predictions = model.predict(prepare_text([message]))
    emoji_reply = "".join(
        emotion_map.get(name) + ' '
        for name, score in zip(emotion_map.keys(), predictions[0].tolist())
        if score > 0.6
    )
    return (emoji_reply, predictions[0])


In [None]:
# Smoke test: display the emoji reply for a sample message.
reply("hi")[0]

In [None]:
import numpy as np

from flask import Flask, request
from fbmq import Page
import socket,os 


# Set to True to serve under the machine's FQDN with a self-signed dev cert.
DEBUG=False

if DEBUG:
    hostname=socket.getfqdn()
    from werkzeug.serving import make_ssl_devcert

    # Generate <hostname>.crt / <hostname>.key once and reuse them afterwards.
    if not os.path.exists(hostname+".crt"):
        make_ssl_devcert(hostname, host=hostname)
else:
    hostname="localhost"

# Secrets should not live in the notebook: each value can be supplied through
# an environment variable and falls back to the previous hard-coded default.
CONFIG = {
        'VERIFY_TOKEN': os.environ.get('FB_VERIFY_TOKEN', 'verify'),
        'SERVER_URL': os.environ.get('FB_SERVER_URL', 'https://'),
        'PAGE_KEY': os.environ.get('FB_PAGE_KEY', ""),
}



# Flask application serving the webhook endpoints.
app = Flask(__name__)
# fbmq page client, created with the page key taken from CONFIG.
page = Page(CONFIG['PAGE_KEY'])


@app.route('/webhook', methods=['GET'])
def validate():
    """Answer the webhook verification (GET) request.

    Returns the ``hub.challenge`` value when ``hub.mode`` is ``subscribe`` and
    ``hub.verify_token`` equals the configured token; otherwise responds with
    an explanatory message and HTTP status 403.
    """
    print (request.args)
    mode = request.args.get('hub.mode', '')
    token = request.args.get('hub.verify_token', '')
    if mode == 'subscribe' and token == CONFIG['VERIFY_TOKEN']:
        print("Validating webhook")
        return request.args.get('hub.challenge', '')
    # A failed verification must not look like a success: it used to be sent
    # with the default 200 status, now it is reported as 403 Forbidden.
    return 'Failed validation. Make sure the validation tokens match.', 403

@app.route('/webhook', methods=['POST'])
def webhook():
    """Log the raw POST body, pass it to the fbmq page handler and acknowledge."""
    raw_body = request.get_data(as_text=True)
    print(raw_body)
    page.handle_webhook(raw_body)
    return "ok"

@app.route('/', methods=['GET'])
def hi():
    """Health check that can also classify a message.

    Without a ``message`` query argument it returns "I am ready". With one it
    returns the emoji reply and the raw scores from ``reply`` joined by
    " \\n".
    """
    message = request.args.get('message', None)
    if message:
        # reply() returns a tuple. Calling str() on the whole tuple and then
        # joining iterated over its characters, putting " \n" between every
        # single character of the repr; join the two elements instead.
        return " \n".join(str(part) for part in reply(message))
    return "I am ready"

@page.handle_message
def message_handler(event):
    """Reply to an incoming message event.

    Sends a fixed notice when the message carries a ``sticker_id`` and, when
    the event has message text, the emoji reply computed for that text.
    """
    recipient = event.sender_id
    text = event.message_text
    print(event)

    has_sticker = event.message.get("sticker_id", None)
    if has_sticker:
        page.send(recipient, "Dont understand stickers")

    if text:
        page.send(recipient, reply(text)[0])

@page.after_send
def after_send(payload, response):
    """Callback registered via ``page.after_send``; only logs that it ran."""
    print("complete")
    


# Certificate/key pair handed to Flask as ssl_context.
# NOTE(review): with DEBUG=False no certificate is generated by this notebook,
# so localhost.crt / localhost.key presumably have to exist already — confirm.
context = (hostname+'.crt', hostname+'.key')
if __name__ == '__main__':
    # Serve over HTTPS on every interface, port 8000.
    app.run(debug=False,host="0.0.0.0",ssl_context=context,port=8000)

