In [10]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn import model_selection
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, plot_confusion_matrix
import pickle
import matplotlib.pyplot as plt
from collections import Counter
import pandas as pd
import json
import spacy as sp
import random

import nltk
# nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.stem import SnowballStemmer

# Spanish spaCy pipeline shared by the whole notebook: preprocess() uses it to
# lemmatise tokens and search() uses it to compare documents by similarity.
nlp = sp.load("es_core_news_md")

In [11]:
# Recipe table queried by search(), plus the two labelled datasets that train
# the intent classifiers.
df = pd.read_csv("Data/last-cook.csv")
df_cook = pd.read_csv("Data/intents_cook.csv")
df_cook_others = pd.read_csv('Data/intents_cook_others.csv')

print("Processing the Intents.....")
# Decode explicitly instead of relying on the platform's default encoding.
# "utf-8-sig" reads plain UTF-8 and also tolerates a leading byte-order mark.
with open('Data/intents.json', encoding="utf-8-sig") as json_data:
    intents = json.load(json_data)

# Encode the string labels as integers. Any label that is missing from a
# mapping becomes NaN.
df_cook_others["label"] = df_cook_others.label.map({ 'cook': 1, 'others': 0})
df_cook["label"] = df_cook.label.map({ 'name_to_prepare': 1, 'ingredient_to_name': 0})
df_cook_others

Processing the Intents.....


Unnamed: 0.1,Unnamed: 0,text,label
0,0,¿Cuál es tu receta favorita para cocinar?,1
1,1,¿Cómo se hace una salsa de tomate casera?,1
2,2,¿Qué plato prepararías para una cena romántica?,1
3,3,¿Cuál es la diferencia entre freír y saltear?,1
4,4,¿Cómo se hace un buen risotto?,1
...,...,...,...
1089,1089,¿Qué opinas sobre el matrimonio entre personas...,0
1090,1090,¿Cuál es tu estilo de música favorito y por qué?,0
1091,1091,¿Crees en la evolución?,0
1092,1092,¿Cuál ha sido tu trabajo favorito y por qué?,0


In [12]:
def preprocess(text):
    """Tokenise a Spanish text and reduce it to a list of lemmas.

    The text is lower-cased and tokenised with NLTK; stop words and every
    token that is not purely alphabetic are dropped; each remaining token is
    lemmatised with the module-level spaCy pipeline ``nlp``.

    Args:
        text (str): Raw input text.

    Returns:
        list[str]: Lemma strings in their original order. Empty when the text
        yields no tokens or when every token is filtered out.
    """
    tokens = nltk.word_tokenize(text.lower(), "spanish")
    if not tokens:
        # Nothing to clean; also avoids indexing an empty list below.
        return []

    # Some texts start with stray characters (a byte-order mark), so they are
    # stripped from the first token.
    tokens[0] = clear_first_token(tokens[0])

    # A set makes each stop-word membership test O(1).
    spanish_stopwords = set(stopwords.words("spanish"))

    lemmas = []
    for token in tokens:
        # Skip stop words and anything that is not made only of letters.
        if token in spanish_stopwords or not token.isalpha():
            continue
        # NLTK does not lemmatise Spanish, so spaCy is used instead.
        # `lemma_` is the lemma text; `lemma` would be its integer hash.
        lemmas.append(nlp(token)[0].lemma_)
    return lemmas

def clear_first_token(token):
    """Return ``token`` with every byte-order-mark character removed.

    Args:
        token (str): Token to clean.

    Returns:
        str: The same characters in the same order, minus any '\\ufeff'.
    """
    byte_order_mark = '\ufeff'
    return token.replace(byte_order_mark, "")

In [13]:
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# print('Accuracy score: ', format(accuracy_score(y_test, predictions)))

# print('Precision score: ', format(precision_score(y_test, predictions)))
# print('Recall score: ', format(recall_score(y_test, predictions)))
# print('F1 score: ', format(f1_score(y_test, predictions)))

### Clasificar si me estás hablando de cocina o no

In [14]:
# Model using a support vector machine over TF-IDF features.
# Fitted (vectorizer, classifier) pair, built lazily by the first call to
# cook_others(). Re-running this cell resets it and forces a refit.
_cook_others_model = None


def _fit_cook_others_model():
    """Fit the TF-IDF vectorizer and the linear SVM on ``df_cook_others``.

    Returns:
        tuple: ``(tfidf_vect, svm)``, both already fitted.
    """
    # Split the dataset; only the training part is used to fit the classifier.
    X_train, _, y_train, _ = train_test_split(
        df_cook_others['text'], df_cook_others['label'], random_state=1
    )

    # The vectorizer is fitted on every text in the dataset, not only on the
    # training split.
    tfidf_vect = TfidfVectorizer()
    tfidf_vect.fit(df_cook_others["text"])

    svm = SVC(C=4.0, kernel='linear', degree=3, random_state=0, probability=True)
    svm.fit(tfidf_vect.transform(X_train), y_train)
    return tfidf_vect, svm


def cook_others(text):
    """Predict whether ``text`` is about cooking.

    The model is fitted once, on the first call, and reused afterwards instead
    of being retrained for every query.

    Args:
        text (str): User message to classify.

    Returns:
        The output of ``svm.predict`` for the single input text; element 0 is
        the predicted label (the data cell maps 'cook' to 1 and 'others' to 0).
    """
    global _cook_others_model
    if _cook_others_model is None:
        _cook_others_model = _fit_cook_others_model()
    tfidf_vect, svm = _cook_others_model
    return svm.predict(tfidf_vect.transform([text]))

### Clasificar qué tipo de pregunta de cocina es

In [15]:
# Fitted (vectorizer, classifier) pair, built lazily by the first call to
# cook(). Re-running this cell resets it and forces a refit.
_cook_model = None


def _fit_cook_model():
    """Fit the bag-of-words vectorizer and the naive Bayes model on ``df_cook``.

    Returns:
        tuple: ``(count_vector, naive_bayes)``, both already fitted.
    """
    # Split the dataset; only the training part is used to fit the model.
    X_train, _, y_train, _ = train_test_split(
        df_cook['text'], df_cook['label'], random_state=1
    )

    count_vector = CountVectorizer()
    training_data = count_vector.fit_transform(X_train)

    naive_bayes = MultinomialNB()
    naive_bayes.fit(training_data, y_train)
    return count_vector, naive_bayes


def cook(text):
    """Predict which kind of cooking question ``text`` is.

    The model is fitted once, on the first call, and reused afterwards instead
    of being retrained for every query.

    Args:
        text (str): User message to classify.

    Returns:
        The output of ``naive_bayes.predict`` for the single input text; the
        data cell maps 'name_to_prepare' to 1 and 'ingredient_to_name' to 0.
    """
    global _cook_model
    if _cook_model is None:
        _cook_model = _fit_cook_model()
    count_vector, naive_bayes = _cook_model
    return naive_bayes.predict(count_vector.transform([text]))

### Clasificar en qué tipo de pregunta varia

In [16]:
# Fitted (vectorizer, classifier) pair, built lazily by the first call to
# others(). Re-running this cell resets it and forces a refit.
_others_model = None


def _fit_others_model():
    """Fit the bag-of-words vectorizer and naive Bayes model on the intent patterns.

    Returns:
        tuple: ``(count_vector, naive_bayes)``, both already fitted.
    """
    patterns = []
    tags = []
    # Only the first four intents are used for training. Slicing keeps that
    # limit without failing when the file holds fewer than four.
    # NOTE(review): confirm whether intents.json is meant to hold more.
    for intent in intents["intents"][:4]:
        label = intent["tag"]
        for pattern in intent["patterns"]:
            patterns.append(pattern)
            tags.append(label)

    count_vector = CountVectorizer()
    training_data = count_vector.fit_transform(patterns)

    naive_bayes = MultinomialNB()
    naive_bayes.fit(training_data, tags)
    return count_vector, naive_bayes


def others(text):
    """Predict the intent tag of a non-cooking message.

    The model is fitted once, on the first call, and reused afterwards instead
    of being retrained for every query.

    Args:
        text (str): User message to classify.

    Returns:
        The output of ``naive_bayes.predict`` for the single input text;
        element 0 is the predicted intent tag.
    """
    global _others_model
    if _others_model is None:
        _others_model = _fit_others_model()
    count_vector, naive_bayes = _others_model
    return naive_bayes.predict(count_vector.transform([text]))

### Modelos que devuelven la respuesta

In [17]:
# Return a cooking answer according to the intent.
# Parsed spaCy docs per column name, stored together with the DataFrame they
# were built from so that rebinding `df` invalidates the entry.
_search_docs_cache = {}


def _column_docs(column):
    """Return the spaCy docs for every value of ``df[column]``, parsing them once.

    In-place edits of ``df`` are not detected; re-run this cell (or rebind
    ``df``) to force a refresh.
    """
    cached = _search_docs_cache.get(column)
    if cached is None or cached[0] is not df:
        cached = (df, [nlp(str(value)) for value in df[column]])
        _search_docs_cache[column] = cached
    return cached[1]


def search(text, first, second):
    """Find the row whose ``first`` column is most similar to ``text``.

    Similarity is computed with ``Doc.similarity`` between the parsed query and
    each parsed value of ``df[first]``. The column is parsed only on the first
    query that uses it; later queries parse just the query text.

    Args:
        text (str): User query.
        first (str): Name of the column compared against the query.
        second (str): Name of the column whose value is returned.

    Returns:
        The ``second`` value of the best-matching row (the earliest row wins a
        tie), or "" when ``df`` has no rows.
    """
    query_doc = nlp(text)
    best_score = -1000000
    best_position = None
    for position, candidate in enumerate(_column_docs(first)):
        score = query_doc.similarity(candidate)
        # Strict comparison keeps the earliest row when scores are equal.
        if score > best_score:
            best_score = score
            best_position = position
    if best_position is None:
        return ""
    # Positional access, matching the 0..n-1 scan over the rows.
    return df[second].iloc[best_position]
  
# Return a random miscellaneous answer according to the intent.
def response_others(tag):
    """Pick a random response for the intent identified by ``tag``.

    Args:
        tag (str): Intent tag, as predicted by ``others``.

    Returns:
        One of the responses of the first intent whose "tag" equals ``tag``,
        or None when no intent has that tag.

    Raises:
        IndexError: If the matching intent has an empty "responses" list.
    """
    # Scan every intent rather than a fixed count, so a short intents file
    # cannot cause an out-of-range index.
    for intent in intents["intents"]:
        if intent["tag"] == tag:
            # random.choice picks uniformly without going through a float
            # that has to be truncated into an index.
            return random.choice(intent["responses"])
    return None

### Obtener la respuesta

In [19]:
def get_response(text):
    """Route a user message through the classifiers and return the answer.

    The message is first classified as cooking (non-zero) or other (0).
    Non-cooking messages get a canned response for their predicted intent.
    Cooking messages are classified again: 0 looks up a recipe title by
    ingredients, 1 looks up the instructions by title.

    Args:
        text (str): User message.

    Returns:
        The selected response, or the first-level prediction itself when the
        cooking classifier returns neither 0 nor 1.
    """
    top_level = cook_others(text)[0]

    # Non-cooking branch: answer from the canned responses.
    if top_level == 0:
        other_tag = others(text)[0]
        return response_others(other_tag)

    # Cooking branch: pick the lookup direction from the second classifier.
    cooking_prediction = cook(text)
    if cooking_prediction == 0:
        return search(text, "ingredients", "title")
    if cooking_prediction == 1:
        return search(text, "title", "instructions")

    return top_level

# Manual end-to-end check of the pipeline with a sample cooking question.
get_response("alguna receta que me sugieras")

'Pepinos y zanahorias helados '