# Modelo usando Sklearn

In [1]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import LambdaCallback
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.optimizers import RMSprop
import pandas as pd
import numpy as np
import json
import spacy as sp

In [2]:
df = pd.read_json("Data/train-esp.json")
nlp = sp.load("es_core_news_md")
vectorizer = CountVectorizer(min_df=0, lowercase=False) 
vectorizer.fit(nlp.vocab.strings) 

In [3]:
df

Unnamed: 0,title,ingredients,instructions
0,"Tarta de queso de higos confitados, avellanas ...","1 1/2 tazas de Oporto leonado, 1 taza de higos...",Para los higos y la corteza: Combinar el Oport...
1,Bisque de chirivía con jengibre,"3 cucharadas de mantequilla, 2 1/2 tazas de pu...",Derretir la mantequilla en una olla grande a f...
2,Posole de pavo,"2 chiles anchos o pasilla secos, sin semillas,...",Retire las semillas de 2 chiles anchos o pasil...
3,Rabe de brócoli con bulgur y nueces,"1 taza de bulgur de grano medio*., 2 1/2 tazas...",Poner el bulgur en un bol mediano y verter 2 1...
4,Lechuga Frisée y Bibb con rábanos y cebollas d...,"1/4 de taza de yogur natural de leche entera, ...","En un bol pequeño, bata el yogur, la mayonesa,..."
...,...,...,...
9108,Natillas de vainilla con especias y Streusel d...,"1 cucharadita de gelatina sin sabor, 1 1/2 taz...",Natillas: Poner 1 cucharada de agua en un bol ...
9109,Sno-Cones de fruta fresca,1 libra de fresas frescas (aproximadamente 1 c...,"Triturar las fresas, las frambuesas y el azúca..."
9110,Orzo con jamón y queso de cabra,"1 libra de orzo (pasta con forma de arroz), 1/...",Precalentar la parrilla y engrasar ligeramente...
9111,Spiedini de cordero y romero,"5 cucharadas de zumo de limón fresco, 1/4 de t...",Preparar la barbacoa (a fuego medio-alto). Bat...


In [4]:
print("Processing the Intents.....")
with open('Data/intents.json') as json_data:
    intents = json.load(json_data)
intents

Processing the Intents.....


{'intents': [{'tag': 'greeting',
   'patterns': ['Hola', 'Como estás', 'Hay alquien ?', 'Que tal', 'Buen día'],
   'responses': ['Hola, soy Chefbot'],
   'context_set': ''},
  {'tag': 'goodbye',
   'patterns': ['Adiós', 'Nos vemos', 'Me voy'],
   'responses': ['Adiós', 'Adios  ! Vuelva pronto .']},
  {'tag': 'thanks',
   'patterns': ['Gracias',
    'Muchas gracias',
    'Es muy útil',
    'Gracias por ayudar'],
   'responses': ['Feliz de ayudar !', 'De nada!', 'Para eso estoy']},
  {'tag': 'who',
   'patterns': ['Quien eres ?', 'A que te dedicas', 'Que haces'],
   'responses': ['Soy un chatbot especializado en cocina',
    'Soy un chatbot para hablar de recetas de cocina']}]}

# Modelo para los intents de cocina y varios con Sklearn

In [5]:
def cook_others():
    dataset = pd.read_csv('Data/intents_cook_others.csv')

    sentences = dataset['text'].values
    y = dataset['label'].values
    sentences_train, sentences_test, y_train, y_test = train_test_split(
        sentences, y, test_size=0.20, random_state=1000)
    x_train = vectorizer.transform(sentences_train)
    x_test = vectorizer.transform(sentences_test)

    from sklearn.linear_model import LogisticRegression

    classifier = LogisticRegression()
    classifier.fit(x_train, y_train) 
    
    score = classifier.score(x_test, y_test)
    print('Score cook and others {:.4f}'.format(score))
    
    return classifier

# Modelo para los intents de varios

In [6]:
def others():
    x = []
    y_train = []
    for i in range(4):
    #     print(intents["intents"][i])
        label = intents["intents"][i]["tag"]
        for j in intents["intents"][i]["patterns"]:
            x.append(j)
            y_train.append(label)
    x_train = vectorizer.transform(x)

    from sklearn.linear_model import LogisticRegression

    classifier = LogisticRegression()
    classifier.fit(x_train, y_train) 

    return classifier

# Modelo para los intents de cocina

In [7]:
def cook():
    df = pd.read_csv('Data/intents_cook.csv') 

    sentences = df['text'].values
    y = df['label'].values
    sentences_train, sentences_test, y_train, y_test = train_test_split(
        sentences, y, test_size=0.20, random_state=1000)
    x_train = vectorizer.transform(sentences_train)
    x_test = vectorizer.transform(sentences_test)

    from sklearn.linear_model import LogisticRegression
    classifier = LogisticRegression()
    classifier.fit(x_train, y_train)
    
    score = classifier.score(x_test, y_test)
    print('Score Cook{:.4f}'.format(score))
    
    return classifier

## Crear los modelos

In [8]:
classifier_cook_other = cook_others()
classifier_cook = cook()
classifiers_others = others()

Score cook and others 0.9992


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Score Cook0.9888


In [9]:
def search(text, first, second):
    ma = -1000000
    sol = ""
    doc = nlp(text)
    for i in range(df.shape[0]):
        text = df[first][i]
        text = str(text) 
        text = nlp(text)
        value = doc.similarity(text)
        if (value > ma):
            ma = value
            sol = df[second][i]
    return sol
        
def response_others(tag):
    for i in range(4):
    #     print(intents["intents"][i])
        label = intents["intents"][i]["tag"]
        if label == tag:
            return intents["intents"][i]["responses"][0]

# Modelo para devolver una respuesta usando Spacy

In [10]:
def response(text):
    text_vectorize = vectorizer.transform([text])
    doc = nlp(text)

    cook_others = classifier_cook_other.predict(text_vectorize)
    response = ""

    if cook_others[0] == "cook":
        response = "hola"
        cook_intents = classifier_cook.predict(text_vectorize)

#         if cook_intents == "instructions":
#             return search(text, "ingredients", "instructions")

#         if cook_intents == "title":
#             return search(text, "ingredients", "title")

#         if cook_intents == "title_instructions":
#             return search(text, "title", "instructions")
        return response;

    else:
        others = classifiers_others.predict(text_vectorize)
        return response_others(others)

In [16]:
response("S Tengo aceite, que puedo hacer")

'hola'

In [108]:
d = 1
for i in range(2):
    d += 0.001
    
1 +(.100e-2 + .1e-2)

1.002

In [105]:
d = 0
for i in range(2):
    d += 0.001 h              
d = d + 1

(0.001 + 0.001) + 1

1.002

In [65]:
print(dc(0.4))
print(dc(6))
print(dc(0.4*6))
print (0.4*6)

0.40000000000000002220446049250313080847263336181640625
6
2.4000000000000003552713678800500929355621337890625
2.4000000000000004
