First, we install the dependencies for the Rasa package, which will handle the NLP model.

The second step is to download the language model from spaCy, in our case the medium English model (`en_core_web_md`).

In [None]:
# TensorFlow is installed first, before the Rasa packages.
!pip install tensorflow
# Rasa NLU together with its optional spaCy extra
!pip install rasa_nlu[spacy]
# Rasa Core (-U upgrades it if a version is already installed)
!pip install -U rasa_core
# spaCy language model: download the medium English model, then link it under
# the shortcut name "en" (--force overwrites an existing link)
!python -m spacy download en_core_web_md
!python -m spacy link en_core_web_md en --force;

Next, we import some basic Python packages and set up the notebook.

In [54]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

import logging, io, json, warnings
# Emit log records at INFO level and above, so library progress messages are shown.
logging.basicConfig(level="INFO")
# Silence all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')
import sys, os
import pandas as pd


And then we import the previously-downloaded modules.

In [None]:
import rasa_nlu
import rasa_core
import spacy

In [32]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


These functions create the YAML or Markdown training file (required for Rasa training) from the dataset (a CSV file).

In [70]:
def create_training_data_yaml(data_dir=None):
  """Generate ``nlu.yaml`` (YAML training data) from ``survey.csv``.

  The CSV is read without a header; its first row is skipped as the header
  row, and columns 2-5 supply the example sentences for the intents
  ``get_smells``, ``get_smells_date``, ``report`` and ``info`` respectively.
  Empty cells are skipped so they are not written out as ``nan`` examples.

  Args:
    data_dir: Folder that contains ``survey.csv``; ``nlu.yaml`` is written
      to the same folder. Defaults to the notebook-level ``path`` variable.
  """
  # Fall back to the notebook global so existing zero-argument calls keep working.
  base_dir = path if data_dir is None else data_dir
  df = pd.read_csv(os.path.join(base_dir, "survey.csv"), sep=",", header=None)
  # (intent name, index of the CSV column holding its example sentences)
  intent_columns = (
    ("get_smells", 2),
    ("get_smells_date", 3),
    ("report", 4),
    ("info", 5),
  )
  with open(os.path.join(base_dir, "nlu.yaml"), "wt", encoding="utf-8") as f:
    f.write('version: "3.0"\n')
    f.write("nlu: \n")
    for index, (intent, column) in enumerate(intent_columns):
      if index > 0:
        # A blank line separates consecutive intent blocks.
        f.write("\n")
      f.write(f"- intent: {intent}\n  examples: | \n")
      # Row 0 is the CSV header row; dropna() discards empty cells.
      for q in df[column][1:].dropna():
        f.write(f"    - {q}\n")

def create_training_data_md(data_dir=None):
  """Generate ``nlu.md`` (Markdown training data) from ``survey.csv``.

  The CSV is read without a header; its first row is skipped as the header
  row, and columns 2-5 supply the example sentences for the intents
  ``get_smells``, ``get_smells_date``, ``report`` and ``info`` respectively.
  Empty cells are skipped so they are not written out as ``nan`` examples.

  Args:
    data_dir: Folder that contains ``survey.csv``; ``nlu.md`` is written
      to the same folder. Defaults to the notebook-level ``path`` variable.
  """
  # Fall back to the notebook global so existing zero-argument calls keep working.
  base_dir = path if data_dir is None else data_dir
  df = pd.read_csv(os.path.join(base_dir, "survey.csv"), sep=",", header=None)
  # (intent name, index of the CSV column holding its example sentences)
  intent_columns = (
    ("get_smells", 2),
    ("get_smells_date", 3),
    ("report", 4),
    ("info", 5),
  )
  with open(os.path.join(base_dir, "nlu.md"), "wt", encoding="utf-8") as f:
    for intent, column in intent_columns:
      f.write(f"## intent: {intent}\n")
      # Row 0 is the CSV header row; dropna() discards empty cells.
      for q in df[column][1:].dropna():
        f.write(f"- {q}\n")
  
# Define the project folder before the call: the generator reads the
# notebook-level `path`, which is otherwise only assigned in the later
# training cell, so a fresh "Restart & Run All" would fail with a NameError here.
path = os.path.join(os.getcwd(), "drive", "My Drive", "Colab Notebooks", "cadocs")
create_training_data_md()

With everything prepared, we can start training the NLU model.

In [72]:
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu import config


# Project folder on the mounted Drive, resolved relative to the current working directory.
path = os.path.join(os.getcwd(), "drive", "My Drive", "Colab Notebooks", "cadocs")

# Load the NLU training examples from the generated Markdown file.
training_data = load_data(os.path.join(path, "nlu.md"))

# Build a trainer from the pipeline configuration stored in config.yml.
trainer = Trainer(config.load(os.path.join(path, "config.yml")))

# Train the pipeline; the returned interpreter is what parses messages later on.
interpreter = trainer.train(training_data)

# Persist the trained model under models/nlu using the fixed model name "current".
model_directory = trainer.persist(os.path.join(path,"models/nlu"), fixed_model_name="current")

INFO:rasa_nlu.training_data.loading:Training data format of /content/drive/My Drive/Colab Notebooks/cadocs/nlu.md is md
INFO:rasa_nlu.training_data.training_data:Training data stats: 
	- intent examples: 144 (4 distinct intents)
	- Found intents: 'get_smells', 'info', 'get_smells_date', 'report'
	- entity examples: 0 (0 distinct entities)
	- found entities: 

INFO:rasa_nlu.utils.spacy_utils:Trying to load spacy model with name 'en'
INFO:rasa_nlu.components:Added 'SpacyNLP' to component cache. Key 'SpacyNLP-en'.
INFO:rasa_nlu.model:Starting to train component SpacyNLP
INFO:rasa_nlu.model:Finished training component.
INFO:rasa_nlu.model:Starting to train component SpacyTokenizer
INFO:rasa_nlu.model:Finished training component.
INFO:rasa_nlu.model:Starting to train component CRFEntityExtractor
INFO:rasa_nlu.model:Finished training component.
INFO:rasa_nlu.model:Starting to train component SpacyFeaturizer
INFO:rasa_nlu.model:Finished training component.
INFO:rasa_nlu.model:Starting to trai

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    0.7s finished
INFO:rasa_nlu.model:Finished training component.
INFO:rasa_nlu.model:Starting to train component EntitySynonymMapper
INFO:rasa_nlu.model:Finished training component.
INFO:rasa_nlu.model:Successfully saved model into '/content/drive/My Drive/Colab Notebooks/cadocs/models/nlu/default/current'


In [73]:
def pprint(o):
    """Print ``o`` to stdout as JSON indented by two spaces."""
    rendered = json.dumps(o, indent=2)
    print(rendered)
    
# Sanity check: parse a sample request and print the parse result as JSON.
pprint(interpreter.parse("hello CADOCS, show me the community smells in the repository LINK"))

{
  "intent": {
    "name": "get_smells",
    "confidence": 0.7352955598823949
  },
  "entities": [],
  "intent_ranking": [
    {
      "name": "get_smells",
      "confidence": 0.7352955598823949
    },
    {
      "name": "get_smells_date",
      "confidence": 0.23659214894215788
    },
    {
      "name": "report",
      "confidence": 0.01945754683728844
    },
    {
      "name": "info",
      "confidence": 0.008654744338158748
    }
  ],
  "text": "hello CADOCS, show me the community smells in the repository LINK"
}
