In [1]:
%matplotlib notebook

Guided Data Science
================
#### Next-Line Recommendation chatbot for Data-Scientists
#### Recommendation Engine

<p>In this notebook you can test the model for yourself and see how it works... </p>
<p>For a user input (a code cell), we transform it into a masked representation, determine its workflow stage (see @LINK), and call the relevant trained model to get a recommendation for the next line of code.</p>
<p>To see how we built the model and trained it, check out @LINK </p>


In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import torch
from torch.jit import script, trace
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
from io import open
import itertools
import math
import pandas as pd
import numpy as np
import pickle
from keras.models import model_from_json
from keras.preprocessing.sequence import pad_sequences

import sys
sys.path.insert(0, '../data_gathering')
from masking import init_trans_dict
from masking import parse_imports_to_trans_dict
from masking import mask_source
from masking import unmask_source
from masking import consts

# Run the models on the GPU when CUDA is available, otherwise fall back to the CPU
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda" if USE_CUDA else "cpu")

# Directory / corpus names passed to the data-preparation helpers below.
# NOTE: save_dir is reassigned to os.path.join("data", "save") before the corpora are loaded.
save_dir = os.path.join("save")
corpus_name = "cells"
corpus = os.path.join("data", corpus_name)

Using TensorFlow backend.


First, we'll create the vocabulary for our models:

In [3]:
# Tab-separated pair files, one per workflow stage.
# readVocs() skips the first (header) line of each file and splits every remaining line on tabs.
im_pairs = "../Data/Import.tsv"
lo_pairs = "../Data/Load.tsv"
ex_pairs = "../Data/Explore.tsv"
pr_pairs = "../Data/Prep.tsv"
tr_pairs = "../Data/Train.tsv"
ev_pairs = "../Data/Eval.tsv"

In [4]:
# Default word tokens
PAD_token = 0  # Used for padding short sentences
SOS_token = 1  # Start-of-sentence token
EOS_token = 2  # End-of-sentence token

class Voc:
    """Vocabulary that maps words to integer indexes (and back) and counts usage.

    Attributes:
        name: label given to the vocabulary at construction time.
        trimmed: True once trim() has run; later trim() calls are no-ops.
        word2index: word -> index (indexes start at 3, after the default tokens).
        word2count: word -> number of times addWord() has seen it.
        index2word: index -> word, pre-populated with PAD/SOS/EOS.
        num_words: number of entries in index2word (default tokens included).
    """

    def __init__(self, name):
        self.name = name
        self.trimmed = False
        self._reset_tables()

    def _reset_tables(self):
        """(Re)initialize the lookup tables so they hold only the default tokens."""
        self.word2index = {}
        self.word2count = {}
        self.index2word = {PAD_token: "PAD", SOS_token: "SOS", EOS_token: "EOS"}
        self.num_words = 3  # Count SOS, EOS, PAD

    def addSentence(self, sentence):
        """Register every space-separated word of `sentence`."""
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        """Assign the next free index to a new word, or bump the count of a known one."""
        if word not in self.word2index:
            self.word2index[word] = self.num_words
            self.word2count[word] = 1
            self.index2word[self.num_words] = word
            self.num_words += 1
        else:
            self.word2count[word] += 1

    # Remove words below a certain count threshold
    def trim(self, min_count):
        """Keep only words seen at least `min_count` times and rebuild the tables.

        Runs at most once per instance. The surviving words are re-added through
        addWord(), so their counts restart at 1 and their indexes are reassigned.
        """
        if self.trimmed:
            return
        self.trimmed = True

        keep_words = [word for word, count in self.word2count.items() if count >= min_count]

        total_words = len(self.word2index)
        # An empty vocabulary previously raised ZeroDivisionError here; report a ratio of 0 instead.
        kept_ratio = len(keep_words) / total_words if total_words else 0.0
        print('keep_words {} / {} = {:.4f}'.format(
            len(keep_words), total_words, kept_ratio
        ))

        # Reinitialize dictionaries
        self._reset_tables()

        for word in keep_words:
            self.addWord(word)

In [5]:
MAX_LENGTH = 100 #30  # Maximum code line length to consider

# Turn a Unicode string to plain ASCII
def unicodeToAscii(s):
    """Return `s` in NFD form with all combining marks (category 'Mn') removed.

    Accented letters therefore lose their accents; characters that have no
    decomposition are passed through unchanged.
    """
    decomposed = unicodedata.normalize('NFD', s)
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(base_chars)

# Lowercase, strip, and remove special chars
def normalizeString(s):
    """Normalize one line of (masked) code into space-separated tokens.

    The string is lower-cased, stripped and de-accented, then rewritten by the
    substitutions below, applied in order. The result has no leading/trailing
    whitespace and no runs of whitespace.
    """
    text = unicodeToAscii(s.lower().strip())
    substitutions = (
        (r"([!?])", r" \1"),               # put a space in front of '!' and '?'
        (r"\\[rn]+", r" "),                # literal backslash followed by r/n characters -> space
        (r"[^a-zA-Z0-9.!=?_]+", r" "),     # any run of other characters -> space
        (r"\s+", r" "),                    # collapse whitespace
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()

# Read query/response pairs and return a voc object
def readVocs(datafile, corpus_name):
    """Read a tab-separated pairs file and return an empty Voc plus the normalized pairs.

    Args:
        datafile: path of a UTF-8 text file whose first line is a header.
        corpus_name: name given to the new Voc.

    Returns:
        (voc, pairs): `voc` is a freshly created (still empty) Voc; `pairs` is a
        list with one entry per data line, each entry being the list of that
        line's tab-separated fields passed through normalizeString().
    """
    print("Reading lines...")
    # Use a context manager so the file is closed even if reading fails.
    with open(datafile, encoding='utf-8') as handle:
        next(handle, None)  # skip header line (an empty file no longer raises StopIteration)
        lines = handle.read().strip().split('\n')
    # Split every line into pairs and normalize
    pairs = [[normalizeString(s) for s in l.split('\t')] for l in lines]
    voc = Voc(corpus_name)
    return voc, pairs

# Returns True iff both sentences in a pair 'p' are under the MAX_LENGTH threshold
def filterPair(p, max_length=None):
    """Tell whether both sentences of pair `p` are shorter than the length threshold.

    Args:
        p: sequence whose first two items are space-separated sentences.
        max_length: exclusive upper bound on the word count of each sentence;
            defaults to the module-level MAX_LENGTH.

    Returns:
        True when both sentences have fewer than `max_length` words. Malformed
        pairs (fewer than two items, non-string items, `None`) yield False.
    """
    if max_length is None:
        max_length = MAX_LENGTH
    # Input sequences need to preserve the last word for EOS token
    try:
        return len(p[0].split(' ')) < max_length and len(p[1].split(' ')) < max_length
    except (IndexError, KeyError, TypeError, AttributeError):
        # Only malformed-pair errors are swallowed; a bare `except:` would also
        # hide KeyboardInterrupt and genuine programming errors.
        return False

# Filter pairs using filterPair condition
def filterPairs(pairs):
    """Return a new list holding only the pairs accepted by filterPair()."""
    return list(filter(filterPair, pairs))

# Using the functions defined above, return a populated voc object and pairs list
def loadPrepareData(corpus, corpus_name, datafile, save_dir):
    """Read `datafile`, drop over-long pairs and build the vocabulary from what remains.

    `corpus` and `save_dir` are accepted but not used by this function.

    Returns:
        (voc, pairs): the Voc populated with every word of the kept pairs, and
        the kept pairs themselves.
    """
    print("Start preparing training data ...")
    voc, all_pairs = readVocs(datafile, corpus_name)
    print("Read {!s} sentence pairs".format(len(all_pairs)))
    kept_pairs = filterPairs(all_pairs)
    print("Trimmed to {!s} sentence pairs".format(len(kept_pairs)))
    print("Counting words...")
    for pair in kept_pairs:
        # Only the first two columns (input line, next line) feed the vocabulary
        for sentence in (pair[0], pair[1]):
            voc.addSentence(sentence)
    print("Counted words:", voc.num_words)
    return voc, kept_pairs


# Load/Assemble voc and pairs for each model
# One (voc, pairs) couple is built per workflow stage from that stage's TSV file.
# Note: loadPrepareData() accepts corpus and save_dir but does not use them.
save_dir = os.path.join("data", "save")
print("Import Cells:")
Import_voc, Import_pairs = loadPrepareData(corpus, corpus_name, im_pairs, save_dir)
print("Load Data Cells:")
Load_voc, Load_pairs = loadPrepareData(corpus, corpus_name, lo_pairs, save_dir)
print("Data Exploration Cells:")
Explore_voc, Explore_pairs = loadPrepareData(corpus, corpus_name, ex_pairs, save_dir)
print("Data Preparation Cells:")
Prep_voc, Prep_pairs = loadPrepareData(corpus, corpus_name, pr_pairs, save_dir)
print("Model Training and parameter Tuning Cells:")
Train_voc, Train_pairs = loadPrepareData(corpus, corpus_name, tr_pairs, save_dir)
print("Model Evaluation Cells:")
Eval_voc, Eval_pairs = loadPrepareData(corpus, corpus_name, ev_pairs, save_dir)

Import Cells:
Start preparing training data ...
Reading lines...
Read 10652 sentence pairs
Trimmed to 10652 sentence pairs
Counting words...
Counted words: 2921
Load Data Cells:
Start preparing training data ...
Reading lines...
Read 37624 sentence pairs
Trimmed to 37624 sentence pairs
Counting words...
Counted words: 6506
Data Exploration Cells:
Start preparing training data ...
Reading lines...
Read 133369 sentence pairs
Trimmed to 133369 sentence pairs
Counting words...
Counted words: 15783
Data Preparation Cells:
Start preparing training data ...
Reading lines...
Read 91557 sentence pairs
Trimmed to 91557 sentence pairs
Counting words...
Counted words: 20163
Model Training and parameter Tuning Cells:
Start preparing training data ...
Reading lines...
Read 33162 sentence pairs
Trimmed to 33162 sentence pairs
Counting words...
Counted words: 11258
Model Evaluation Cells:
Start preparing training data ...
Reading lines...
Read 73514 sentence pairs
Trimmed to 73514 sentence pairs
Count

Now, we'll define functions to load and use our trained models:

<i>Note: for more information, see our model training process.</i>

In [6]:
def indexesFromSentence(voc, sentence):
    """Translate a space-separated sentence into vocabulary indexes, ending with EOS_token.

    Raises:
        KeyError: if a word is missing from `voc.word2index`.
    """
    token_ids = []
    for word in sentence.split(' '):
        token_ids.append(voc.word2index[word])
    token_ids.append(EOS_token)
    return token_ids


def zeroPadding(l, fillvalue=PAD_token):
    """Transpose a batch of index lists, padding the shorter ones with `fillvalue`.

    Returns a list of tuples: entry `t` holds the t-th index of every sequence
    in `l` (or `fillvalue` where that sequence has already ended).
    """
    padded_columns = itertools.zip_longest(*l, fillvalue=fillvalue)
    return list(padded_columns)

def binaryMatrix(l, value=PAD_token):
    """Build a 0/1 mask with the same nested shape as `l`.

    Args:
        l: iterable of token sequences.
        value: the token treated as padding (defaults to PAD_token).

    Returns:
        A list of lists holding 0 where the token equals `value` and 1 elsewhere.
    """
    # The comparison previously ignored `value` and was hard-wired to PAD_token.
    return [[0 if token == value else 1 for token in seq] for seq in l]

# Returns padded input sequence tensor and lengths
def inputVar(l, voc):
    """Encode a batch of sentences for the encoder.

    Returns:
        (padVar, lengths): `padVar` is the LongTensor built from the output of
        zeroPadding(), i.e. one row per time step; `lengths` holds the unpadded
        length (EOS included) of each sentence.
    """
    encoded = [indexesFromSentence(voc, sentence) for sentence in l]
    lengths = torch.tensor(list(map(len, encoded)))
    padVar = torch.LongTensor(zeroPadding(encoded))
    return padVar, lengths

# Returns padded target sequence tensor, padding mask, and max target length
def outputVar(l, voc):
    """Encode a batch of target sentences.

    Returns:
        (padVar, mask, max_target_len): the padded LongTensor of indexes, a
        ByteTensor of the same layout that is 0 on padding and 1 elsewhere,
        and the length of the longest encoded sentence (EOS included).
    """
    encoded = [indexesFromSentence(voc, sentence) for sentence in l]
    max_target_len = max(len(token_ids) for token_ids in encoded)
    padded = zeroPadding(encoded)
    mask = torch.ByteTensor(binaryMatrix(padded))
    padVar = torch.LongTensor(padded)
    return padVar, mask, max_target_len

# Returns all items for a given batch of pairs
def batch2TrainData(voc, pair_batch):
    """Turn a batch of (input, output) sentence pairs into training tensors.

    `pair_batch` is sorted IN PLACE, longest input sentence first.

    Returns:
        (inp, lengths, output, mask, max_target_len) as produced by inputVar()
        and outputVar().
    """
    def input_word_count(pair):
        return len(pair[0].split(" "))

    pair_batch.sort(key=input_word_count, reverse=True)
    input_batch = [pair[0] for pair in pair_batch]
    output_batch = [pair[1] for pair in pair_batch]
    inp, lengths = inputVar(input_batch, voc)
    output, mask, max_target_len = outputVar(output_batch, voc)
    return inp, lengths, output, mask, max_target_len

In [7]:
class EncoderRNN(nn.Module):
    """Bidirectional GRU encoder whose two directions are summed into one output.

    Args:
        hidden_size: GRU hidden size; also used as the GRU input size, so the
            embedding must produce `hidden_size` features.
        embedding: embedding module applied to the input indexes.
        n_layers: number of stacked GRU layers.
        dropout: dropout between GRU layers (forced to 0 for a single layer).
    """

    def __init__(self, hidden_size, embedding, n_layers=1, dropout=0):
        super(EncoderRNN, self).__init__()
        self.n_layers = n_layers
        self.hidden_size = hidden_size
        self.embedding = embedding

        # Inter-layer dropout is meaningless with one layer, so it is disabled there
        gru_dropout = dropout if n_layers != 1 else 0
        # Input and hidden sizes are both `hidden_size`: the GRU consumes embeddings directly
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers,
                          dropout=gru_dropout, bidirectional=True)

    def forward(self, input_seq, input_lengths, hidden=None):
        """Encode a padded batch.

        Returns:
            (outputs, hidden): the per-step outputs with forward and backward
            directions added together, and the GRU's final hidden state.
        """
        rnn_utils = torch.nn.utils.rnn
        # Indexes -> embeddings -> packed sequence (padding is skipped by the GRU)
        packed_input = rnn_utils.pack_padded_sequence(self.embedding(input_seq), input_lengths)
        packed_output, hidden = self.gru(packed_input, hidden)
        # Back to a padded tensor
        unpacked, _ = rnn_utils.pad_packed_sequence(packed_output)
        # The last dimension holds [forward | backward]; fold the halves together
        forward_half = unpacked[:, :, :self.hidden_size]
        backward_half = unpacked[:, :, self.hidden_size:]
        return forward_half + backward_half, hidden

In [8]:
# Luong attention layer
class Attn(torch.nn.Module):
    """Luong-style attention producing normalized weights over the encoder outputs.

    Args:
        method: scoring function, one of 'dot', 'general' or 'concat'.
        hidden_size: feature size of the decoder state and encoder outputs.

    Raises:
        ValueError: if `method` is not one of the supported names.
    """

    def __init__(self, method, hidden_size):
        super(Attn, self).__init__()
        self.method = method
        if self.method not in ['dot', 'general', 'concat']:
            # Format the message explicitly; passing two args made str(exc) a tuple repr
            raise ValueError("{} is not an appropriate attention method.".format(self.method))
        self.hidden_size = hidden_size
        if self.method == 'general':
            self.attn = torch.nn.Linear(self.hidden_size, hidden_size)
        elif self.method == 'concat':
            self.attn = torch.nn.Linear(self.hidden_size * 2, hidden_size)
            # torch.FloatTensor(n) is uninitialized memory (may hold NaN/inf); start
            # from a small uniform range instead. Loading a checkpoint overrides it.
            bound = float(max(hidden_size, 1)) ** -0.5
            self.v = torch.nn.Parameter(torch.empty(hidden_size).uniform_(-bound, bound))

    def dot_score(self, hidden, encoder_output):
        """Score = sum over the feature dimension (dim 2) of hidden * encoder_output."""
        return torch.sum(hidden * encoder_output, dim=2)

    def general_score(self, hidden, encoder_output):
        """Like dot_score, but the encoder outputs first go through a linear layer."""
        energy = self.attn(encoder_output)
        return torch.sum(hidden * energy, dim=2)

    def concat_score(self, hidden, encoder_output):
        """Score = v . tanh(W [hidden ; encoder_output]), hidden repeated per encoder step."""
        energy = self.attn(torch.cat((hidden.expand(encoder_output.size(0), -1, -1), encoder_output), 2)).tanh()
        return torch.sum(self.v * energy, dim=2)

    def forward(self, hidden, encoder_outputs):
        """Return softmax-normalized attention weights with an added middle dimension.

        The 2-D energies are transposed, normalized along dim 1 and returned
        with a singleton dimension inserted at position 1.
        """
        # Calculate the attention weights (energies) based on the given method
        if self.method == 'general':
            attn_energies = self.general_score(hidden, encoder_outputs)
        elif self.method == 'concat':
            attn_energies = self.concat_score(hidden, encoder_outputs)
        elif self.method == 'dot':
            attn_energies = self.dot_score(hidden, encoder_outputs)

        # Transpose max_length and batch_size dimensions
        attn_energies = attn_energies.t()

        # Return the softmax normalized probability scores (with added dimension)
        return F.softmax(attn_energies, dim=1).unsqueeze(1)

In [9]:
class LuongAttnDecoderRNN(nn.Module):
    """Unidirectional GRU decoder with Luong attention, run one step at a time.

    Args:
        attn_model: attention scoring method handed to Attn.
        embedding: embedding module applied to the input token.
        hidden_size: GRU hidden size (and embedding feature size).
        output_size: number of output classes of the final linear layer.
        n_layers: number of stacked GRU layers.
        dropout: embedding dropout, also used between GRU layers when n_layers > 1.
    """

    def __init__(self, attn_model, embedding, hidden_size, output_size, n_layers=1, dropout=0.1):
        super(LuongAttnDecoderRNN, self).__init__()

        # Hyper-parameters kept for reference
        self.attn_model = attn_model
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout = dropout

        # Layers
        self.embedding = embedding
        self.embedding_dropout = nn.Dropout(dropout)
        gru_dropout = dropout if n_layers != 1 else 0
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=gru_dropout)
        self.concat = nn.Linear(hidden_size * 2, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

        self.attn = Attn(attn_model, hidden_size)

    def forward(self, input_step, last_hidden, encoder_outputs):
        """Decode a single step.

        Returns:
            (output, hidden): softmax probabilities over the output classes and
            the updated GRU hidden state.
        """
        # Embed the current input token (with dropout) and advance the GRU by one step
        step_embedding = self.embedding_dropout(self.embedding(input_step))
        gru_out, hidden = self.gru(step_embedding, last_hidden)
        # Attention weights from the fresh GRU output, then the weighted-sum context vector
        weights = self.attn(gru_out, encoder_outputs)
        context = weights.bmm(encoder_outputs.transpose(0, 1)).squeeze(1)
        # Luong eq. 5: combine GRU output and context
        merged = torch.cat((gru_out.squeeze(0), context), 1)
        attentional = torch.tanh(self.concat(merged))
        # Luong eq. 6: distribution over the next token
        output = F.softmax(self.out(attentional), dim=1)
        return output, hidden

In [10]:
# load the relevant model according to the workflow stage
def load_model(workflow_stage, workflow_stage_voc):
    """Rebuild the encoder/decoder of one workflow stage from its checkpoint.

    Args:
        workflow_stage: checkpoint file name inside ./Models/ (e.g. "Import.tar").
        workflow_stage_voc: Voc object for the stage; its attributes are REPLACED
            by the vocabulary stored in the checkpoint.

    Returns:
        (encoder, decoder): modules with loaded weights, moved to `device`.
        They are returned in training mode; call .eval() before inference.
    """
    # Model configuration (must match the configuration used for training)
    attn_model = 'dot'
    hidden_size = 500
    encoder_n_layers = 2
    decoder_n_layers = 2
    dropout = 0.1

    # Set checkpoint to load from - choose model according to workflow stage
    loadFilename = "./Models/"+workflow_stage

    # set vocabulary according to workflow stage
    voc = workflow_stage_voc

    # Load model
    # map_location=device lets a checkpoint trained on GPU load on a CPU-only
    # machine (and vice versa) instead of failing while deserializing CUDA tensors.
    # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
    checkpoint = torch.load(loadFilename, map_location=device)
    encoder_sd = checkpoint['en']
    decoder_sd = checkpoint['de']
    embedding_sd = checkpoint['embedding']
    voc.__dict__ = checkpoint['voc_dict']

    print('Building encoder and decoder ...')
    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    # load embeddings
    embedding.load_state_dict(embedding_sd)
    # Initialize encoder & decoder models
    encoder = EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, voc.num_words, decoder_n_layers, dropout)
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)
    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    print('Models built and ready to go!')
    return encoder, decoder

Each trained model is larger than 100MB, so we split them into 14 RAR files.
Run this cell to extract the trained models (<b>if not already extracted</b>):

In [None]:
# Extract the split RAR archives holding the trained models (skip if already extracted).
# The trailing ';' suppresses the cell's output.
consts.unrar_trained_models();

In [11]:
# set encoder decoder for each stage model
# load_model() also replaces each *_voc's attributes with the vocabulary saved in the checkpoint.
print("Import Model:")
encoder_Import, decoder_Import = load_model("Import.tar", Import_voc)
print("Load Data Model:")
encoder_Load, decoder_Load = load_model("Load.tar", Load_voc)
print("Data Exploration Model:")
encoder_Explore, decoder_Explore = load_model("Explore.tar", Explore_voc)
print("Data Preparation Model:")
encoder_Prep, decoder_Prep = load_model("Prep.tar", Prep_voc)
print("Model Training and parameter Tuning Model:")
encoder_Train, decoder_Train = load_model("Train.tar", Train_voc)
print("Model Evaluation Model:")
encoder_Eval, decoder_Eval = load_model("Eval.tar", Eval_voc)

# Set dropout layers to eval mode (for training see previous notebook)
# The final .eval() call is the cell's last expression, so its return value
# (the decoder module) is what the notebook displays as the cell output.
encoder_Import.eval()
decoder_Import.eval()
encoder_Load.eval()
decoder_Load.eval()
encoder_Explore.eval()
decoder_Explore.eval()
encoder_Prep.eval()
decoder_Prep.eval()
encoder_Train.eval()
decoder_Train.eval()
encoder_Eval.eval()
decoder_Eval.eval()

Import Model:
Building encoder and decoder ...
Models built and ready to go!
Load Data Model:
Building encoder and decoder ...
Models built and ready to go!
Data Exploration Model:
Building encoder and decoder ...
Models built and ready to go!
Data Preparation Model:
Building encoder and decoder ...
Models built and ready to go!
Model Training and parameter Tuning Model:
Building encoder and decoder ...
Models built and ready to go!
Model Evaluation Model:
Building encoder and decoder ...
Models built and ready to go!


LuongAttnDecoderRNN(
  (embedding): Embedding(12164, 500)
  (embedding_dropout): Dropout(p=0.1)
  (gru): GRU(500, 500, num_layers=2, dropout=0.1)
  (concat): Linear(in_features=1000, out_features=500, bias=True)
  (out): Linear(in_features=500, out_features=12164, bias=True)
  (attn): Attn()
)

In [12]:
class GreedySearchDecoder(nn.Module):
    """Greedy decoder: at every step feed back the single most probable token.

    Decoding always runs for exactly `max_length` steps; it does not stop at EOS.
    """

    def __init__(self, encoder, decoder):
        super(GreedySearchDecoder, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, input_seq, input_length, max_length):
        """Encode `input_seq` and greedily decode `max_length` tokens.

        Returns:
            (all_tokens, all_scores): 1-D tensors with the chosen token index
            and its probability for every decoding step.
        """
        enc_outputs, enc_hidden = self.encoder(input_seq, input_length)
        # The first decoder.n_layers entries of the encoder's final hidden state seed the decoder
        dec_hidden = enc_hidden[:self.decoder.n_layers]
        # Decoding starts from the SOS token
        dec_input = torch.ones(1, 1, device=device, dtype=torch.long) * SOS_token
        # Seed the collections with empty tensors so max_length == 0 still yields empty results
        token_chunks = [torch.zeros([0], device=device, dtype=torch.long)]
        score_chunks = [torch.zeros([0], device=device)]
        for _ in range(max_length):
            step_probs, dec_hidden = self.decoder(dec_input, dec_hidden, enc_outputs)
            # Most likely token and its softmax score
            best_score, best_token = torch.max(step_probs, dim=1)
            token_chunks.append(best_token)
            score_chunks.append(best_score)
            # The chosen token (with a leading dimension added) is the next decoder input
            dec_input = best_token.unsqueeze(0)
        return torch.cat(token_chunks, dim=0), torch.cat(score_chunks, dim=0)

In [13]:
def load_classifier_model():
    """Load the workflow-stage classifier and its tokenizer from ../Classification.

    Returns:
        (model, tokenizer): the Keras model rebuilt from model.json with the
        weights of model.h5, and the unpickled tokenizer.
    """
    # load the trained model (not needed if you train again)
    # NOTE(security): pickle.load can execute arbitrary code; only use a trusted tokenizer file.
    with open('../Classification/tokenizer.pickle', 'rb') as handle:
        tokenizer = pickle.load(handle)

    # Context manager closes the JSON file even if reading fails
    with open('../Classification/model.json', 'r') as json_file:
        model_json = json_file.read()

    # Local is named `classifier` so it does not shadow the notebook's load_model() function
    classifier = model_from_json(model_json)
    classifier.load_weights("../Classification/model.h5")

    return classifier, tokenizer

In [14]:
# Shared masking dictionary: filled by get_summed_rep() and read back by specificalization()
var_dict = init_trans_dict()

# Workflow-stage classifier and its tokenizer
class_model, class_tokenizer = load_classifier_model()
# Stage names indexed by the classifier's argmax output (see get_workflow_stage).
# NOTE(review): the order presumably matches the label encoding used when the classifier was trained — confirm.
labels_arr = ['Load', 'Prep', 'Train', 'Eval', 'Explore', 'Import']

def get_workflow_stage(cell, model, tokenizer, labels):
    """Classify a code cell and return the matching entry of `labels`.

    The cell is tokenized, padded to 120 tokens, scored by `model`, and the
    label at the position of the highest score is returned.
    """
    sequences = tokenizer.texts_to_sequences([cell])
    model_input = pad_sequences(sequences, maxlen=120)
    scores = model.predict(model_input)
    best_index = np.argmax(scores)
    return labels[best_index]

# use masking and get the summed representation of a cell, also fills the variables dictionary
def get_summed_rep(cell, var_dict):
    """Mask `cell` and return the second element of mask_source()'s result.

    parse_imports_to_trans_dict() is called first with the same `var_dict`,
    which both helpers receive by reference.
    """
    parse_imports_to_trans_dict(cell, var_dict)
    masked_result = mask_source(cell, var_dict)
    return masked_result[1]

# get Model output (generalized) for input sentence (summed representation)
def evaluate(encoder, decoder, searcher, voc, sentence, max_length=MAX_LENGTH):
    """Decode one normalized sentence and return the predicted words.

    Args:
        encoder, decoder: accepted for API compatibility; decoding goes through `searcher`.
        searcher: callable taking (input_batch, lengths, max_length) and returning (tokens, scores).
        voc: vocabulary used to encode the sentence and decode the tokens.
        sentence: space-separated, already normalized input.
        max_length: number of decoding steps passed to the searcher.

    Returns:
        The decoded words, one per returned token (EOS/PAD entries included).

    Raises:
        KeyError: if `sentence` contains a word that is not in `voc`.
    """
    ### Format input sentence as a batch
    # words -> indexes
    indexes_batch = [indexesFromSentence(voc, sentence)]
    # Lengths stay on the CPU: pack_padded_sequence (used by the encoder)
    # requires CPU lengths on current PyTorch, and CPU lengths work on older versions too.
    lengths = torch.tensor([len(indexes) for indexes in indexes_batch])
    # Transpose dimensions of batch to match models' expectations, then move to the model device
    input_batch = torch.LongTensor(indexes_batch).transpose(0, 1).to(device)
    # Decode sentence with searcher; no gradients are needed for inference
    with torch.no_grad():
        tokens, scores = searcher(input_batch, lengths, max_length)
    # indexes -> words
    decoded_words = [voc.index2word[token.item()] for token in tokens]
    return decoded_words

# gets the output tokens and turns them to a next-line recommendation, using the var_dict
def specificalization(masked, var_dict):
    """Return unmask_source(masked, var_dict), i.e. the recommendation with the masks mapped back via var_dict."""
    return unmask_source(masked, var_dict)

# get a ready-to-execute next line recommendation for input_sentence
def get_recommendation(input_sentence, prints=False):
    """Produce a next-line recommendation for one code cell.

    Steps: classify the cell's workflow stage, pick that stage's
    encoder/decoder/vocabulary, mask and normalize the cell, greedily decode
    the next line, and unmask the result with the shared `var_dict`.

    Args:
        input_sentence: source of the user's code cell.
        prints: when True, print every intermediate step.

    Returns:
        Whatever specificalization() returns for the decoded line.

    Raises:
        ValueError: if the classifier returns a stage with no model.
        KeyError: if the masked cell contains a word unknown to the stage vocabulary.
    """
    if prints:
        print("Getting workflow stage for input cell...")
    stage = get_workflow_stage(input_sentence, class_model, class_tokenizer, labels_arr) # get workflow stage
    if prints:
        print("Worklfow stage is:" + stage +"\nSetting model accordingly...")

    # encoder, decoder and vocabulary of every workflow stage
    stage_models = {
        "Import": (encoder_Import, decoder_Import, Import_voc),
        "Load": (encoder_Load, decoder_Load, Load_voc),
        "Explore": (encoder_Explore, decoder_Explore, Explore_voc),
        "Prep": (encoder_Prep, decoder_Prep, Prep_voc),
        "Train": (encoder_Train, decoder_Train, Train_voc),
        "Eval": (encoder_Eval, decoder_Eval, Eval_voc),
    }
    if stage not in stage_models:
        # Previously this only printed a message and then crashed with an
        # UnboundLocalError a few lines later; fail with an explicit error instead.
        raise ValueError("Error getting workflow stage: {!r}".format(stage))
    encoder, decoder, voc = stage_models[stage]
    if prints:
        print("Encoder, Decoder, Voc set to " + stage + ".")

    searcher = GreedySearchDecoder(encoder, decoder) # set searcher

    if prints:
        print("Getting Masked summed representation...")
    input_sentence = get_summed_rep(input_sentence, var_dict) # masked representation of the cell
    if prints:
        print("Masked cell: "+input_sentence+"\nGetting next-line recommendation from model...")
    input_sentence = normalizeString(input_sentence) # same normalization as the training pairs
    output_words = evaluate(encoder, decoder, searcher, voc, input_sentence) # Evaluate sentence
    # Drop the special tokens; the remaining tokens are concatenated without separators
    output_words = [x for x in output_words if x not in ('EOS', 'PAD')]
    output_words = ''.join(output_words)
    if prints:
        print("Generalized Recommendation: "+output_words+"\nSpecificalizing...")
    recommendation = specificalization(output_words, var_dict) # adjust recommendation to user
    return recommendation
    
# chat with the chatbot (input from user)
def chat(prints=False):
    """Interactive loop: read a code cell, print the bot's recommendation, repeat.

    Typing 'q' or 'quit' ends the loop. A KeyError raised while producing a
    recommendation is reported and the loop continues; other exceptions propagate.
    """
    while True:
        try:
            input_sentence = input('> ')
            # Quit commands end the session
            if input_sentence in ('q', 'quit'):
                break

            print('Bot: ', get_recommendation(input_sentence, prints))

        except KeyError:
            print("Error: Encountered unknown word or masking failed.")

#### DEMO
Let's see some example recommendations:

In [15]:
######### DEMO - TODO: add good examples

You can <b>try it out</b> yourself, "chat" with the chatbot. Insert your cell of code and get a next-line recommendation:

<i>Note: call chat(True) to see the entire process or chat() to just get a recommendation</i>

In [17]:
# Start an interactive session with prints=True so every pipeline step is echoed; type 'q' or 'quit' to exit
chat(True)

> df = pd.read_csv('path.csv')
Getting workflow stage for input cell...
Worklfow stage is:Load
Setting model accordingly...
Encoder, Decoder, Voc set to Load.
Getting Masked summed representation...
Masked cell: var0=pandas.read_csv 
Getting next-line recommendation from model...
Generalized Recommendation: var1=pandas.read_csv
Specificalizing...
['var1=pandas.read_csv']


TypeError: unsupported operand type(s) for +: 'NoneType' and 'str'