In [1]:
import random
import string
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



In [2]:
from google.colab import drive
# Mount Google Drive at /content/gdrive/; the corpus path used when loading
# the CSV starts with 'gdrive/', i.e. it points inside this mount.
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [3]:
# Location of the cleaned parks corpus CSV. The path is relative, so it is
# resolved against the current working directory.
# NOTE(review): presumably the working directory is /content (where Drive is
# mounted) -- confirm before running outside Colab.
CORPUS_PATH = (
    'gdrive/MyDrive/Colab Notebooks/MSDS453/FinalProject/parks_clean.csv'
)

# Read the corpus into a DataFrame and preview the first rows.
national_parks_corpus = pd.read_csv(CORPUS_PATH)
national_parks_corpus.head()

Unnamed: 0,park,text,char_length,token_lengths
0,Acadia National Park,Acadia National Park is an American national p...,46957,7341
1,Arches National Park,Arches National Park is a national park in eas...,16175,2536
2,Badlands National Park,Badlands National Park (Lakota: Makȟóšiča) is ...,19424,3049
3,Big Bend National Park,Big Bend National Park is an American national...,23315,3722
4,Biscayne National Park,Biscayne National Park is an American national...,49014,7817


In [14]:
import time
import warnings
# Install a global filter that ignores every warning for the rest of the
# session.
# NOTE(review): this blanket filter also hides warnings raised by the
# libraries used below; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

## Import NLTK Packages

In [4]:
# !pip install nltk
import nltk
from nltk.corpus import stopwords
# Only run this once, they will be downloaded.
nltk.download('stopwords',quiet=True)
nltk.download('wordnet',quiet=True)
nltk.download('punkt',quiet=True)
# Recent NLTK releases load the Punkt sentence-tokenizer models from the
# 'punkt_tab' resource instead of 'punkt'; without it sent_tokenize /
# word_tokenize raise LookupError. With quiet=True the call simply returns
# False if the resource cannot be fetched, so it is harmless on older setups.
nltk.download('punkt_tab',quiet=True)
nltk.download('omw-1.4',quiet=True)

True

## Preprocessing the Raw Text

In [5]:
# Add a lower-cased copy of each park's raw text as a new 'lower_text' column;
# the original 'text' column is left untouched.
national_parks_corpus['lower_text'] = national_parks_corpus['text'].str.lower()
# Preview the frame with the new column.
national_parks_corpus.head()

Unnamed: 0,park,text,char_length,token_lengths,lower_text
0,Acadia National Park,Acadia National Park is an American national p...,46957,7341,acadia national park is an american national p...
1,Arches National Park,Arches National Park is a national park in eas...,16175,2536,arches national park is a national park in eas...
2,Badlands National Park,Badlands National Park (Lakota: Makȟóšiča) is ...,19424,3049,badlands national park (lakota: makȟóšiča) is ...
3,Big Bend National Park,Big Bend National Park is an American national...,23315,3722,big bend national park is an american national...
4,Biscayne National Park,Biscayne National Park is an American national...,49014,7817,biscayne national park is an american national...


In [6]:
# Tokenize the lower-cased text of every row with NLTK: one list of sentences
# ('sent_tokens') and one list of word tokens ('word_tokens') per park.
national_parks_corpus['sent_tokens'] = national_parks_corpus['lower_text'].apply(nltk.sent_tokenize)
national_parks_corpus['word_tokens'] = national_parks_corpus['lower_text'].apply(nltk.word_tokenize)
# Preview the frame with the two new token columns.
national_parks_corpus.head()

Unnamed: 0,park,text,char_length,token_lengths,lower_text,sent_tokens,word_tokens
0,Acadia National Park,Acadia National Park is an American national p...,46957,7341,acadia national park is an american national p...,[acadia national park is an american national ...,"[acadia, national, park, is, an, american, nat..."
1,Arches National Park,Arches National Park is a national park in eas...,16175,2536,arches national park is a national park in eas...,[arches national park is a national park in ea...,"[arches, national, park, is, a, national, park..."
2,Badlands National Park,Badlands National Park (Lakota: Makȟóšiča) is ...,19424,3049,badlands national park (lakota: makȟóšiča) is ...,[badlands national park (lakota: makȟóšiča) is...,"[badlands, national, park, (, lakota, :, makȟó..."
3,Big Bend National Park,Big Bend National Park is an American national...,23315,3722,big bend national park is an american national...,[big bend national park is an american nationa...,"[big, bend, national, park, is, an, american, ..."
4,Biscayne National Park,Biscayne National Park is an American national...,49014,7817,biscayne national park is an american national...,[biscayne national park is an american nationa...,"[biscayne, national, park, is, an, american, n..."


In [7]:
def _flatten(nested):
    """Concatenate an iterable of lists into one flat list, preserving order."""
    flat = []
    for chunk in nested:
        flat.extend(chunk)
    return flat


# Corpus-wide lists: every sentence and every word token from all parks,
# in row order.
sent_tokens = _flatten(national_parks_corpus['sent_tokens'])
word_tokens = _flatten(national_parks_corpus['word_tokens'])

In [8]:
lemmer = nltk.stem.WordNetLemmatizer()

# Tokens discarded before lemmatizing: English stop words plus every single
# punctuation character. Built once here instead of on every LemNormalize
# call, because the vectorizer invokes the tokenizer for each corpus sentence
# on each query.
_STOP_TOKENS = frozenset(stopwords.words('english') + list(string.punctuation))

# Translation table mapping each punctuation code point to None (i.e. delete).
remove_punct_dict = str.maketrans('', '', string.punctuation)


def LemTokens(tokens):
    """Return the WordNet lemma of every token, in the same order."""
    return [lemmer.lemmatize(token) for token in tokens]


def LemNormalize(text):
    """Normalize raw text into a list of lemmatized tokens.

    Steps: lower-case and word-tokenize ``text``; drop stop words and
    stand-alone punctuation tokens; delete any punctuation characters left
    inside the remaining tokens; tokenize again and lemmatize.

    Args:
        text: The string to normalize.

    Returns:
        list[str]: Lemmatized tokens.
    """
    kept = [tok for tok in nltk.word_tokenize(text.lower())
            if tok not in _STOP_TOKENS]
    # The kept tokens are already lower-case, so no second .lower() is needed.
    stripped = ' '.join(kept).translate(remove_punct_dict)
    return LemTokens(nltk.word_tokenize(stripped))




## Keyword Matching for Greetings

In [9]:
GREETING_INPUTS = ("hello", "hi", "greetings", "sup", "what's up", "hey")
GREETING_RESPONSES = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]

def greeting(sentence):
  """Return a random greeting reply if the sentence contains a greeting.

  Matching is case-insensitive, ignores punctuation attached to the start or
  end of a word ("Hello!" matches "hello") and works on whole words, so
  multi-word entries such as "what's up" match while "this" does not match
  "hi".

  Args:
    sentence: The user's input text.

  Returns:
    A string drawn from GREETING_RESPONSES, or None when no greeting from
    GREETING_INPUTS occurs in the sentence.
  """
  # Strip punctuation from the edges of each word only; inner apostrophes
  # (as in "what's") are kept.
  words = [word.strip(string.punctuation) for word in sentence.lower().split()]
  # Pad with spaces so phrase lookups only hit whole-word boundaries.
  padded = ' ' + ' '.join(word for word in words if word) + ' '
  for phrase in GREETING_INPUTS:
    if ' ' + phrase + ' ' in padded:
      return random.choice(GREETING_RESPONSES)
  return None

## Generating a Response

In [10]:
def response(user_response):
    """Return the corpus sentence most similar to the user's query.

    Contract with the caller: the query must already be the LAST element of
    the module-level ``sent_tokens`` list. The whole list (corpus sentences
    plus query) is TF-IDF vectorized with ``LemNormalize`` as tokenizer, and
    the query row is compared to every row by cosine similarity.

    Args:
        user_response: The user's query text. Not read directly; the query is
            taken from ``sent_tokens[-1]``.

    Returns:
        str: The best-matching sentence from ``sent_tokens`` (excluding the
        query itself), or an apology message when nothing shares any term
        with the query.
    """
    fallback = "I am sorry! I don't understand you"
    # Need the query plus at least one corpus sentence to compare against.
    if len(sent_tokens) < 2:
        return fallback

    # The vectorizer is refit on every call because the query is part of the
    # fitted documents.
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sent_tokens)
    similarities = cosine_similarity(tfidf[-1], tfidf).flatten()

    # Exclude the query's own row explicitly instead of assuming it sorts
    # last: a corpus sentence tying with the query could otherwise cause the
    # query itself to be returned.
    similarities[-1] = -1.0
    idx = similarities.argmax()

    # TF-IDF cosine similarities are non-negative; zero means no shared terms.
    if similarities[idx] <= 0:
        return fallback
    return sent_tokens[idx]


In [16]:
# Interactive chat loop. Reads one line per turn until the user types 'bye',
# 'thanks' or 'thank you'. Greetings get a canned reply; anything else is
# answered by response(), and the time from reading the input to printing the
# answer is recorded in `times`.
flag = True
print("NPSChat: My name is NPSChat. I will answer your queries about US National Parks. If you want to exit, type 'Bye'!")
times = []
while flag:
    user_response = input()
    start = time.time()
    # Normalize case and surrounding whitespace so "Bye " still exits.
    user_response = user_response.lower().strip()
    if user_response == 'bye':
        flag = False
        print("NPSChat: Bye! take care..")
        # Only report an average when at least one query was timed; the mean
        # of an empty list is nan.
        if times:
            print(f"\nAverage response time: {round(np.mean(times),2)} seconds")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("NPSChat: You are welcome..")
    else:
        # Call greeting() once: it picks its reply at random, so a second
        # call is wasted work.
        greeting_reply = greeting(user_response)
        if greeting_reply is not None:
            print("NPSChat: " + greeting_reply)
        else:
            # response() expects the query as the last element of sent_tokens.
            sent_tokens.append(user_response)
            try:
                # NOTE(review): word_tokens / final_words are not read by any
                # visible cell; kept in case later cells depend on them.
                word_tokens = word_tokens + nltk.word_tokenize(user_response)
                final_words = list(set(word_tokens))
                print("NPSChat: ", end="")
                print(response(user_response))
                end = time.time()
                times.append(end - start)
            finally:
                # Remove exactly the query appended above, even if response()
                # raised. list.remove() would delete the FIRST equal element,
                # i.e. a real corpus sentence when the user typed one verbatim.
                sent_tokens.pop()

NPSChat: My name is NPSChat. I will answer your queries about US National Parks. If you want to exit, type 'Bye'!
When was Yellowstone National Park established?
NPSChat: the yellowstone national park archives maintain collections of historical records of yellowstone and the national park service.
What factors make the Grand Canyon unique?
NPSChat: grand canyon association.
What activities are available in SHenandoah National Park?
NPSChat: the results of long's residency were featured in the photography exhibit "wild beauty: the artful nature of shenandoah national park" held at the looking glass art gallery in the historic hawley silk mill, in hawley, pennsylvania.list of amphibians of shenandoah national parklist of birds of shenandoah national parklist of national parks of the united statesofficial website  of the national park serviceshenandoah national park— when past is present: archaeology of the displaced in shenandoah national parknasa earth observatory satellite images of sh