In [1]:
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 29 11:30:07 2019

@author: Vaishnavi Killekar

This file performs all the background processing.
The model is trained with the dataset.
All user queries are processed here and a response is generated which is sent to the controller.
"""

class Balaram:

	
	def __init__(self):
		import pandas as pd
		import numpy as np
		
		global pd, np
		global X, y
		global X_train, X_test, y_train, y_test, y_pred
		global label_map
		
		print("Instance created successfully!")



	def setup_model(self):
		'''
		Load the dataset and prepare it to the train the model
		'''
		# Importing dataset and splitting into words and labels
		dataset = pd.read_csv('data-tags.csv')
		X = dataset.iloc[:, :-1].values
		y = dataset.iloc[:, 1].values
		X = X.reshape(628,)
		print("Dataset successfully loaded!")
		
		# Create a bag of words model for words
		from sklearn.feature_extraction.text import CountVectorizer
		cv = CountVectorizer(max_features=1500)
		X = cv.fit_transform(X.astype('U')).toarray()
		print("Bag of words created!")

		# Save CountVectorizer state
		import pickle
		filename = 'countVectorizer.sav'
		pickle.dump(cv, open(filename, 'wb'))
		print("CountVectorizer state saved!")
		
		# Encoding categorical data of labels
		from sklearn.preprocessing import LabelEncoder
		labelencoder_y = LabelEncoder()
		y = labelencoder_y.fit_transform(y.astype(str))
		print("Encoded the classes!")

		# Return a dict mapping labels to their integer values
		res = {}
		for cl in labelencoder_y.classes_:
			res.update({cl:labelencoder_y.transform([cl])[0]})
	    
		global label_map
		label_map = res
		print("Label mapping obtained!")
		
		# Fit the classifier to dataset
		from sklearn.naive_bayes import GaussianNB
		classifier = GaussianNB()
		classifier.fit(X, y)
		print("Model trained successfully!")
		
		# Save trained model state
		filename = 'classifier.sav'
		pickle.dump(classifier, open(filename, 'wb'))
		print("Trained model saved!")
	
	
	
	def get_entities(self, text):
		'''
		Extract entities from user text
		'''
		# Restore model state to make prediction
		import pickle
		load_cv = pickle.load(open('countVectorizer.sav', 'rb'))
		text = load_cv.transform(text).toarray()
# 		print("CV loaded!")

		load_classifier = pickle.load(open('classifier.sav', 'rb'))
		response_tags = load_classifier.predict(text)

		entity_list=[]
		for tag in response_tags:
			if tag in label_map.values():
				entity_list.append(list(label_map.keys())[list(label_map.values()).index(tag)])

		return entity_list
	
	
	
	def generateResponse(self, text):
		'''
		Generate response for user text
		'''
# 		print("Received: ", text)
		
		# Extract entities from text
		entities = self.get_entities(text);
		
# 		print(entities)
		
		# Mapping between tokens and entity tags
		token_entity_map=dict(zip(entities, text))
		print(token_entity_map)
		
		# Fetch data from database based on available information provided by user
		self.fetch_data(token_entity_map)

		
	def fetch_data(self, entity_tags):
		'''
		Fetch the data from database based on received entities
		'''
		
		if "GREET" in entity_tags:
			if "USR" and "INT" in entity_tags:
				print("Namaste, "+entity_tags.get("NAME")+"!")
			else:
				print("Namaste! I'm Balaram.")
				
		elif "WTR" and "TIME" and not "CROP" or "ADJ" in entity_tags:
			print("Weather for the asked day/week")
				
		elif "CROP" in entity_tags:
			if "CUL" in entity_tags:
				if "SOIL" in entity_tags:
					print("Crop can be Cultivated in mentioned soil(s)")
				else:
					print("Cultivation info along with season")
			
			elif "IRR" in entity_tags:
				print("Irrigation info for crop")
				
			elif "TIME" in entity_tags:
				print("Crop timing")
				
			elif "FTLZ" in entity_tags:
				print("Fertilizer for crop")
				
			elif "PEST" in entity_tags:
				print("Pesticide for crop")
				
			elif "RAIN" in  entity_tags:
				print("Rainfall info for crop")
				
			elif "SOIL" in entity_tags:
				print("Soil info for crop")
				
			elif "WTR" in  entity_tags:
				print("Crop weather info")
				
			elif "SOW" in entity_tags:
				print("Crop sowing info")
				
			elif "REAP" in entity_tags:
				print("Crop reaping info")

			elif "YLD" in entity_tags:
				print("Crop yield info")
				
			elif "SEED" and "TYPE" in entity_tags:
				print("Crop seed info")
				
			elif "COST" in entity_tags:
				print("Market price info of crop")		
				
			elif "QTY" and "MSR" or "SEED" in entity_tags:
				print("Seed density of crop")

In [None]:
ask = Balaram()

# Load data and train model
ask.setup_model()

print("Namaste! I am Balaram.")

def process_input(text):
	'''
	Process the user input i.e., remove all punctuation and special symbols.
	Tokenize the input for tagging
	'''
	# Strip text of all symbols
	import re
	text = re.sub('[^A-Za-z]', ' ', text)
	
	# Convert text to lower
	text = text.lower()
	
	# Tokenize text into individual words
	text = text.split()
	
	return text

while True:
	# Get user input
	text = input()
	
	# Process input
	text = process_input(text)
# 	print(text)
	
	# Response generation
	ask.generateResponse(text)

Instance created successfully!
Dataset successfully loaded!
Bag of words created!
CountVectorizer state saved!
Encoded the classes!
Label mapping obtained!
Model trained successfully!
Trained model saved!
Namaste! I am Balaram.
Hello
{'GREET': 'hello'}
Namaste! I'm Balaram.
Rainfall for rice?
{'RAIN': 'rainfall', 'SW': 'for', 'CROP': 'rice'}
Rainfall info for crop
Weather today?
{'WTR': 'weather', 'TIME': 'today'}
