In [1]:
import re
import math
from collections import defaultdict, Counter

# ---------------------------
# 0) Toy training dataset
# ---------------------------
# Labelled example utterances, keyed by intent name. The key order is the
# order in which the classifier iterates intents, so it is kept stable.
TRAIN = {
    "greet": ["hi", "hello", "hey", "good morning", "good evening"],
    "goodbye": ["bye", "goodbye", "see you", "catch you later"],
    "smalltalk": [
        "how are you",
        "what's up",
        "how's it going",
        "how are things",
    ],
    "check_weather": [
        "what's the weather",
        "weather today",
        "will it rain",
        "forecast for tomorrow",
        "how hot is it",
        "temperature in bengaluru",
    ],
    "book_flight": [
        "book a flight",
        "i need to fly",
        "can you book tickets",
        "flight to delhi",
        "i want to go to mumbai tomorrow",
        "get me a flight from bengaluru to paris",
    ],
}

# Gazetteer of lower-case, single-word city names recognised as entities.
# "bangalore" is listed so it can be matched; the extraction code maps it
# to "bengaluru".
CITIES = {
    "bengaluru",
    "bangalore",
    "mumbai",
    "delhi",
    "paris",
    "london",
    "hyderabad",
    "chennai",
    "kolkata",
}

# ---------------------------
# 1) Preprocessing
# ---------------------------
# Matches every character that is not a lower-case ASCII letter, a digit or
# whitespace. It is applied after lower-casing, so upper-case never reaches it.
PUNCT_RE = re.compile(r"[^a-z0-9\s]")


def tokenize(text: str):
    """Lower-case *text*, delete punctuation and split it on whitespace.

    Punctuation is removed rather than replaced by a space, so "what's"
    becomes the single token "whats". Returns a list of non-empty tokens
    (an empty list for blank input).
    """
    cleaned = PUNCT_RE.sub("", text.lower().strip())
    # str.split() with no argument never yields empty strings.
    return cleaned.split()

# ---------------------------
# 2) Build vocabulary + Naive Bayes
# ---------------------------
class NaiveBayesIntents:
    """Multinomial Naive Bayes intent classifier with add-one smoothing.

    Attributes populated by :meth:`fit`:
        intents:      intent labels, in the order of the training mapping.
        priors:       intent -> share of training examples with that label.
        vocab:        set of every token seen during training.
        word_counts:  intent -> Counter of token occurrences.
        total_words:  intent -> total number of tokens.
        likelihoods:  intent -> {token: smoothed P(token | intent)}.

    Args:
        tokenizer: optional callable mapping a string to a list of tokens.
            When omitted, the module-level ``tokenize`` function is used.
    """

    def __init__(self, tokenizer=None):
        self.tokenizer = tokenizer
        self.priors = {}               # P(intent)
        self.likelihoods = {}          # P(word | intent)
        self.vocab = set()
        self.word_counts = {}          # per-intent word counts
        self.total_words = {}          # per-intent total tokens
        self.intents = []

    def _tokens(self, text):
        """Tokenize *text* with the injected tokenizer or the module default."""
        # Looked up lazily so the default is resolved at call time.
        split = self.tokenizer if self.tokenizer is not None else tokenize
        return split(text)

    def fit(self, data):
        """Estimate priors and smoothed likelihoods from *data*.

        Args:
            data: mapping of intent label -> iterable of example strings.

        Every call rebuilds the model from scratch: the vocabulary and all
        counts from an earlier ``fit`` are discarded rather than merged.
        If *data* holds no examples at all, every prior is 0.0 instead of
        raising ZeroDivisionError.
        """
        intents = list(data.keys())
        doc_counts = Counter()
        word_counts = {intent: Counter() for intent in intents}

        for intent, examples in data.items():
            for ex in examples:
                doc_counts[intent] += 1
                word_counts[intent].update(self._tokens(ex))

        # Build the vocabulary fresh so tokens from a previous fit cannot
        # leak into the smoothing denominator.
        vocab = set()
        for counts in word_counts.values():
            vocab.update(counts)
        total_words = {intent: sum(word_counts[intent].values()) for intent in intents}

        total_docs = sum(doc_counts.values())
        priors = {
            intent: (doc_counts[intent] / total_docs) if total_docs else 0.0
            for intent in intents
        }

        # Laplace smoothing: (count + 1) / (tokens in intent + |vocab|).
        vocab_size = len(vocab)
        likelihoods = {}
        for intent in intents:
            denom = total_words[intent] + vocab_size
            counts = word_counts[intent]
            likelihoods[intent] = {w: (counts[w] + 1) / denom for w in vocab}

        self.intents = intents
        self.word_counts = word_counts
        self.total_words = total_words
        self.vocab = vocab
        self.priors = priors
        self.likelihoods = likelihoods

    def predict(self, text: str):
        """Return ``(best_intent, log_score)`` for *text*.

        The score is ``log P(intent) + sum(log P(word | intent))``. Words
        outside the vocabulary contribute ``log(1 / (tokens + |vocab|))``.
        On an unfitted model the result is ``(None, -1e18)``. Ties go to
        the intent that appears first in ``self.intents``.
        """
        words = self._tokens(text)
        vocab_size = len(self.vocab)
        best_intent, best_score = None, -1e18
        for intent in self.intents:
            # The epsilon keeps log() finite for intents with a zero prior.
            score = math.log(self.priors[intent] + 1e-12)
            # max(1, ...) avoids dividing by zero when nothing was learned.
            unseen = math.log(1 / max(1, self.total_words[intent] + vocab_size))
            known = self.likelihoods[intent]
            for w in words:
                score += math.log(known[w]) if w in self.vocab else unseen
            if score > best_score:
                best_score, best_intent = score, intent
        return best_intent, best_score

# ---------------------------
# 3) Entity extraction
# ---------------------------
# Ordered (compiled pattern, kind) pairs used to spot date/time mentions.
# The patterns contain only lower-case words, so callers lower-case first.
DATE_REGEXES = [
    (re.compile(pattern), kind)
    for kind, pattern in (
        ("relative_day", r"\b(today|tomorrow|day after tomorrow)\b"),
        ("weekday", r"\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b"),
        # e.g. 12/11 or 12-11-2025
        ("date_numeric", r"\b(\d{1,2}[/\-]\d{1,2}([/\-]\d{2,4})?)\b"),
        ("time_hhmm", r"\b(\d{1,2}:\d{2})\b"),
    )
]

def extract_cities(text):
    """Return the gazetteer cities mentioned in *text*.

    The text is tokenized, tokens found in CITIES are kept, "bangalore" is
    mapped to "bengaluru", and duplicates are dropped while preserving the
    order of first mention.
    """
    ordered = {}
    for token in tokenize(text):
        if token not in CITIES:
            continue
        # Convert the common alias before de-duplicating.
        city = "bengaluru" if token == "bangalore" else token
        # A dict keeps insertion order, so this acts as an ordered set.
        ordered.setdefault(city, None)
    return list(ordered)

def extract_dates(text):
    """Return every date/time mention in *text* as ``(kind, matched_text)``.

    Matching runs on the lower-cased text. Results are grouped by the order
    of DATE_REGEXES (not by position in the text); within one pattern they
    appear left to right.
    """
    lowered = text.lower()
    return [
        (kind, match.group(0))
        for pattern, kind in DATE_REGEXES
        for match in pattern.finditer(lowered)
    ]

# "from <words> to <words>"; each group may span several lower-case words.
_ROUTE_RE = re.compile(r"\bfrom ([a-z\s]+?) to ([a-z\s]+)\b")


def _pick_city(words, fallback):
    """Return the first entry of *words* found in CITIES, else *fallback*."""
    return next((w for w in words if w in CITIES), fallback)


def extract_entities(text):
    """Collect cities, dates and a "from X to Y" route guess from *text*.

    Returns a dict with:
        "cities": list of city names (see ``extract_cities``).
        "dates":  list of ``(kind, value)`` pairs (see ``extract_dates``).
        "from_city_guess" / "to_city_guess": present only when a
            "from ... to ..." phrase is found. Each is a single word and is
            not guaranteed to be a known city.
    """
    entities = {
        "cities": extract_cities(text),   # e.g. ["bengaluru","paris"]
        "dates": extract_dates(text)      # list of (kind, value)
    }
    match = _ROUTE_RE.search(text.lower())
    if match:
        origin_words = match.group(1).split()
        dest_words = match.group(2).split()
        # A group can be whitespace only (e.g. "from   to x"); skip it
        # instead of indexing into an empty list.
        if origin_words:
            # Prefer the known city nearest to "to"; otherwise fall back to
            # the last word of the phrase.
            entities["from_city_guess"] = _pick_city(
                reversed(origin_words), origin_words[-1]
            )
        if dest_words:
            # The destination group swallows the rest of the sentence, so
            # prefer the first known city after "to" over the trailing word
            # ("... to mumbai via paris" -> "mumbai").
            entities["to_city_guess"] = _pick_city(dest_words, dest_words[-1])
    return entities

# ---------------------------
# 4) Dialogue Manager (finite-state)
# ---------------------------
class DialogueManager:
    """Finite-state dialogue policy with a flight-booking slot-filling flow.

    Attributes:
        state:        "READY" or "BOOKING".
        slots:        {"from_city", "to_city", "date"} -> value or None.
        pending_slot: name of the slot the bot most recently asked for, or
                      None when no question is outstanding.
    """

    # (slot, action that asks for it), in the order the bot asks.
    _SLOT_PROMPTS = (
        ("from_city", "ask_from_city"),
        ("to_city", "ask_to_city"),
        ("date", "ask_date"),
    )
    # (slot, entity key holding a route guess, "<preposition> <word>" pattern)
    _CITY_CUES = (
        ("from_city", "from_city_guess", re.compile(r"\bfrom ([a-z]+)\b")),
        ("to_city", "to_city_guess", re.compile(r"\bto ([a-z]+)\b")),
    )
    _CITY_SLOTS = ("from_city", "to_city")

    def __init__(self):
        self.reset()

    def reset(self):
        """Return to READY and clear all slots and any outstanding question."""
        self.state = "READY"
        self.slots = {"from_city": None, "to_city": None, "date": None}
        self.pending_slot = None

    def is_booking_complete(self):
        """Return True when every booking slot holds a value."""
        return all(self.slots.values())

    @staticmethod
    def _canonical(city):
        """Map the "bangalore" alias to "bengaluru"; other names pass through."""
        return "bengaluru" if city == "bangalore" else city

    def _fill(self, slot, value):
        """Store *value* in *slot* unless the slot already has a value."""
        if not self.slots[slot]:
            self.slots[slot] = value

    def update_with_entities(self, entities, text=""):
        """Fill empty slots from *entities* (and, optionally, the raw *text*).

        Already-filled slots are never overwritten. Sources, in priority order:
          1. "from_city_guess"/"to_city_guess" entities and "from X"/"to X"
             phrases in *text*, accepted only if the word is in CITIES.
          2. Cities not yet used by a slot: two or more are taken as
             origin then destination; a single one answers the pending
             city question if there is one, else it is the destination.
          3. The first extracted date.

        Args:
            entities: dict as produced by ``extract_entities``.
            text: raw user utterance; optional, defaults to "".
        """
        lowered = (text or "").lower()
        for slot, guess_key, cue in self._CITY_CUES:
            guess = entities.get(guess_key)
            if guess and guess in CITIES:
                self._fill(slot, self._canonical(guess))
            # Scan every occurrence: in "to go to mumbai" only the second
            # "to" is followed by a city.
            for match in cue.finditer(lowered):
                if match.group(1) in CITIES:
                    self._fill(slot, self._canonical(match.group(1)))
                    break

        # Cities that have not already been assigned to a slot.
        used = {self.slots[name] for name in self._CITY_SLOTS if self.slots[name]}
        spare = [c for c in entities.get("cities", []) if c not in used]
        if len(spare) >= 2:
            self._fill("from_city", spare[0])
            for city in spare:
                # pick the next distinct city
                if city != self.slots["from_city"]:
                    self._fill("to_city", city)
                    break
        elif len(spare) == 1:
            # A bare city answers whichever city question is outstanding;
            # with no question pending it is treated as the destination.
            if self.pending_slot in self._CITY_SLOTS:
                target = self.pending_slot
            else:
                target = "to_city"
            self._fill(target, spare[0])

        # date: take first match as the date signal
        dates = entities.get("dates", [])
        if dates:
            self._fill("date", dates[0][1])

    def _answers_pending_slot(self, entities):
        """Return True if *entities* carry a value for the outstanding question."""
        if self.pending_slot == "date":
            return bool(entities.get("dates"))
        if self.pending_slot in self._CITY_SLOTS:
            return bool(entities.get("cities"))
        return False

    def _continue_booking(self, text, entities):
        """Advance the booking flow: fill slots, then ask or complete."""
        self.state = "BOOKING"
        self.update_with_entities(entities, text)
        for slot, prompt in self._SLOT_PROMPTS:
            if not self.slots[slot]:
                self.pending_slot = slot
                return prompt, {}
        booking = dict(self.slots)
        # Start fresh so a later utterance cannot re-book the same flight.
        self.reset()
        return "action_book_flight", booking

    def next_action(self, intent, text, entities):
        """Choose the next bot action.

        Args:
            intent: predicted intent label.
            text: raw user utterance.
            entities: dict as produced by ``extract_entities``.

        Returns:
            ``(action_name, params)``; *params* is empty except for
            "action_weather" (city, day) and "action_book_flight" (slots).
        """
        # Any time user says goodbye, reset.
        if intent == "goodbye":
            self.reset()
            return "utter_goodbye", {}

        # A direct answer to the question just asked wins over the predicted
        # intent; short replies such as "tomorrow" are easily mislabelled.
        if self.state == "BOOKING" and self._answers_pending_slot(entities):
            return self._continue_booking(text, entities)

        if intent == "greet" and self.state == "READY":
            return "utter_greet", {}

        if intent == "smalltalk":
            return "utter_smalltalk", {}

        if intent == "check_weather":
            # use first city if present, else default
            city = (entities.get("cities") or ["bengaluru"])[0]
            day = "today"
            if entities.get("dates"):
                day = entities["dates"][0][1]
            return "action_weather", {"city": city, "day": day}

        # While in BOOKING, anything else is treated as slot-filling.
        if intent == "book_flight" or self.state == "BOOKING":
            return self._continue_booking(text, entities)

        # fallback
        return "utter_fallback", {}

# ---------------------------
# 5) NLG (templated responses)
# ---------------------------
def nlg(action, params):
    """Render the bot's reply for *action*.

    Fixed replies come from a lookup table. "action_weather" reads the
    optional "city" and "day" params (defaults "bengaluru" and "today");
    "action_book_flight" requires "from_city", "to_city" and "date".
    Any unrecognised action yields "...".
    """
    canned_replies = (
        ("utter_greet", "Hi! How can I help you today?"),
        ("utter_goodbye", "Goodbye! Have a great day."),
        ("utter_smalltalk", "I'm doing great — thanks for asking! How can I assist you?"),
        ("ask_from_city", "From which city are you flying?"),
        ("ask_to_city", "To which city would you like to go?"),
        ("ask_date", "On which date would you like to travel (e.g., tomorrow, 12/11, Monday)?"),
        ("utter_fallback", "Sorry, I didn’t quite get that. Could you rephrase?"),
    )
    for name, reply in canned_replies:
        if action == name:
            return reply

    if action == "action_weather":
        where = params.get("city", "bengaluru").title()
        when = params.get("day", "today")
        return f"The weather in {where} {when} looks pleasant with mild temperatures."

    if action == "action_book_flight":
        origin = params["from_city"].title()
        destination = params["to_city"].title()
        travel_date = params["date"]
        return f"Done! I’ve (hypothetically) booked your flight from {origin} to {destination} on {travel_date}. ✈️"

    return "..."

# ---------------------------
# 6) Wire it up: the chat loop
# ---------------------------
def main():
    """Run the interactive console chat loop.

    Trains the intent classifier on TRAIN, then repeatedly reads a line,
    predicts its intent, extracts entities, asks the dialogue manager for
    an action and prints the rendered reply. Blank lines are ignored. The
    loop ends on "quit"/"exit", on end of input, or on Ctrl-C.
    """
    clf = NaiveBayesIntents()
    clf.fit(TRAIN)
    dm = DialogueManager()

    print("Bot: Hello! (type 'quit' to exit)")
    while True:
        try:
            user = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed/piped stdin or Ctrl-C: leave politely, not with a traceback.
            print()
            print("Bot: Bye!")
            break
        if not user:
            continue
        if user.lower() in {"quit", "exit"}:
            print("Bot: Bye!")
            break

        intent, _ = clf.predict(user)
        entities = extract_entities(user)
        action, params = dm.next_action(intent, user, entities)
        reply = nlg(action, params)
        print("Bot:", reply)


if __name__ == "__main__":
    main()


Bot: Hello! (type 'quit' to exit)


Bot: Hi! How can I help you today?
Bot: Bye!
