# In [None]:
import sys
import wikipedia
import wikipedia.exceptions as wiki_exceptions
import nltk
from nltk.tokenize import sent_tokenize
import re
import spacy

# Load the spaCy English pipeline once at import time. The resulting
# module-level `nlp` callable is what the QA_System methods below use to
# parse questions, queries and summary sentences for named entities.
nlp = spacy.load("en_core_web_sm")


class QA_System:
    """Console question-answering system that looks answers up on Wikipedia.

    Questions beginning with Who, What, When or Where are reduced to a search
    query, the summary of the first Wikipedia search hit is fetched, and a
    short answer is picked out of that summary. Every question/answer pair
    is appended to the log file given at construction time.
    """

    # Question words recognised by identify_question_type, tested in order.
    _QUESTION_WORDS = ("who", "what", "where", "when")

    # Templates used by extract_context; group 1 is the subject of the question.
    _CONTEXT_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'Where (?:is|was|are|did) (.+)',
            r'Who (?:is|was|are) (.+)',
            r'What (?:is|was) (.+)',
            r'When (?:is|was) (.+) born',
            r'When (?:is|was) (.+) birthday',
            r'When did (.+)',
        )
    )

    # Whole-word "is"/"was"; a plain substring test would also hit "this",
    # "history", "wash", ...
    _COPULA_RE = re.compile(r'\b(?:is|was)\b')

    _MONTH = (r'(?:January|February|March|April|May|June|July|August|'
              r'September|October|November|December)')

    # "February 22, 1732" / "February 22" or "14 March 1879" / "14 March".
    # The \b after the day stops "March 1879" from being read as "March 18".
    _BIRTH_DATE_RE = re.compile(
        r'\b(' + _MONTH + r' \d{1,2}\b(?:, \d{4})?|\d{1,2} ' + _MONTH + r'(?: \d{4})?)\b'
    )

    # An event verb followed by either a full date or a bare year.
    _EVENT_DATE_RE = re.compile(
        r'(?:started|began|occurred|took place|was fought|was held|broke out|commenced)'
        r'(?: in| on)? ([A-Z][a-z]+ \d{1,2}, \d{4}|\d{4})'
    )

    # Last-resort fallback: the first standalone four-digit number.
    _YEAR_RE = re.compile(r'\b(?:in )?(\d{4})\b')

    def __init__(self, logfile):
        """Remember the path of the file that question/answer pairs go to."""
        self.logfile = logfile

    def run(self):
        """Read questions from stdin until the user types "exit".

        Each answer is printed and the question/answer pair is logged.
        """
        print("*** This is a QA system. I will try to answer questions that start with Who, What, When, and Where. Type Exit to quit. ***")
        while True:
            # Keep the user's capitalisation: question-type detection and the
            # context patterns are already case-insensitive, while the
            # spaCy-based entity checks rely on proper-noun casing.
            question = input("Question: ").strip()
            if question.lower() == 'exit':
                print("Thank you, Goodbye.")
                break
            answer = self.answer_question(question)
            print(answer)
            self.log_question(question, answer)

    def answer_question(self, question):
        """Return an answer string for *question*.

        Questions that do not start with a supported question word get a
        fixed apology. Otherwise a search query is derived from the question
        (pattern match first, entity extraction as a fallback) and handed to
        search_wikipedia.
        """
        question_type = self.identify_question_type(question)
        if question_type is None:
            return "Sorry, I do not know the answer."

        refined_query = self.extract_context(question)
        if not refined_query:
            # Only run the spaCy pipeline when the cheap patterns found nothing.
            refined_query = self.extract_dynamic_entity(nlp(question), question_type)

        if not refined_query:
            return "Sorry, I couldn't extract relevant information from your question."

        print(f"Attempting to search Wikipedia for: {refined_query}")
        return self.search_wikipedia(refined_query, question_type, question)

    def identify_question_type(self, question):
        """Return "Who", "What", "Where" or "When" based on how *question*
        starts (case-insensitive), or None when it starts with none of them.
        """
        question_lower = question.lower()
        for word in self._QUESTION_WORDS:
            if question_lower.startswith(word):
                return word.capitalize()
        return None

    def extract_context(self, question):
        """Return the subject of *question* using the fixed templates.

        The first matching template wins. Trailing "?", "!" and whitespace
        are removed from the captured subject so that it can be compared
        against summary sentences and used as a clean search query.
        Returns None when no template matches or the subject is empty.
        """
        for pattern in self._CONTEXT_PATTERNS:
            match = pattern.match(question)
            if match:
                context = match.group(1).rstrip("?! \t\r\n").strip()
                return context or None
        return None

    def extract_dynamic_entity(self, doc, question_type):
        """Build a query from the named entities of a parsed question.

        *doc* is the object returned by ``nlp(question)``. Texts of entities
        labelled PERSON, ORG, GPE or DATE are joined with spaces. When there
        are none, the whole question text is returned lowercased with every
        character other than letters, digits and whitespace removed.
        *question_type* is accepted for interface compatibility and unused.
        """
        entities = [ent.text for ent in doc.ents if ent.label_ in {"PERSON", "ORG", "GPE", "DATE"}]
        if entities:
            return " ".join(entities)
        return re.sub(r'[^a-zA-Z0-9\s]', '', doc.text).strip().lower()

    def search_wikipedia(self, query, question_type, question):
        """Search Wikipedia for *query* and answer from the top hit's summary.

        Returns the text chosen by summarize_text, or an apology / error
        message string when nothing is found or the lookup fails. No
        exception escapes this method. *question* is accepted for interface
        compatibility and unused.
        """
        try:
            search_results = wikipedia.search(query)
            if not search_results:
                return "I am sorry, I couldn't find relevant Wikipedia pages."

            # The title comes straight from the search results, so it must be
            # looked up verbatim; auto-suggestion could swap it for another page.
            summary = wikipedia.summary(search_results[0], sentences=5, auto_suggest=False)
            meaningful_summary = self.summarize_text(summary, question_type, query)

            return meaningful_summary if meaningful_summary else "I am sorry, I don't know the answer."

        except wiki_exceptions.DisambiguationError:
            return "I am sorry, I don't know the answer (disambiguation error)."
        except wiki_exceptions.PageError:
            return "I am sorry, I couldn't find the page."
        except Exception as e:
            # Boundary with the network/library: report instead of crashing the loop.
            return f"An error occurred while searching: {e}"

    def summarize_text(self, text, question_type, query):
        """Pick an answer out of a Wikipedia summary.

        * Who   - up to two sentences that mention *query* and contain the
                  word "is" or "was"; otherwise the first sentence.
        * What  - the first sentence.
        * When  - a templated sentence with a birth date (when *query* is
                  recognised as a person) or an event date/year.
        * Where - the first sentence containing a GPE entity.

        May return an empty string when *text* has no sentences; other
        question types fall through to a fixed apology.
        """
        sentences = sent_tokenize(text)

        if question_type == "Who":
            results = [
                sentence for sentence in sentences
                if query.lower() in sentence.lower() and self._COPULA_RE.search(sentence)
            ]
            # Guard against an empty summary instead of raising IndexError.
            if not results and sentences:
                results.append(sentences[0])
            return " ".join(results[:2])

        if question_type == "What":
            return " ".join(sentences[:1])

        if question_type == "When":
            return self._summarize_when(text, query)

        if question_type == "Where":
            for sentence in sentences:
                if any(ent.label_ == "GPE" for ent in nlp(sentence).ents):
                    return sentence
            return "Location information not found."

        return "I am sorry, I don't know the answer."

    def _summarize_when(self, text, query):
        """Answer a "When" question from *text* for the subject *query*."""
        is_person = any(ent.label_ == "PERSON" for ent in nlp(query).ents)
        if is_person:
            date = self._find_birth_date(text)
            if date:
                return f"{query.title()} was born on {date}."
            return "Date or time information not found."

        clean_name = self.clean_display_name(query)
        event_match = self._EVENT_DATE_RE.search(text)
        if event_match:
            return f"{clean_name} began in {event_match.group(1)}."
        year_match = self._YEAR_RE.search(text)
        if year_match:
            return f"{clean_name} occurred in {year_match.group(1)}."
        return "Date or time information not found."

    @staticmethod
    def _find_birth_date(text):
        """Return the first calendar date found in *text*, or None."""
        match = QA_System._BIRTH_DATE_RE.search(text)
        return match.group(1) if match else None

    def log_question(self, question, answer):
        """Append one question/answer pair to the log file (UTF-8)."""
        with open(self.logfile, 'a', encoding='utf-8') as log:
            log.write(f"Question: {question}\n")
            log.write(f"Answer: {answer}\n\n")

    def clean_display_name(self, query):
        """Turn a raw query into a title-cased display name.

        Removes filler words such as "started", "began", "was", "did" and
        "when", collapses whitespace and trims surrounding "?" and spaces.
        """
        query = re.sub(r'\b(start(ed)?|begin|began|occur(red)?|happen(ed)?|was|did|when)\b', '', query, flags=re.IGNORECASE)
        query = re.sub(r'\s+', ' ', query).strip("? ").strip()
        return query.title()


def main():
    """Prompt for a log file name and run an interactive QA session.

    Any exception raised while constructing or running the QA system is
    printed rather than propagated, and "Logging Complete" is printed on the
    way out whether or not an error occurred.
    """
    chosen_log = input("Enter the name of the log file: ").strip()
    try:
        QA_System(chosen_log).run()
    except Exception as error:
        print(error)
    finally:
        print("Logging Complete")

# In [None]:
# Start the interactive session only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()