In [None]:
 import nltk
 # Fetch the NLTK resources this notebook relies on: the 'punkt' tokenizer
 # models and the 'stopwords' corpus.
 # NOTE(review): newer NLTK releases may also require the 'punkt_tab'
 # resource for tokenization - confirm against the installed version.
 nltk.download('punkt')
 nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from heapq import nlargest

# Redefined document collection with descriptions on world economics.
# Maps an integer document ID to the document's text; each text has the form
# "<title>: <description>".
document_collection = {
    1: "Global Economic Outlook (2023): This document provides a comprehensive overview of the global economic outlook for the year 2023, discussing economic trends, growth prospects, and potential challenges that the world economy is expected to face in the upcoming year.",
    2: "Inflation's Impact on Trade and Investments: This document explores the intricate relationship between inflation and its influence on international trade and investments. It delves into the effects of inflation on exchange rates, trade policies, and investment decisions.",
    3: "Emerging Market Economies in Global Finance: Focusing on the significance of emerging market economies, this document examines their role in the global financial system. It analyzes their growth, contributions, and the impact they have on the interconnected world of finance.",
    4: "Central Banks and Economic Stability: This document emphasizes the critical role played by central banks in stabilizing world economies. It discusses central banks' monetary policies, their influence on interest rates, and the tools they employ to ensure economic stability.",
    5: "Trade Wars' Disruption of Supply Chains: Exploring the ramifications of trade wars on a global scale, this document investigates how trade conflicts disrupt and alter global supply chains. It scrutinizes the evolving landscape of trade dynamics and its consequences on the world economy.",
    6: "Sustainable Development and Green Finance: Focusing on the increasing importance of sustainable development, this document delves into the concepts of green finance and its relevance in the global economy. It spotlights environmentally responsible financial practices.",
    7: "Lessons from the 2008 Global Economic Crisis: This document revisits the 2008 global economic crisis, extracting valuable lessons learned from that financial meltdown. It places emphasis on the regulatory changes, risk management strategies, and measures put in place to prevent future crises.",
    8: "Cryptocurrencies' Impact on Finance: The document explores the disruptive influence of cryptocurrencies in the financial sector. It delves into the challenges they pose to traditional financial systems and the potential transformations they bring to the financial landscape.",
    9: "Belt and Road Initiative and Global Infrastructure: Shining a spotlight on China's Belt and Road Initiative, this document elucidates this vast global infrastructure development project. It scrutinizes its implications for world economics and its role in shaping global geopolitics.",
    # NOTE(review): the description below stops after "delves." and looks
    # truncated compared with the other entries - confirm the intended text.
    10: "The Eurozone Crisis and the Future of the EU: This document delves." }



# Build an inverted index: lower-cased token -> list of IDs of the documents
# that contain it (each document ID listed at most once per token).
inverted_index = {}
for doc_id, doc_text in document_collection.items():
    # dict.fromkeys() drops repeated tokens while keeping first-seen order, so
    # a token occurring several times in one document no longer adds the same
    # doc_id to its posting list more than once.
    for word in dict.fromkeys(word_tokenize(doc_text.lower())):
        inverted_index.setdefault(word, []).append(doc_id)

def retrieve_documents(query):
    """Return the texts of all documents that contain any term of *query*.

    The query is lower-cased and split on whitespace. Each term is looked up
    in ``inverted_index`` both as typed and with surrounding punctuation
    stripped, so that input such as ``"inflation?"`` or ``"trade,"`` still
    matches the punctuation-free tokens stored in the index.

    Args:
        query: Free-text query string.

    Returns:
        A list of document texts taken from ``document_collection``, ordered
        by ascending document ID. The list is empty when no term matches.
    """
    import string

    matching_ids = set()
    for raw_term in query.lower().split():
        # Try the raw term first (keeps every match the plain split produced),
        # then the variant without leading/trailing punctuation.
        for term in (raw_term, raw_term.strip(string.punctuation)):
            matching_ids.update(inverted_index.get(term, ()))

    # Sort so the result order is deterministic instead of depending on set
    # iteration order.
    return [document_collection[doc_id] for doc_id in sorted(matching_ids)]

def summarize(text, n, max_sentence_words=30):
    """Build an extractive summary from the *n* highest-scoring sentences.

    Every token of *text* that is not an English stop word and contains at
    least one alphanumeric character is weighted by its occurrence count
    divided by the count of the most frequent such token. A sentence's score
    is the sum of the weights of its tokens.

    Args:
        text: The text to summarize.
        n: Maximum number of sentences to include in the summary.
        max_sentence_words: Sentences with this many or more space-separated
            words are never selected. Defaults to 30.

    Returns:
        The selected sentences joined by single spaces, highest score first.
        An empty string is returned when *text* contains no scorable token.
    """
    from collections import Counter

    stop_words = set(stopwords.words('english'))

    def is_content_token(token):
        # Pure punctuation tokens (",", ".", ":") would otherwise dominate the
        # frequency table and distort the normalisation.
        return token not in stop_words and any(ch.isalnum() for ch in token)

    word_freq = Counter(
        token for token in word_tokenize(text.lower()) if is_content_token(token)
    )
    if not word_freq:
        # Empty or stop-word-only input: nothing to score, and max() below
        # would raise ValueError on an empty sequence.
        return ''

    max_freq = max(word_freq.values())
    weights = {word: count / max_freq for word, count in word_freq.items()}

    sent_score = {}
    for sent in sent_tokenize(text):
        # The length limit does not depend on the individual word, so check it
        # once per sentence.
        if len(sent.split(' ')) >= max_sentence_words:
            continue
        token_weights = [
            weights[token]
            for token in word_tokenize(sent.lower())
            if token in weights
        ]
        if token_weights:
            # Identical sentences share one key, so their scores accumulate.
            sent_score[sent] = sent_score.get(sent, 0) + sum(token_weights)

    summary_sents = nlargest(n, sent_score, key=sent_score.get)
    return ' '.join(summary_sents)

# Interactive driver: read a query, fetch the matching documents and print a
# three-sentence summary of their combined text.
query = input('Enter a query: ')
retrieved_docs = retrieve_documents(query)

if not retrieved_docs:
    print("No relevant documents found for the query.")
else:
    # Concatenate all hits into one text so they are summarized together.
    summarized_text = " ".join(retrieved_docs)
    summary = summarize(summarized_text, 3)
    print("Summary based on the retrieved documents:", summary, sep="\n")


Enter a query: inflation
Summary based on the retrieved documents:
Inflation's Impact on Trade and Investments: This document explores the intricate relationship between inflation and its influence on international trade and investments. It delves into the effects of inflation on exchange rates, trade policies, and investment decisions.
