In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from transformers import DistilBertModel, DistilBertTokenizer
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import pptx

# Load PowerPoint data
def extract_text_from_pptx(pptx_file):
    """Return the text of every slide in a PowerPoint file, one string per slide.

    Args:
        pptx_file: Value handed straight to ``pptx.Presentation``.

    Returns:
        A list with one entry per slide, in slide order. Each entry is the
        ``text`` of every shape on that slide that exposes a ``text``
        attribute, joined with newlines. A slide without any such shape
        contributes an empty string.
    """
    presentation = pptx.Presentation(pptx_file)
    return [
        "\n".join(shape.text for shape in slide.shapes if hasattr(shape, "text"))
        for slide in presentation.slides
    ]

# Preprocess the slides
def preprocess_text(text):
    """Drop single-character tokens from ``text``.

    The text is split on runs of whitespace, every token of length one is
    discarded, and the remaining tokens are re-joined with single spaces.

    Args:
        text: Raw slide text.

    Returns:
        The filtered text as a single space-separated string.
    """
    return ' '.join(filter(lambda token: len(token) > 1, text.split()))

def train_summarizer(pptx_filepath):
    """Fine-tune a DistilBERT slide scorer on one deck and build an extractive summary.

    The slides are tokenized as a single padded batch, a linear + sigmoid head on
    top of the first-token DistilBERT output is trained for a few full-batch
    epochs, and every slide whose rounded score is 1 is copied into the summary.

    NOTE(review): the training targets are synthetic -- the first half of the
    slides is labelled 1 and the rest 0 -- so the model is only taught that
    "early slides matter". Replace ``labels`` with real annotations if available.

    Args:
        pptx_filepath: Path of the presentation, forwarded to
            ``extract_text_from_pptx``.

    Returns:
        A ``(final_summary, slides_text)`` tuple. ``final_summary`` is the raw
        text of the selected slides, each followed by a newline; ``slides_text``
        is the list of raw per-slide strings. An empty deck yields ``("", [])``.
    """
    slides_text = extract_text_from_pptx(pptx_filepath)
    # Nothing to tokenize or train on; the tokenizer would fail on an empty batch.
    if not slides_text:
        return "", slides_text

    preprocessed_slides = [preprocess_text(text) for text in slides_text]

    model_name = "distilbert-base-uncased"
    tokenizer = DistilBertTokenizer.from_pretrained(model_name)
    distilbert = DistilBertModel.from_pretrained(model_name)

    # truncation=True keeps long slides within the model's maximum sequence length;
    # the attention mask stops the encoder from attending to padding tokens.
    encoded = tokenizer(preprocessed_slides, padding=True, truncation=True, return_tensors="pt")
    input_ids = encoded["input_ids"]
    attention_mask = encoded["attention_mask"]

    class DistilBertSummarizer(nn.Module):
        """DistilBERT encoder with a single-logit sigmoid head over the first token."""

        def __init__(self, distilbert):
            super(DistilBertSummarizer, self).__init__()
            self.distilbert = distilbert
            self.fc = nn.Linear(distilbert.config.hidden_size, 1)
            self.sigmoid = nn.Sigmoid()

        def forward(self, input_ids, attention_mask=None):
            outputs = self.distilbert(input_ids, attention_mask=attention_mask)[0]
            cls_output = outputs[:, 0, :]  # hidden state at the first position
            scores = self.fc(cls_output)
            return self.sigmoid(scores)

    num_epochs = 5
    learning_rate = 1e-5

    model = DistilBertSummarizer(distilbert)
    criterion = nn.BCELoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

    # Synthetic targets: first half of the deck -> 1, remainder -> 0, shaped (n_slides, 1).
    n_positive = len(slides_text) // 2
    labels = torch.tensor(
        [1] * n_positive + [0] * (len(slides_text) - n_positive),
        dtype=torch.float32,
    ).unsqueeze(1)

    for epoch in range(num_epochs):
        outputs = model(input_ids, attention_mask)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    print('Training finished!')

    model.eval()  # make sure inference never runs with dropout enabled
    with torch.no_grad():
        outputs = model(input_ids, attention_mask)
        # squeeze(1) (not squeeze()) keeps a list even for a one-slide deck,
        # where squeeze() would collapse to a scalar and break the zip below.
        predicted_labels = torch.round(outputs).squeeze(1).tolist()

    final_summary = "".join(
        slide_text + "\n"
        for slide_text, label in zip(slides_text, predicted_labels)
        if label == 1
    )

    return final_summary, slides_text


In [2]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import pptx

def calculate_slide_importance(pptx_filepath, keywords=None):
    """Score every slide of a deck by topic focus plus keyword hits.

    The score of a slide is the sum of two parts:

    * an LDA part: the probability of the slide's dominant topic in a
      two-topic model fitted on the deck (a value in (0, 1]);
    * a keyword part: how many of ``keywords`` occur in the slide text as
      case-insensitive substrings.

    The LDA part used to be the row sum of the document-topic matrix, which is
    normalised and therefore always 1.0; the dominant-topic probability is used
    instead so the LDA term actually differs between slides.

    When the deck is too small or too uniform for
    ``CountVectorizer(max_df=0.8, min_df=2)`` to produce a vocabulary (for
    example one or two slides), the LDA part falls back to 0 for every slide
    and a warning is emitted instead of raising.

    Args:
        pptx_filepath: Path of the presentation, passed to ``pptx.Presentation``.
        keywords: Optional iterable of keyword strings. Defaults to the
            built-in list of lecture-structure words.

    Returns:
        A dict mapping the 1-based slide number to its importance score.
    """
    import warnings

    def extract_text_from_pptx(pptx_file):
        # One newline-joined string per slide, built from shapes exposing ``text``.
        text = []
        prs = pptx.Presentation(pptx_file)
        for slide in prs.slides:
            slide_text = []
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    slide_text.append(shape.text)
            text.append("\n".join(slide_text))
        return text

    def calculate_importance_lda(slides):
        # Dominant-topic probability per slide; zeros when no vocabulary can be built.
        vectorizer = CountVectorizer(max_df=0.8, min_df=2, stop_words='english')
        try:
            X = vectorizer.fit_transform(slides)
        except ValueError as exc:
            warnings.warn(
                f"LDA importance skipped, using keyword scores only: {exc}",
                stacklevel=3,
            )
            return np.zeros(len(slides))
        lda_model = LatentDirichletAllocation(n_components=2, random_state=42)
        lda_output = lda_model.fit_transform(X)
        return np.max(lda_output, axis=1)

    def calculate_importance_keywords(slides, keywords):
        # Number of distinct keywords occurring (as substrings) in each slide.
        lowered_keywords = [keyword.lower() for keyword in keywords]
        importance_scores = []
        for slide in slides:
            lowered_slide = slide.lower()
            importance_scores.append(
                sum(1 for keyword in lowered_keywords if keyword in lowered_slide)
            )
        return importance_scores

    if keywords is None:
        keywords = ["Equation", "Formula", "Calculation", "Derivation", "Theorem", "Law", "Principle", "Axiom", "Postulate", "Application", "Example", "Illustration", "Use case", "Method", "Procedure", "Algorithm", "Definition", "Explanation", "Overview", "Introduction", "Analysis", "Comparison", "Limitation", "Challenge", "Result", "Conclusion", "Observation", "Summary"]

    slides_text = extract_text_from_pptx(pptx_filepath)
    importance_scores_lda = calculate_importance_lda(slides_text)
    importance_scores_keywords = calculate_importance_keywords(slides_text, keywords)
    hybrid_importance_scores = np.add(importance_scores_lda, importance_scores_keywords)

    slide_importance = {i+1: importance for i, importance in enumerate(hybrid_importance_scores)}
    return slide_importance


In [3]:
import spacy
from spacy.matcher import PhraseMatcher

# Vocabulary used for context detection: maps a context label to the lowercase
# phrases that signal it.
# - identify_context walks this dict in insertion order and stops at the first
#   label with a matching phrase, so earlier entries win when a text matches
#   several contexts.
# - extract_entities turns the phrases of the chosen label into PhraseMatcher
#   patterns.
# - Some phrases are listed under more than one label (e.g. "culture" under
#   both "art" and "history", "design" under both "art" and "architecture").
context_terms = {
    "renewable_energy": ["renewable energy", "solar energy", "wind energy", "hydropower", "biomass energy", "photovoltaic cells",
                         "solar thermal systems", "wind turbines", "wind farms", "hydropower plants", "tidal energy"],
    "finance": ["finance", "investment", "stock market", "portfolio", "dividend", "equity", "asset", "bond", "capital", "risk management"],
    "technology": ["technology", "innovation", "digital", "automation", "artificial intelligence", "machine learning", "internet of things", "cybersecurity", "blockchain", "cloud computing"],
    "healthcare": ["healthcare", "medical", "hospital", "doctor", "patient", "medicine", "treatment", "vaccine", "pandemic", "health insurance"],
    "education": ["education", "school", "teacher", "student", "university", "college", "learning", "curriculum", "online education", "e-learning"],
    "environment": ["environment", "climate change", "sustainability", "pollution", "conservation", "biodiversity", "carbon footprint", "global warming", "greenhouse gas", "natural resource"],
    "business": ["business", "company", "entrepreneur", "startup", "management", "strategy", "marketing", "sales", "customer", "productivity"],
    "travel": ["travel", "tourism", "destination", "hotel", "flight", "vacation", "holiday", "adventure", "sightseeing", "cultural"],
    "food": ["food", "nutrition", "diet", "restaurant", "cooking", "recipe", "organic", "vegan", "healthy eating", "culinary"],
    "art": ["art", "artist", "painting", "sculpture", "gallery", "music", "performance", "creative", "design", "culture"],
    "sports": ["sports", "athlete", "game", "competition", "fitness", "exercise", "team", "coach", "training", "championship"],
    "politics": ["politics", "government", "election", "democracy", "policy", "law", "political party", "voting", "legislation", "diplomacy"],
    "science": ["science", "research", "experiment", "discovery", "scientific", "theory", "scientist", "laboratory", "data", "analysis"],
    "history": ["history", "historical", "civilization", "archaeology", "ancient", "event", "documentary", "artifact", "heritage", "culture"],
    "fashion": ["fashion", "style", "clothing", "designer", "trend", "couture", "brand", "runway", "model", "accessory"],
    "literature": ["literature", "book", "author", "novel", "poetry", "fiction", "literary", "reading", "classic", "writing"],
    "architecture": ["architecture", "building", "design", "structure", "urban", "construction", "architect", "skyscraper", "landmark", "modern"],
    "space": ["space", "astronomy", "galaxy", "planet", "exploration", "cosmos", "universe"]
}

def identify_context(text):
    """Return the first context whose vocabulary occurs in ``text``.

    Contexts are checked in the insertion order of ``context_terms``; the first
    one with at least one phrase present in the text wins. Matching is a
    case-insensitive substring test, so capitalised slide text such as
    "Stock Market" still matches the lowercase vocabulary.

    Args:
        text: Text to classify. ``None`` or an empty string is treated as
            "no context".

    Returns:
        The matching context label, or ``None`` when nothing matches.
    """
    if not text:
        return None

    lowered_text = text.lower()  # lower once instead of once per phrase
    for ctx, terms in context_terms.items():
        if any(term.lower() in lowered_text for term in terms):
            return ctx
    return None

# spaCy pipelines already loaded by _load_spacy_model, keyed by model name.
_SPACY_MODELS = {}


def _load_spacy_model(model_name="en_core_web_lg"):
    """Load a spaCy pipeline on first use and reuse it on later calls."""
    if model_name not in _SPACY_MODELS:
        _SPACY_MODELS[model_name] = spacy.load(model_name)
    return _SPACY_MODELS[model_name]


def extract_entities(text, context):
    """Find the phrases of a context's vocabulary that occur in ``text``.

    Args:
        text: Text to search.
        context: Key of ``context_terms`` selecting the vocabulary, or ``None``.

    Returns:
        A sorted list of the distinct matched spans, spelled as they appear in
        ``text``. Empty when ``context`` is falsy or ``text`` is empty; in that
        case the spaCy model is not loaded at all.

    Raises:
        KeyError: If ``context`` is truthy but not a key of ``context_terms``.
    """
    if not context or not text:
        return []

    terms = context_terms[context]
    nlp = _load_spacy_model()

    # Match on the lowercase token form so "Stock Market" matches "stock market".
    matcher = PhraseMatcher(nlp.vocab, attr="LOWER")
    # Only tokenization is needed for LOWER matching, so skip the full pipeline.
    matcher.add("ContextTerms", [nlp.make_doc(term) for term in terms])

    doc = nlp.make_doc(text)
    entities = {doc[start:end].text for _match_id, start, end in matcher(doc)}

    # Sorted so the result does not depend on set iteration order.
    return sorted(entities)

def link_concepts(text, context):
    """Return the context-specific phrases found in ``text``.

    Thin wrapper: delegates to ``extract_entities`` and hands back its result
    unchanged.
    """
    return extract_entities(text, context)


In [5]:
import nltk
import numpy as np
from pptx import Presentation

class SubjectiveTest:
    """Generate open-ended quiz questions from noun-phrase chunks of a text."""

    def __init__(self, data):
        """Store the source text together with the question and grammar templates.

        Args:
            data: Text the questions are generated from.
        """
        # Question stems; the selected keyword is appended to one of them.
        self.question_pattern = [
            "Explain in detail ",
            "Define ",
            "Write a short note on ",
            "What do you mean by "
        ]
        # Chunk grammar for nltk.RegexpParser, expressed over POS tags.
        self.grammar = r"""
            CHUNK: {<NN>+<IN|DT>*<NN>+}
            {<NN>+<IN|DT>*<NNP>+}
            {<NNP>+<NNS>*}
        """
        self.summary = data

    @staticmethod
    def word_tokenizer(sequence):
        """Return the word tokens of ``sequence``, tokenized sentence by sentence."""
        return [
            word
            for sent in nltk.sent_tokenize(sequence)
            for word in nltk.word_tokenize(sent)
        ]

    def generate_test(self, no_of_questions=5):
        """Build up to ``no_of_questions`` distinct questions from the stored text.

        Every sentence is POS-tagged and chunked with ``self.grammar``. The
        upper-cased text of a CHUNK becomes a candidate keyword when it first
        appears in a sentence of more than 20 tokens. Keywords are then drawn at
        random without replacement and prefixed with a question stem.

        Args:
            no_of_questions: Maximum number of questions wanted (converted with
                ``int``; values below 1 yield no questions).

        Returns:
            A list of question strings. It is shorter than ``no_of_questions``
            when the text offers fewer distinct keywords, and empty when it
            offers none.
        """
        sentences = nltk.sent_tokenize(self.summary)
        cp = nltk.RegexpParser(self.grammar)
        question_answer_dict = dict()
        for sentence in sentences:
            tokens = nltk.word_tokenize(sentence)
            tree = cp.parse(nltk.pos_tag(tokens))
            for subtree in tree.subtrees():
                if subtree.label() != "CHUNK":
                    continue
                # Leaves are (word, tag) pairs; the keyword is the joined words.
                keyword = " ".join(leaf[0] for leaf in subtree).strip().upper()
                if keyword in question_answer_dict:
                    question_answer_dict[keyword] += " " + sentence
                elif len(tokens) > 20:
                    # Only sufficiently long sentences may introduce a keyword.
                    question_answer_dict[keyword] = sentence

        keyword_list = list(question_answer_dict.keys())
        n_questions = min(max(int(no_of_questions), 0), len(keyword_list))
        if n_questions == 0:
            # np.random.randint(0, 0) used to raise here for texts without keywords.
            return []

        # Draw without replacement so the same question is never emitted twice.
        chosen = np.random.choice(len(keyword_list), size=n_questions, replace=False)
        n_patterns = len(self.question_pattern)
        return [
            self.question_pattern[int(index) % n_patterns] + keyword_list[int(index)] + "."
            for index in chosen
        ]

def extract_text_from_ppt(ppt_file):
    """Return all text of a presentation as one newline-separated string.

    Args:
        ppt_file: Value handed straight to ``Presentation``.

    Returns:
        The ``text`` of every shape that exposes a ``text`` attribute, in slide
        and shape order, joined by newlines, with leading and trailing
        whitespace stripped from the combined string.
    """
    deck = Presentation(ppt_file)
    shape_texts = [
        shape.text
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "\n".join(shape_texts).strip()

def generate_subjective_test(pptx_filepath, no_of_questions=5):
    """Generate subjective quiz questions straight from a presentation file.

    Args:
        pptx_filepath: Path of the presentation, passed to
            ``extract_text_from_ppt``.
        no_of_questions: Number of questions requested, forwarded to
            ``SubjectiveTest.generate_test``.

    Returns:
        Whatever ``SubjectiveTest.generate_test`` returns for the deck's text.
    """
    return SubjectiveTest(extract_text_from_ppt(pptx_filepath)).generate_test(no_of_questions)



In [12]:
def main(pptx_filepath, n_quiz_questions=5):
    """Run the full lecture-analysis pipeline on one presentation.

    The stages run in this order: summary generation, slide-importance
    scoring, context detection on the summary, concept linking on the summary,
    and quiz generation from the deck.

    Args:
        pptx_filepath: Path of the presentation; handed to every stage that
            reads the deck.
        n_quiz_questions: Number of quiz questions to request.

    Returns:
        A dict with the keys ``"summary"``, ``"slide_importance"``,
        ``"context"``, ``"linked_concepts"`` and ``"quiz"``, in that order.
    """
    # The per-slide texts returned next to the summary are not needed here.
    summary, _slides_text = train_summarizer(pptx_filepath)
    importance_by_slide = calculate_slide_importance(pptx_filepath)
    detected_context = identify_context(summary)

    return dict(
        summary=summary,
        slide_importance=importance_by_slide,
        context=detected_context,
        linked_concepts=link_concepts(summary, detected_context),
        quiz=generate_subjective_test(pptx_filepath, no_of_questions=n_quiz_questions),
    )

# Example usage
# The deck path can be overridden with the PPTX_FILEPATH environment variable so
# the notebook also runs on machines other than the one it was written on; the
# original location stays the default.
import os

pptx_filepath = os.environ.get("PPTX_FILEPATH", "/Users/vaishnavikamisetti/Desktop/money.pptx")
result = main(pptx_filepath)
# Print every pipeline output under its own heading, separated by blank lines.
for key, value in result.items():
    print(key + ":")
    print(value)
    print()




Epoch [1/5], Loss: 0.7130
Epoch [2/5], Loss: 0.6821
Epoch [3/5], Loss: 0.6668
Epoch [4/5], Loss: 0.6563
Epoch [5/5], Loss: 0.6469
Training finished!
summary:
What is Inflation ?

Inflation is a situation in which the prices of goods, services go on rising substantially and at a fast pace.

Inflation is a sustained increase in the general price level of goods/services in an economy over a period of time.

An increase in the average price level of all products in an economy.

There is no unanimity among economist regarding the origin, causes and effects of inflation.
Inflation: Definitions

Inflation as a "state in which the value of money is falling, i.e., the prices are rising“. 
Crowther
“Inflation is a persistent and considerable rise in the general level of prices.“
Gardner Ackley
Inflation is a situation of "too much money chasing too few goods". 
Coulborn
1. Creeping Inflation
Creeping or mild inflation is when prices rise 3 percent a year or less.

According to the Federal Reserv