In [7]:
# Install required packages
!pip3 install transformers sentence-transformers

Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Collecting filelock (from transformers)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.30.0 (from transformers)
  Downloading huggingface_hub-0.30.2-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl.metadata (40 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers)
  Downloading tokenizers-0.21.1-cp39-abi3-macosx_11_0_arm64.whl.metadata (6.8 kB)
Collecting safetensors>=0.4.3 (from transformers)
  Downloading safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl.metadata (3.8 kB)
Collecting tqdm>=4.27 (from transformers)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Downloa

In [8]:
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
import torch

class EnhancedAgentWithPretrainedNLP:
    def __init__(self):
        self.goal = None
        self.plan = []
        self.current_step = 0
        self.completed = False
        self.history = []
        self.entities = {}
        self.intent = None
        
        # Load pre-trained models
        print("Loading pre-trained NLP models...")
        # Zero-shot classification for intent
        self.classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
        # Sentence embeddings for entity matching
        self.sentence_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
        print("Models loaded successfully!")
        
    def set_goal(self, goal_text):
        """Set the agent's goal and analyze it with pre-trained models"""
        self.goal = goal_text
        self.plan = []
        self.current_step = 0
        self.completed = False
        self.history = [f"Goal set: {goal_text}"]
        
        # Analyze the goal text with pre-trained models
        self.intent = self.classify_intent_with_model(goal_text)
        self.entities = self.extract_entities_with_model(goal_text)
        
        return f"Goal set: {goal_text}\nDetected intent: {self.intent}"
    
    def classify_intent_with_model(self, text):
        """Classify intent using a pre-trained zero-shot classification model"""
        # Define possible intents
        candidate_intents = ["research", "scheduling", "writing", "analysis", "general"]
        
        # Use the model to classify the text
        result = self.classifier(text, candidate_intents)
        
        # Return the highest scoring intent
        return result['labels'][0]
    
    def extract_entities_with_model(self, text):
        """Extract entities using semantic similarity with pre-defined entity types"""
        entities = {
            'topics': [],
            'timeframes': [],
            'quantities': []
        }
        
        # For simplicity, we'll still use regex for timeframes and quantities
        import re
        
        # Extract timeframes
        timeframe_patterns = [
            r'(?:by|before|after|during|for)\s+([A-Za-z]+\s+\d+(?:st|nd|rd|th)?)',
            r'(?:next|this|coming)\s+([A-Za-z]+)',
            r'(?:in)\s+(\d+)\s+(?:days|weeks|months)'
        ]
        
        for pattern in timeframe_patterns:
            matches = re.findall(pattern, text.lower())
            for match in matches:
                if match.strip():
                    entities['timeframes'].append(match.strip())
        
        # Extract quantities
        quantity_patterns = [
            r'(\d+)\s+(?:pages|items|points|sections|paragraphs)'
        ]
        
        for pattern in quantity_patterns:
            matches = re.findall(pattern, text.lower())
            for match in matches:
                if match.strip():
                    entities['quantities'].append(match.strip())
        
        # For topics, we'll use a more sophisticated approach with sentence embeddings
        # First, split the text into potential topic phrases
        sentences = re.split(r'[.,;!?]|\band\b|\bor\b|\bthe\b|\ba\b|\ban\b|\bto\b|\bfor\b|\bin\b|\bon\b|\bwith\b', text)
        sentences = [s.strip() for s in sentences if len(s.strip()) > 3]
        
        # Define some topic indicators
        topic_indicators = [
            "about", "regarding", "concerning", "on the subject of", 
            "related to", "focusing on", "in relation to"
        ]
        
        # Encode sentences and topic indicators
        if sentences:
            sentence_embeddings = self.sentence_model.encode(sentences)
            indicator_embeddings = self.sentence_model.encode(topic_indicators)
            
            # Find sentences that are semantically similar to topic indicators
            for i, sentence_emb in enumerate(sentence_embeddings):
                # Calculate similarity with topic indicators
                similarities = util.pytorch_cos_sim(
                    torch.tensor([sentence_emb]), 
                    torch.tensor(indicator_embeddings)
                )[0]
                
                max_sim = torch.max(similarities).item()
                
                # If the sentence is similar to a topic indicator, the next phrase might be a topic
                if max_sim > 0.3 and i < len(sentences) - 1:
                    potential_topic = sentences[i+1].strip()
                    if potential_topic and len(potential_topic) > 3 and potential_topic not in entities['topics']:
                        entities['topics'].append(potential_topic)
                
                # Also check if the current sentence itself might be a topic
                # This helps when the goal directly mentions the topic
                words = sentences[i].split()
                if len(words) >= 2 and len(words) <= 5:  # Reasonable length for a topic
                    potential_topic = sentences[i].strip()
                    if potential_topic and potential_topic not in entities['topics']:
                        entities['topics'].append(potential_topic)
        
        # If no topics were found, try extracting nouns as potential topics
        if not entities['topics']:
            import nltk
            try:
                nltk.data.find('tokenizers/punkt')
            except LookupError:
                nltk.download('punkt')
            try:
                nltk.data.find('taggers/averaged_perceptron_tagger')
            except LookupError:
                nltk.download('averaged_perceptron_tagger')
            
            tokens = nltk.word_tokenize(text)
            tagged = nltk.pos_tag(tokens)
            
            # Extract nouns (NN, NNS, NNP, NNPS)
            nouns = [word for word, pos in tagged if pos.startswith('NN')]
            
            if nouns:
                entities['topics'].append(' '.join(nouns[:2]))  # Just use the first couple of nouns
        
        return entities
    
    # The rest of the methods remain the same as in the EnhancedAgent class
    def create_plan(self):
        """Create a context-aware plan based on intent and entities"""
        if not self.goal:
            return "Please set a goal first."
        
        # Create plan based on intent and extracted entities
        if self.intent == 'research':
            topics = self.entities['topics'] or ["the requested subject"]
            topic_str = ', '.join(topics)
            
            self.plan = [
                f"Search for information about {topic_str}",
                f"Identify key aspects of {topic_str}",
                "Organize findings into a coherent structure",
                "Prepare a comprehensive summary"
            ]
        
        elif self.intent == 'scheduling':
            timeframe = self.entities['timeframes'][0] if self.entities['timeframes'] else "the specified period"
            
            self.plan = [
                f"Identify all tasks for {timeframe}",
                "Determine priorities and dependencies",
                "Allocate time slots for each task",
                "Create a structured schedule"
            ]
        
        elif self.intent == 'writing':
            topic = self.entities['topics'][0] if self.entities['topics'] else "the requested topic"
            
            self.plan = [
                f"Outline the main points for {topic}",
                "Develop an introduction and structure",
                "Write the main content",
                "Review and refine the final text"
            ]
        
        elif self.intent == 'analysis':
            topic = self.entities['topics'][0] if self.entities['topics'] else "the subject"
            
            self.plan = [
                f"Gather data about {topic}",
                "Identify patterns and relationships",
                "Evaluate implications and significance",
                "Formulate conclusions and recommendations"
            ]
        
        else:
            # Default general plan
            self.plan = [
                "Analyze the request requirements",
                "Gather necessary information",
                "Process the information systematically",
                "Prepare appropriate response"
            ]
        
        plan_text = "Plan created:\n" + "\n".join([f"{i+1}. {step}" for i, step in enumerate(self.plan)])
        self.history.append(plan_text)
        return plan_text
    
    def execute_step(self):
        """Execute the current step with natural language generation"""
        if not self.plan:
            return "No plan exists. Please create a plan first."
            
        if self.completed:
            return "All steps have been completed."
            
        current = self.plan[self.current_step]
        
        # Generate a natural language response based on the step type
        response = self.generate_step_response(self.current_step, current)
        
        self.current_step += 1
        if self.current_step >= len(self.plan):
            self.completed = True
            final_result = f"{response}\nAll steps completed!"
            self.history.append(final_result)
            return final_result
        
        next_step = f"{response}\nNext step: {self.plan[self.current_step]}"
        self.history.append(next_step)
        return next_step
    
    def generate_step_response(self, step_index, step_text):
        """Generate a natural language response for a step execution"""
        # Templates for different step types
        search_templates = [
            "I've gathered information about {topic}. The main sources include recent articles and trusted references.",
            "Research complete on {topic}. I found several relevant resources with up-to-date information.",
            "I've collected data on {topic} from multiple sources to ensure comprehensive coverage."
        ]
        
        analysis_templates = [
            "I've analyzed the information and identified {count} key points about {topic}.",
            "The data has been processed and organized into main categories for better understanding.",
            "Analysis complete. I've structured the information to highlight the most important aspects."
        ]
        
        creation_templates = [
            "I've prepared a complete summary that covers all the essential aspects of {topic}.",
            "The final output is ready, organized in a clear and logical structure.",
            "I've created a comprehensive response that addresses all parts of your request."
        ]
        
        # Determine template category based on step keywords
        if any(word in step_text.lower() for word in ['search', 'find', 'identify', 'gather', 'collect']):
            templates = search_templates
        elif any(word in step_text.lower() for word in ['analyze', 'organize', 'process', 'determine', 'develop']):
            templates = analysis_templates
        else:
            templates = creation_templates
        
        # Extract topic if available
        topic = self.entities['topics'][0] if self.entities['topics'] else "the requested subject"
        
        # Select a template and fill it
        import random
        template = random.choice(templates)
        response = template.format(topic=topic, count=random.randint(3, 7))
        
        # Add progress information
        progress = f"Step {step_index + 1}/{len(self.plan)} completed"
        
        return f"{response}\n{progress}"
    
    def get_status(self):
        """Get the current status of the agent"""
        if not self.goal:
            return "No goal set."
        if not self.plan:
            return f"Goal: {self.goal}\nIntent: {self.intent}\nNo plan created yet."
        if self.completed:
            return f"Goal: {self.goal}\nIntent: {self.intent}\nStatus: Completed all {len(self.plan)} steps."
        
        progress = f"{self.current_step}/{len(self.plan)} steps completed"
        next_step = f"Next: {self.plan[self.current_step]}" if self.current_step < len(self.plan) else "All done!"
        return f"Goal: {self.goal}\nIntent: {self.intent}\nProgress: {progress}\n{next_step}"
    
    def get_history(self):
        """Get the full history of agent actions"""
        return "\n\n".join(self.history)
    
    def get_nlp_analysis(self):
        """Return the NLP analysis of the current goal"""
        if not self.goal:
            return "No goal set."
        
        topic_str = ", ".join(self.entities['topics']) if self.entities['topics'] else "None detected"
        timeframe_str = ", ".join(self.entities['timeframes']) if self.entities['timeframes'] else "None detected"
        quantity_str = ", ".join(self.entities['quantities']) if self.entities['quantities'] else "None detected"
        
        return f"""NLP Analysis (Using Pre-trained Models):
Goal: "{self.goal}"
Detected Intent: {self.intent}
Extracted Entities:
  - Topics: {topic_str}
  - Timeframes: {timeframe_str}
  - Quantities: {quantity_str}"""

In [12]:
# Demonstrate the agent with pre-trained NLP models
def demonstrate_pretrained_agent(goal):
    """Run a complete agent cycle with pre-trained NLP models"""
    print(f"🎯 GOAL: \"{goal}\"")
    print("-" * 80)
    
    # Create a new agent (only do this once as model loading takes time)
    if 'pretrained_agent' not in globals():
        global pretrained_agent
        pretrained_agent = EnhancedAgentWithPretrainedNLP()
    
    agent = pretrained_agent
    
    # Step 1: Set Goal and analyze with NLP
    print("STEP 1: Setting Goal & Advanced NLP Analysis")
    result = agent.set_goal(goal)
    print(f"  {result}")
    print("\nNLP ANALYSIS (Using Pre-trained Models):")
    print(f"  {agent.get_nlp_analysis()}")
    print()
    
    # Step 2: Create Context-Aware Plan
    print("STEP 2: Creating Context-Aware Plan")
    plan_result = agent.create_plan()
    print(f"  {plan_result}")
    print()
    
    # Step 3: Execute Steps with NLG
    print("STEP 3: Executing Plan with Natural Language Generation")
    step_results = []
    
    # Execute all steps
    for i in range(len(agent.plan)):
        step_result = agent.execute_step()
        step_results.append(step_result)
        print(f"  Step {i+1}: {step_result}")
        print()
    
    # Step 4: Final Status
    print("STEP 4: Final Status")
    status = agent.get_status()
    print(f"  {status}")
    print()
    
    # Create a summary table
    import pandas as pd
    print("📊 EXECUTION SUMMARY:")
    summary_data = []
    for i, (plan_step, result) in enumerate(zip(agent.plan, step_results)):
        summary_data.append({
            "Step": i+1,
            "Plan": plan_step,
            "Result": result.split('\n')[0]  # Just take the first line of the result
        })
    
    # Display as a formatted table
    summary_df = pd.DataFrame(summary_data)
    display(summary_df)
    
    print("-" * 80)
    print()

# Display a title
display(HTML("<h1>Agentic AI with Pre-trained NLP Models</h1>"))
display(HTML("<p>This notebook demonstrates an agentic AI using Hugging Face Transformers and Sentence Transformers for advanced NLP capabilities.</p>"))

# Run a demonstration with a complex goal
display(HTML("<h2>Example: Complex Goal with Pre-trained NLP Analysis</h2>"))
demonstrate_pretrained_agent("Research the environmental impact of renewable energy sources and prepare a summary for my presentation next week")

🎯 GOAL: "Research the environmental impact of renewable energy sources and prepare a summary for my presentation next week"
--------------------------------------------------------------------------------
STEP 1: Setting Goal & Advanced NLP Analysis
  Goal set: Research the environmental impact of renewable energy sources and prepare a summary for my presentation next week
Detected intent: research

NLP ANALYSIS (Using Pre-trained Models):
  NLP Analysis (Using Pre-trained Models):
Goal: "Research the environmental impact of renewable energy sources and prepare a summary for my presentation next week"
Detected Intent: research
Extracted Entities:
  - Topics: environmental impact of renewable energy sources, summary, my presentation next week
  - Timeframes: week
  - Quantities: None detected

STEP 2: Creating Context-Aware Plan
  Plan created:
1. Search for information about environmental impact of renewable energy sources, summary, my presentation next week
2. Identify key aspects of 

Unnamed: 0,Step,Plan,Result
0,1,Search for information about environmental imp...,Research complete on environmental impact of r...
1,2,Identify key aspects of environmental impact o...,Research complete on environmental impact of r...
2,3,Organize findings into a coherent structure,I've gathered information about environmental ...
3,4,Prepare a comprehensive summary,"The final output is ready, organized in a clea..."


--------------------------------------------------------------------------------

