# Sentiment analyze of tweet content

## Agent to search data in URLs

In [1]:
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# ===== JUPYTER NOTEBOOK COMPATIBLE FIRECRAWL AGENT =====
# Run each cell separately in Jupyter

# Cell 1: Imports and Setup
import asyncio
import re
import os
from typing import List, Optional, Dict, Any, Union
from datetime import datetime
from enum import Enum

# Import order is important to avoid conflicts
from pydantic import BaseModel, field_validator, ValidationError
from pydantic import Field as PydanticField  # Alias to avoid conflicts
from pydantic import HttpUrl

# Then import PydanticAI components
from pydantic_ai import Agent
from pydantic_ai.mcp import MCPServerSSE

In [3]:
# For Jupyter environment
import nest_asyncio
nest_asyncio.apply()

print("✅ Imports completed successfully")

✅ Imports completed successfully


In [4]:
import re
import base58
from typing import Union

def is_valid_solana_address(address: str) -> bool:
    """
    Verify if a string is a valid Solana address (token or otherwise).
    
    Args:
        address (str): The address string to validate
        
    Returns:
        bool: True if valid Solana address, False otherwise
    """
    if not isinstance(address, str):
        return False
    
    # Check length (Solana addresses are typically 32-44 characters)
    if len(address) < 32 or len(address) > 44:
        return False
    
    # Check if it contains only valid base58 characters
    # Base58 alphabet: 123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz
    base58_pattern = r'^[1-9A-HJ-NP-Za-km-z]+$'
    if not re.match(base58_pattern, address):
        return False
    
    try:
        # Attempt to decode the base58 string
        decoded = base58.b58decode(address)
        
        # Solana addresses should decode to exactly 32 bytes
        if len(decoded) != 32:
            return False
            
        return True
    except Exception:
        return False

In [5]:
def is_valid_evm_address(address: str) -> bool:
    """
    Validates if the given address is a valid EVM blockchain address.
    
    Args:
        address (str): The address string to validate
        
    Returns:
        bool: True if the address is valid, False otherwise
        
    Examples:
        >>> is_valid_evm_address("0x742d35Cc6765C0532575f5A2c0a078Df8a2D4e5e")
        True
        >>> is_valid_evm_address("0xinvalid")
        False
        >>> is_valid_evm_address("742d35Cc6765C0532575f5A2c0a078Df8a2D4e5e")
        False
    """
    if not isinstance(address, str):
        return False
    
    # EVM address pattern: 0x followed by exactly 40 hexadecimal characters
    pattern = r'^0x[a-fA-F0-9]{40}$'
    
    return bool(re.match(pattern, address))

In [6]:
class TokenDetails(BaseModel):
    """Details of token/coin in blockchain"""
    chain_id: Optional[int] = PydanticField(description = "Id of blockchain")
    chain_name: Optional[str]
    is_release: Optional[bool]
    chain_defined_explicitly: Optional[bool] = PydanticField(description = "Whether name of blockchain been mentioned explicitly in text")
    definition_fragment: Optional[str] = PydanticField(description = "A fragment of content where name of blockchain been mentioned explicitly")
    token_address: str = PydanticField(description="Address of token")

    @field_validator('token_address')
    @classmethod
    def validate_token(cls, address: str) -> str:
        """Ensure token address is matching the pattern"""
        if is_valid_solana_address(address) or is_valid_evm_address(address):
            return address
        raise ValueError("Token address should match the pattern")

In [7]:
class NoTokenFound(BaseModel):
    """When no token details found"""

In [8]:
class RelseaseAnnouncementWithoutDetails(BaseModel):
    """When no token details found but release is announced"""

In [9]:
chain_ids_list = """
Here's a markdown table with chain IDs and names for the most popular public blockchains:

| Chain ID | Chain Name |
|----------|------------|
| 1 | Ethereum Mainnet |
| 56 | BNB Smart Chain (BSC) |
| 137 | Polygon |
| 43114 | Avalanche C-Chain |
| 250 | Fantom Opera |
| 42161 | Arbitrum One |
| 10 | Optimism |
| 25 | Cronos |
| 100 | Gnosis Chain (xDai) |
| 1284 | Moonbeam |
| 1285 | Moonriver |
| 42220 | Celo |
| 128 | Huobi ECO Chain (HECO) |
| 66 | OKExChain |
| 321 | KuCoin Community Chain (KCC) |
| 1666600000 | Harmony One Shard 0 |
| 288 | Boba Network |
| 1313161554 | Aurora |
| 8217 | Klaytn Cypress |
| 82 | Meter |
| 1088 | Metis Andromeda |
| 199 | BitTorrent Chain |
| 324 | zkSync Era |
| 5000 | Mantle |
| 59144 | Linea |
| 534352 | Scroll |
| 8453 | Base |

These are the most commonly used public blockchains with their respective chain IDs as defined in the EIP-155 standard for Ethereum-compatible networks.
"""

contract_address_patterns = """
| Blockchain Type                    | Address Format | Regex Pattern                   | Description                                      |
| ---------------------------------- | -------------- | ------------------------------- | ------------------------------------------------ |
| EVM (Ethereum, BSC, Polygon, etc.) | Hexadecimal    | `^0x[a-fA-F0-9]{40}$`           | 20-byte hex string with "0x" prefix              |
| EVM (Case-insensitive)             | Hexadecimal    | `^0x[a-fA-F0-9]{40}$`           | Standard EVM address format                      |
| EVM (Checksummed)                  | Mixed Case     | `^0x[a-fA-F0-9]{40}$`           | EIP-55 checksummed (case matters for validation) |
| Solana                             | Base58         | `^[1-9A-HJ-NP-Za-km-z]{32,44}$` | Base58 encoded, 32-44 characters                 |
| Solana (Strict)                    | Base58         | `^[1-9A-HJ-NP-Za-km-z]{43,44}$` | More precise length range                        |
| Solana (Most Common)               | Base58         | `^[1-9A-HJ-NP-Za-km-z]{44}$`    | Exactly 44 characters (most common)              |
"""

In [10]:
class FirecrawlAgent:
    """
    A PydanticAI agent that uses Firecrawl MCP server for web scraping capabilities
    """
    
    def __init__(self, model_name: str = "openai:gpt-4o"):
        """
        Initialize the agent with Firecrawl MCP server connection.
        
        Args:
            model_name: The LLM model to use (default: gpt-4o)
        """
        # Create MCP server connection to Firecrawl running in SSE mode
        self.firecrawl_server = MCPServerSSE(
            url='http://localhost:3000/sse',  # Default SSE endpoint for Firecrawl MCP
            tool_prefix='firecrawl'  # Optional: prefix tools to avoid naming conflicts
        )
        
        # Initialize the agent with the MCP server
        self.agent = Agent[None, TokenDetails | NoTokenFound | RelseaseAnnouncementWithoutDetails](
            model=model_name,
            output_type=TokenDetails | NoTokenFound | RelseaseAnnouncementWithoutDetails,
            retries=4,
            system_prompt=(
                "You are a web scraping assistant powered by Firecrawl. ",
                "Your task is to scrape provided webpage and search if it contain an announcement of a new token/coin release.",
                "Parse the token address and blockchain it deployed to",
                "If blockchain is not found determine it based on address fromat(EVM/Solana).",
                chain_ids_list,
                contract_address_patterns
            ),
            mcp_servers=[self.firecrawl_server]
        )
    
    async def run(self, url: str) -> TokenDetails | NoTokenFound | RelseaseAnnouncementWithoutDetails:
        """
        Process a user query using the agent with Firecrawl capabilities.
        
        Args:
            url: The url to crawl
            
        Returns:
            The token data if found
        """
        async with self.agent.run_mcp_servers():
            result = await self.agent.run(user_query)
            return result.output


## Agent to search data in pictures

In [11]:
trump_token_url = "https://pbs.twimg.com/media/GhivrlDWAAA7Ex3?format=jpg&name=medium"
malania_token_url = "https://lh7-rt.googleusercontent.com/docsz/AD_4nXcNy9kHtzbuH-N-F9B8zy7oqkUobYkzXWhAKqI4qXP7JSIihBNHhJJfz-1gmvJDnxYiTPHRinIe8wBQ3VMBZU0aGxyb6U8k6SWGU5NleZg2AVGyxI7WuyJGcUJ73oG_THgfF_bX?key=qxx7aSSAQfwoulEynZFGQMDH"
no_announcement_url="https://bitcoinworld.co.in/wp-content/uploads/Melania-Trumps-Meme-Coin-MELANIA-B.jpg"

In [18]:
from pydantic_ai import ImageUrl

class ImageSearchAgent:
    """
    A PydanticAI agent that analizing images and search for new token release announcements 
    """
    
    def __init__(self, model_name: str = "openai:gpt-4o"):
        """
        Initialize the agent
        
        Args:
            model_name: The LLM model to use (default: gpt-4o)
        """
        self.agent = Agent[None, TokenDetails | NoTokenFound | RelseaseAnnouncementWithoutDetails](
            model=model_name,
            output_type=TokenDetails | NoTokenFound | RelseaseAnnouncementWithoutDetails,
            retries=4,
            system_prompt=(
                "You are text pattern recognition agent that works with images",
                "Your task is to scan every text you found in given image and search for announcement of a new token/coin release.",
                "Parse the token address using Regex Pattern and blockchain it deployed to.",
                "If blockchain is not found, determine it based on address Regex Pattern (EVM/Solana).",
                chain_ids_list,
                contract_address_patterns
            )
        )
    
    async def run(self, image_url: str) -> TokenDetails | NoTokenFound | RelseaseAnnouncementWithoutDetails:
        """
        Process a user query
        Args:
            image_url: The url of an image
            
        Returns:
            The token data if found
        """
        result = await self.agent.run(
            [
                ImageUrl(url=image_url)
            ]
        )
        return result.output


## Agent to search data in text

In [19]:
class TextSearchAgent:
    """
    A PydanticAI agent that analizing text and search for new token release announcements 
    """
    
    def __init__(self, model_name: str = "openai:gpt-4o"):
        """
        Initialize the agent
        
        Args:
            model_name: The LLM model to use (default: gpt-4o)
        """
        self.agent = Agent[None, TokenDetails | NoTokenFound | RelseaseAnnouncementWithoutDetails](
            model=model_name,
            output_type=TokenDetails | NoTokenFound | RelseaseAnnouncementWithoutDetails,
            retries=4,
            system_prompt=(
                "Your task is scan given text and search for announcement of a new token/coin release.",
                "Parse the token address using Regex Pattern and blockchain it deployed to.",
                "If blockchain is not found, determine it based on address Regex Pattern (EVM/Solana).",
                chain_ids_list,
                contract_address_patterns
            )
        )
    
    async def run(self, text: str) -> TokenDetails | NoTokenFound | RelseaseAnnouncementWithoutDetails:
        """
        Process a user query
        Args:
            text: The given text
            
        Returns:
            The token data if found
        """
        result = await self.agent.run(text)
        return result.output

## Test agents

### Firecrawl agen test

In [None]:
agent = FirecrawlAgent()
await agent.run("https://flockerz.com/")

In [None]:
agent = FirecrawlAgent()
await agent.run("https://gettrumpmemes.com/")

### ImageSearchAgent agent test

In [17]:
agent = ImageSearchAgent()
await agent.run(malania_token_url)

TokenDetails(chain_id=None, chain_name=None, is_release=True, chain_defined_explicitly=False, definition_fragment=None, token_address='FUAfBo2jgks6gB4Z4LfZkqSZgzNucisEHqnNebaRxM1P')

### TextSearchAgent test

In [20]:
text_samples = [
    'My new token on Polygon blockchain: 0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48. Buy it now!', #explicit chain info, Polygon
    'My new token is live: 0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48. Happpy trading!', #no chain info, EVM
    'My new token is live on Solana Blockchain: Es9vMFrzaCERmJfrF4H2FYD4KCoNkY11McCe8BenwNYB. Buy now!', #Explicit chain info: Solana
    'My new token is live: Es9vMFrzaCERmJfrF4H2FYD4KCoNkY11McCe8BenwNYB. Buy now!', #No chain info: Solana
    'Join my webinar tomorrow!', #no announcement
    'Just bought a bad of Trump coin (6p6xgHyF7AeE6TZkSmFsko444wqoP15icUSqi2jfGiPN)', #no announcement, Solana address
    'Just bought a bad of Trump coin on solana: (6p6xgHyF7AeE6TZkSmFsko444wqoP15icUSqi2jfGiPN)', #no announcement. Solana address
    'Just bought a bad of Trump coin on solana: (6p6xgHyF7AeE6TZkSmFsko444wqoP15icUSqi2jfGiPN)', #no announcement. Solana address
    'Accumulationg Bonk: 0x1151CB3d861920e07a38e03eEAd12C32178567F6', #no announcement. EVM address
    'Accumulationg Bonk on ETH mainnet: 0x1151CB3d861920e07a38e03eEAd12C32178567F6', #no announcement. Eth mainnet
    'My new token just released - follow the link to buy!' #release announcement, no token data
]

In [21]:
agent = TextSearchAgent()
await agent.run(text_samples[0])

TokenDetails(chain_id=137, chain_name='Polygon', is_release=True, chain_defined_explicitly=True, definition_fragment='new token on Polygon blockchain', token_address='0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48')

In [23]:
import asyncio

max_concurrent = 5
# Semaphore to limit concurrent requests
semaphore = asyncio.Semaphore(max_concurrent)
agent = TextSearchAgent()

async def process_single_case_with_limit(test_case: str, index: int):
    """Process a single test case with concurrency limit"""
    async with semaphore:
        try:
            result = await agent.run(test_case)
            return {
                'test_case': test_case,
                'result': result,
                'success': True,
                'error': None
            }
        except Exception as e:
            return {
                'index': index,
                'test_case': test_case,
                'result': None,
                'success': False,
                'error': str(e)
            }


tasks = [
    process_single_case_with_limit(test_case, index) 
    for index, test_case in enumerate(text_samples)
]
await asyncio.gather(*tasks)

[{'test_case': 'My new token on Polygon blockchain: 0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48. Buy it now!',
  'result': TokenDetails(chain_id=137, chain_name='Polygon', is_release=True, chain_defined_explicitly=True, definition_fragment='My new token on Polygon blockchain:', token_address='0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48'),
  'success': True,
  'error': None},
 {'test_case': 'My new token is live: 0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48. Happpy trading!',
  'result': TokenDetails(chain_id=1, chain_name='Ethereum Mainnet', is_release=True, chain_defined_explicitly=False, definition_fragment=None, token_address='0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48'),
  'success': True,
  'error': None},
 {'test_case': 'My new token is live on Solana Blockchain: Es9vMFrzaCERmJfrF4H2FYD4KCoNkY11McCe8BenwNYB. Buy now!',
  'result': TokenDetails(chain_id=None, chain_name='Solana', is_release=True, chain_defined_explicitly=True, definition_fragment='Solana Blockchain', token_address='Es