In [1]:
# Install required libraries (run this cell once, then comment it out)
# !pip install pandas openpyxl openai anthropic

import pandas as pd
import json
from typing import Dict, Any, Optional
import os
import warnings
warnings.filterwarnings('ignore')

print("‚úÖ Libraries imported successfully")

‚úÖ Libraries imported successfully


In [2]:
# VAANI Registry - Single source of truth
MASTER_REGISTRY_LINK = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQdOVYDNLuMGlykxcTlLcmOP5zDkgMSq-JOZodBWoVPrzhVqrdSiGqfyhBqUvFcPzYhQEy2-gap0Oqn/pub?output=xlsx"

print("‚úÖ Master Registry Link configured")
print(f"üìç Link: {MASTER_REGISTRY_LINK[:60]}...")

‚úÖ Master Registry Link configured
üìç Link: https://docs.google.com/spreadsheets/d/e/2PACX-1vQdOVYDNLuMG...


In [3]:
def load_registry() -> Optional[pd.DataFrame]:
    """
    Load the VAANI Registry from Google Sheets.
    
    Returns:
        DataFrame with columns: S.No, item_name, information_json
        None if loading fails
    """
    try:
        # Read the Google Sheet
        df = pd.read_excel(MASTER_REGISTRY_LINK)
        
        # Validate required columns exist
        required_cols = ['item_name', 'information_json']
        missing_cols = [col for col in required_cols if col not in df.columns]
        
        if missing_cols:
            print(f"‚ùå Missing required columns: {missing_cols}")
            return None
        
        # Clean up: remove rows where item_name is empty
        df = df[df['item_name'].notna()].copy()
        
        print(f"‚úÖ Registry loaded successfully: {len(df)} items found")
        return df
    
    except Exception as e:
        print(f"‚ùå Error loading registry: {e}")
        print("üí° Check if the Google Sheet link is correct and publicly accessible")
        return None

# Test the function
print("\nüîÑ Testing registry load...")
test_df = load_registry()

if test_df is not None:
    print("\nüìã Available items in registry:")
    for idx, item in enumerate(test_df['item_name'].values, 1):
        print(f"  {idx}. {item}")


üîÑ Testing registry load...
‚úÖ Registry loaded successfully: 8 items found

üìã Available items in registry:
  1. default_model
  2. openai_api_key
  3. anthropic_api_key
  4. master_registry_link
  5. usage_data
  6. dropoff_analysis
  7. user_research
  8. excel_db_1000_items


In [4]:
def get_item(item_name: str, verbose: bool = True) -> Optional[Dict[str, Any]]:
    """
    Get a specific item from the registry.
    
    Args:
        item_name: Name of the item to retrieve
        verbose: If True, print status messages
    
    Returns:
        Dictionary with item information, or None if not found/error
    
    Example:
        model_info = get_item('default_model')
        # Returns: {'model': 'gpt-4o-mini', 'temperature': 0.3, ...}
    """
    # Load registry
    df = load_registry()
    
    if df is None:
        if verbose:
            print("‚ùå Could not load registry")
        return None
    
    # Find the item
    item_row = df[df['item_name'] == item_name]
    
    if item_row.empty:
        if verbose:
            print(f"‚ùå Item '{item_name}' not found in registry")
            print(f"üí° Available items: {', '.join(df['item_name'].values)}")
        return None
    
    # Get the JSON string
    json_str = item_row['information_json'].values[0]
    
    # Check if JSON is empty or invalid
    if pd.isna(json_str) or str(json_str).strip() == '':
        if verbose:
            print(f"‚ö†Ô∏è Item '{item_name}' has no information_json data")
        return None
    
    # Try to parse JSON
    try:
        item_dict = json.loads(json_str)
        if verbose:
            print(f"‚úÖ Retrieved '{item_name}'")
        return item_dict
    
    except json.JSONDecodeError as e:
        if verbose:
            print(f"‚ùå Invalid JSON format for '{item_name}': {e}")
            print(f"üìù Raw data: {json_str[:100]}...")
        return None
    
    except Exception as e:
        if verbose:
            print(f"‚ùå Unexpected error for '{item_name}': {e}")
        return None

# Test the function
print("\nüß™ Testing get_item() function:")
print("\n1. Testing with 'default_model':")
model = get_item('default_model')
if model:
    print(f"   Result: {model}")

print("\n2. Testing with non-existent item:")
fake = get_item('this_does_not_exist')

print("\n3. Testing with Sarvam API (might not have data yet):")
sarvam = get_item('sarvam_api')


üß™ Testing get_item() function:

1. Testing with 'default_model':
‚úÖ Registry loaded successfully: 8 items found
‚úÖ Retrieved 'default_model'
   Result: {'model': 'gpt-4o-mini', 'temperature': 0.3, 'max_tokens': 1000, 'provider': 'openai'}

2. Testing with non-existent item:
‚úÖ Registry loaded successfully: 8 items found
‚ùå Item 'this_does_not_exist' not found in registry
üí° Available items: default_model, openai_api_key, anthropic_api_key, master_registry_link, usage_data, dropoff_analysis, user_research, excel_db_1000_items

3. Testing with Sarvam API (might not have data yet):
‚úÖ Registry loaded successfully: 8 items found
‚ùå Item 'sarvam_api' not found in registry
üí° Available items: default_model, openai_api_key, anthropic_api_key, master_registry_link, usage_data, dropoff_analysis, user_research, excel_db_1000_items


In [5]:
def get_all_items(skip_errors: bool = True) -> Dict[str, Any]:
    """
    Get ALL items from registry as a dictionary.
    
    Args:
        skip_errors: If True, skip items with invalid JSON instead of stopping
    
    Returns:
        Dictionary where key = item_name, value = information dictionary
        {
            'default_model': {'model': 'gpt-4o-mini', ...},
            'usage_data': {'link': '...', 'local_path': '...'},
            ...
        }
    """
    df = load_registry()
    
    if df is None:
        print("‚ùå Could not load registry")
        return {}
    
    all_items = {}
    errors = []
    
    for _, row in df.iterrows():
        item_name = row['item_name']
        json_str = row['information_json']
        
        # Skip empty JSON
        if pd.isna(json_str) or str(json_str).strip() == '':
            errors.append(f"'{item_name}': No data")
            continue
        
        # Try to parse JSON
        try:
            item_dict = json.loads(json_str)
            all_items[item_name] = item_dict
        except Exception as e:
            errors.append(f"'{item_name}': {str(e)[:50]}")
            if not skip_errors:
                raise
    
    # Summary
    print(f"‚úÖ Successfully loaded {len(all_items)} items")
    
    if errors:
        print(f"‚ö†Ô∏è Skipped {len(errors)} items with errors:")
        for err in errors[:5]:  # Show first 5 errors
            print(f"   ‚Ä¢ {err}")
        if len(errors) > 5:
            print(f"   ... and {len(errors) - 5} more")
    
    return all_items

# Test it
print("\nüì¶ Loading all items from registry:")
all_config = get_all_items()

if all_config:
    print(f"\n‚úÖ Available configurations:")
    for name, info in all_config.items():
        print(f"  ‚Ä¢ {name}: {list(info.keys())}")


üì¶ Loading all items from registry:
‚úÖ Registry loaded successfully: 8 items found
‚úÖ Successfully loaded 8 items

‚úÖ Available configurations:
  ‚Ä¢ default_model: ['model', 'temperature', 'max_tokens', 'provider']
  ‚Ä¢ openai_api_key: ['key', 'description']
  ‚Ä¢ anthropic_api_key: ['key', 'description']
  ‚Ä¢ master_registry_link: ['link', 'description']
  ‚Ä¢ usage_data: ['link', 'local_path', 'description']
  ‚Ä¢ dropoff_analysis: ['link', 'local_path', 'description']
  ‚Ä¢ user_research: ['link', 'local_path', 'description']
  ‚Ä¢ excel_db_1000_items: ['link', 'local_path', 'description']


In [6]:
def get_api_key(key_name: str) -> Optional[str]:
    """
    Get an API key from registry.
    
    Args:
        key_name: Name like 'openai_api_key', 'anthropic_api_key'
    
    Returns:
        The API key string, or None if not found
    """
    item = get_item(key_name, verbose=False)
    
    if item and 'key' in item:
        return item['key']
    
    print(f"‚ö†Ô∏è API key '{key_name}' not found or not configured")
    return None


def get_model_config(model_name: str = 'default_model') -> Optional[Dict]:
    """
    Get LLM model configuration.
    
    Args:
        model_name: Name of the model config (default: 'default_model')
    
    Returns:
        Dictionary with model settings: {model, temperature, max_tokens, provider}
    """
    config = get_item(model_name, verbose=False)
    
    if config is None:
        print(f"‚ö†Ô∏è Model config '{model_name}' not found")
        # Return default fallback
        return {
            'model': 'gpt-4o-mini',
            'temperature': 0.3,
            'max_tokens': 1000,
            'provider': 'openai'
        }
    
    return config


def get_data_file_link(file_name: str) -> Optional[str]:
    """
    Get Google Sheet link for a data file.
    
    Args:
        file_name: Name like 'usage_data', 'dropoff_analysis'
    
    Returns:
        Google Sheets URL, or None if not found
    """
    item = get_item(file_name, verbose=False)
    
    if item and 'link' in item:
        return item['link']
    
    print(f"‚ö†Ô∏è Data file '{file_name}' not found or link not configured")
    return None


def get_data_file_path(file_name: str) -> Optional[str]:
    """
    Get local file path for a data file.
    
    Args:
        file_name: Name like 'usage_data', 'dropoff_analysis'
    
    Returns:
        Local file path, or None if not found
    """
    item = get_item(file_name, verbose=False)
    
    if item and 'local_path' in item:
        return item['local_path']
    
    return None


# Test the helper functions
print("\nüß™ Testing helper functions:")
print("\n1. API Keys:")
openai_key = get_api_key('openai_api_key')
if openai_key:
    print(f"   OpenAI Key: {openai_key[:20]}... (length: {len(openai_key)})")

print("\n2. Model Config:")
model_cfg = get_model_config()
print(f"   {model_cfg}")

print("\n3. Data File Links:")
usage_link = get_data_file_link('usage_data')
if usage_link:
    print(f"   Usage Data: {usage_link[:50]}...")


üß™ Testing helper functions:

1. API Keys:
‚úÖ Registry loaded successfully: 8 items found
   OpenAI Key: sk-YOUR_KEY_HERE... (length: 16)

2. Model Config:
‚úÖ Registry loaded successfully: 8 items found
   {'model': 'gpt-4o-mini', 'temperature': 0.3, 'max_tokens': 1000, 'provider': 'openai'}

3. Data File Links:
‚úÖ Registry loaded successfully: 8 items found
   Usage Data: https://docs.google.com/spreadsheets/d/SHEET_ID...


In [7]:
def check_registry_health():
    """
    Run a health check on the registry.
    Shows which items are properly configured and which need attention.
    """
    print("üè• REGISTRY HEALTH CHECK")
    print("=" * 60)
    
    all_items = get_all_items(skip_errors=True)
    
    if not all_items:
        print("‚ùå No items could be loaded from registry")
        return
    
    # Check what's configured
    categories = {
        'API Keys': ['openai_api_key', 'anthropic_api_key'],
        'Models': ['default_model'],
        'Data Files': ['usage_data', 'dropoff_analysis', 'user_research', 'excel_db_1000_items'],
        'Config': ['master_registry_link']
    }
    
    for category, items in categories.items():
        print(f"\nüìã {category}:")
        for item_name in items:
            if item_name in all_items:
                item = all_items[item_name]
                # Check if it has required keys
                if 'key' in item:
                    status = "‚úÖ" if item['key'] and 'YOUR' not in item['key'] else "‚ö†Ô∏è Placeholder"
                elif 'link' in item:
                    status = "‚úÖ" if item['link'] and 'YOUR' not in item['link'] else "‚ö†Ô∏è Placeholder"
                else:
                    status = "‚úÖ"
                print(f"   {status} {item_name}")
            else:
                print(f"   ‚ùå {item_name} - Not in registry")
    
    print("\n" + "=" * 60)
    print(f"‚úÖ Total items loaded: {len(all_items)}")

# Run health check
check_registry_health()

üè• REGISTRY HEALTH CHECK
‚úÖ Registry loaded successfully: 8 items found
‚úÖ Successfully loaded 8 items

üìã API Keys:
   ‚ö†Ô∏è Placeholder openai_api_key
   ‚ö†Ô∏è Placeholder anthropic_api_key

üìã Models:
   ‚úÖ default_model

üìã Data Files:
   ‚úÖ usage_data
   ‚úÖ dropoff_analysis
   ‚úÖ user_research
   ‚úÖ excel_db_1000_items

üìã Config:
   ‚úÖ master_registry_link

‚úÖ Total items loaded: 8


In [8]:
print("""
üìö HOW TO USE THIS CONFIG IN OTHER NOTEBOOKS
=====================================================

1Ô∏è‚É£ LOAD CONFIG IN ANY NOTEBOOK:
   %run vyapar_config.ipynb

2Ô∏è‚É£ GET AN ITEM:
   model = get_item('default_model')
   
3Ô∏è‚É£ GET API KEY:
   api_key = get_api_key('openai_api_key')
   
4Ô∏è‚É£ GET DATA FILE LINK:
   link = get_data_file_link('usage_data')
   df = pd.read_excel(link)
   
5Ô∏è‚É£ GET ALL ITEMS:
   all_configs = get_all_items()
   
6Ô∏è‚É£ CHECK HEALTH:
   check_registry_health()

=====================================================
‚úÖ Config loaded and ready to use!
""")


üìö HOW TO USE THIS CONFIG IN OTHER NOTEBOOKS

1Ô∏è‚É£ LOAD CONFIG IN ANY NOTEBOOK:
   %run vyapar_config.ipynb

2Ô∏è‚É£ GET AN ITEM:
   model = get_item('default_model')
   
3Ô∏è‚É£ GET API KEY:
   api_key = get_api_key('openai_api_key')
   
4Ô∏è‚É£ GET DATA FILE LINK:
   link = get_data_file_link('usage_data')
   df = pd.read_excel(link)
   
5Ô∏è‚É£ GET ALL ITEMS:
   all_configs = get_all_items()
   
6Ô∏è‚É£ CHECK HEALTH:
   check_registry_health()

‚úÖ Config loaded and ready to use!

