In [None]:
import asyncio
import sys
import os
import nest_asyncio
import json
from time import time
from typing import List, Dict
from rich.console import Console
from rich.table import Table

# Make the directory two levels above the current working directory
# importable, so that the `agents.*` packages resolve from this notebook.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir)
)
if project_root not in sys.path:
    sys.path.append(project_root)

from agents.sql_with_preprocess.types import AgentState
from langchain_core.messages import HumanMessage
from agents.sql_with_preprocess.main import runworkflow as arun

# Patch asyncio so asyncio.run() can be called further down in this cell;
# presumably required because the notebook kernel already has a running
# event loop — confirm if this is ever run as a plain script.
nest_asyncio.apply()
# Single rich console shared by all printing in this cell.
console = Console()

# Test queries
# queries = [
#     "Show me batting stats of Dhoni vs. Australia.",
#     "How many runs did Sachin baby score at Eden Gardens,kolkata?",
#     "Give me the top 5 players from India who've taken the most wickets.",
#     "Who has the highest strike rate in T20 matches?", 
#     "Fetch me the bowlers with the best economy rate in ODIs.",
#     "Find all matches where Kohli was dismissed for a duck.",
#     "List the grounds in England where Rohit scored centuries.",
#     "Who are the left-handed batsmen in the database?",
#     "Which teams played in the 2011 World Cup final?",
#     "Show me the biggest six-hitters in the IPL."
# ]
# Test queries grouped by category. Each key is a category label and each
# value is the list of natural-language questions sent to the agent.
# NOTE(review): several entries are abbreviated, misspelled, or carry extra
# whitespace (e.g. "fazal haq faroo", "trav head  stats", "cheapuak") —
# presumably on purpose to exercise fuzzy entity matching; confirm before
# "correcting" any of them.
queries = {
    # 1. Player search queries
    "player_search": [
        "fazal haq faroo bowling stats for srh",
        "virat Kohli's batting statistics",
        "M.S. Dhoni's career performance",
        "steve Smith's batting average by year",
        "ishant Sharma's bowling records by competition",
        "shadab khan's performance in 2023",
        "trav head  stats by year",
        "r pant career statistics",
    ],

    # 2. Batsman vs. bowler matchup queries
    "head_to_head": [
        "v Kohli vs James Anderson head-to-head stats",
        "david Warner vs Indian bowlers statistics",
        "r Ashwin's LBW dismissals against left-handers",
        "fast bowlers' performance against rohit sharma",
        "leg spinners' bowling records against Steve Smith",
    ],

    # 3. Leaderboard queries
    "leaderboard": [
        "most caught behind dismissals batters in uppal stadium ",
        "top cover drive players stats in 1-10 overs ",
        "best bowling averages bowlers in hyderabad",
        "highest run-scorers batter in 2023",
    ],

    # 4. Venue search queries
    "venue_stats": [
        "match statistics at Lord's Cricket Ground",
        "M Chinnaswamy Stadium batting averages in ipl",
        "mcg batting stats by year ",
        "Sydney bowling stats by batter type",
    ],

    # 5. Team search queries
    "team_stats": [
        "Australia vs New Zealand head-to-head record",
        "csk bowling stats in cheapuak by year wise",
        "rcb batting stats",
        "india win-loss record by venue",
    ]
}
async def test_query(query: str) -> Dict:
    """Run a single query through the workflow and time it.

    The literal suffix ``' dont execute..'`` is appended to the query before
    it is passed to ``arun`` (presumably to ask the agent to generate SQL
    without running it — confirm against the workflow's prompt handling).

    Args:
        query: Natural-language question to send to the agent.

    Returns:
        A dict with the keys ``query`` (the original question), ``response``
        (the generated SQL, or ``"Error: <message>"`` if the workflow
        raised), ``search_result`` (``None`` when the workflow raised) and
        ``time_taken`` (elapsed wall-clock seconds, rounded to 2 decimals).
    """
    start_time = time()
    search_result = None

    try:
        result = await arun(query + ' dont execute..')
        sql_query = result['sql_query']
        search_result = result['search_result']
    except Exception as e:
        # Deliberately broad: this is a batch harness, so one failing query
        # is recorded in the results instead of aborting the whole run.
        sql_query = f"Error: {str(e)}"

    # Elapsed time is computed once here, after both the success and the
    # failure path, so it is always defined.
    return {
        "query": query,
        "response": sql_query,
        "search_result": search_result,
        "time_taken": round(time() - start_time, 2),
    }

async def batch_test(queries: Dict[str, List[str]]):
    """Run every query, print a summary table and save the results as JSON.

    Queries are executed sequentially, one ``test_query`` call at a time.

    Args:
        queries: Mapping of category name to the list of questions in that
            category.

    Side effects:
        Prints progress and a results table to the shared console and
        writes all results, grouped by category, to
        ``sql_agent_test_results_2.0-flash-mistral.json`` in the current
        working directory.
    """
    # Single source of truth for the output path, so the confirmation
    # message below always names the file that was actually written.
    output_path = 'sql_agent_test_results_2.0-flash-mistral.json'

    # Create results table
    table = Table(title="Search Agent Test Results")
    table.add_column("Category", style="magenta")
    table.add_column("Query", style="cyan")
    table.add_column("Response", style="green")
    table.add_column("Time (s)", justify="right", style="yellow")

    console.print("\n[bold]Starting batch testing...[/bold]\n")

    results = {}
    for category, category_queries in queries.items():
        console.print(f"\n[bold]{category.upper()} Queries[/bold]")

        category_results = []
        for query in category_queries:
            result = await test_query(query)
            result['category'] = category
            category_results.append(result)

            # Add to table
            table.add_row(
                category,
                result["query"],
                result["response"],
                str(result["time_taken"]),
            )

        results[category] = category_results

    # Display results
    console.print(table)

    # Save results to file
    with open(output_path, 'w') as f:
        json.dump(results, f, indent=2)

    console.print(f"\n[bold green]Results saved to {output_path}[/bold green]")

# Run the full batch against the `queries` mapping defined earlier in this
# cell. asyncio.run() is called directly; nest_asyncio.apply() was invoked
# above, presumably so this works inside the notebook's running event loop.
asyncio.run(batch_test(queries))

In [1]:
#run on certain dataset
import sys
import os
# Make the directory two levels above the current working directory
# importable, so that the `agents.*` packages resolve from this notebook.
project_root = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir)
)
if project_root not in sys.path:
    sys.path.append(project_root)
from datetime import datetime
from csv import DictWriter
from langsmith import Client
from langsmith.evaluation import evaluate, aevaluate
from openai import AsyncOpenAI
from agents.sql_with_preprocess.main import runworkflow
import json
# client = AsyncOpenAI()
import nest_asyncio
nest_asyncio.apply()
from dotenv import load_dotenv
load_dotenv()

def _append_evaluation_log(entry: dict, log_dir: str = "logs") -> None:
    """Append *entry* to the JSON array in ``<log_dir>/sql_agent_evaluation.json``.

    The whole file is re-read and rewritten on every call. A missing file,
    or one that does not contain valid JSON, is treated as an empty list.
    The new content is serialised before the file is opened for writing, so
    a non-serialisable entry raises without truncating the existing log.
    """
    os.makedirs(log_dir, exist_ok=True)
    json_file = os.path.join(log_dir, "sql_agent_evaluation.json")

    existing_data = []
    if os.path.exists(json_file):
        with open(json_file, 'r', encoding='utf-8') as f:
            try:
                existing_data = json.load(f)
            except json.JSONDecodeError:
                existing_data = []

    existing_data.append(entry)
    payload = json.dumps(existing_data, indent=2, ensure_ascii=False)

    with open(json_file, 'w', encoding='utf-8') as f:
        f.write(payload)


async def test_agent():
    """Evaluate the SQL agent workflow against a LangSmith dataset.

    Reads the dataset with the hard-coded id below, runs ``runworkflow`` on
    each example's question through ``aevaluate`` (one example at a time,
    ``max_concurrency=1``), and appends each question, routing sequence and
    search result to ``logs/sql_agent_evaluation.json``.

    Any exception raised by ``aevaluate`` is caught and printed rather than
    propagated, so this coroutine itself does not raise on evaluation
    failure.
    """
    langsmith_client = Client()
    # dataset = langsmith_client.read_dataset(dataset_id="07dbb645-2988-41c8-9f85-b3a37f51bdb2")
    dataset = langsmith_client.read_dataset(dataset_id="2d5bd4c9-1c73-4064-905f-00a5c924487a")

    async def runs(inputs: dict) -> dict:
        """Target function for ``aevaluate``: run the workflow on one example.

        Expects the question at ``inputs['inputs']['question']``. Errors are
        printed and re-raised so the evaluation framework sees the failure.
        """
        try:
            query = inputs['inputs']["question"]
            print(f"Processing question: {query}")

            result = await runworkflow(query)
            search_result = result.get('search_result', '')

            # Logging to the local JSON file is best-effort: a failure here
            # is reported but must not fail the evaluation of this example.
            try:
                _append_evaluation_log({
                    "question": query,
                    "sequence": result['sequence'],
                    "search_result": search_result,
                })
            except Exception as e:
                print(f"Error writing to JSON: {e}")

            return {
                "output": {
                    "sequence": result['sequence'],
                    "search_result": search_result,
                }
            }
        except Exception as e:
            print(f"Error in runs function: {e}")
            raise

    # One timestamp is used for both the experiment prefix and the metadata
    # version so the two always agree.
    current_time = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")

    try:
        result = await aevaluate(
            runs,
            data=dataset,  # the LangSmith dataset read above
            experiment_prefix=f"sql_agent_{current_time}",
            max_concurrency=1,
            metadata={
                "version": f"{current_time}",
                "revision_id": "testing",
                "supervisor": "1.5-flash",
                "search": "qwen-2.5-72b",
            },
        )
        print("Evaluation completed:", result)
    except Exception as e:
        print(f"Evaluation failed: {e}")

import asyncio
# Start the evaluation run defined by test_agent() above.
asyncio.run(test_agent())

in table dir C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata
Loading store for category 'hdata_bat_hand' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_bat_hand'
Loading store for category 'hdata_bat_out' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_bat_out'
Loading store for category 'hdata_bowl_kind' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_bowl_kind'
Loading store for category 'hdata_bowl_style' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_bowl_style'
Loading store for category 'hdata_competition' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_competition'
Loading store for category 'hdata_country' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_country'
Loading store fo

  from .autonotebook import tqdm as notebook_tqdm


View the evaluation results for experiment: 'sql_agent_07-03-2025_17-02-59-d32e25cc' at:
https://smith.langchain.com/o/dbea2471-6360-589a-b7bc-3aa89cfaa333/datasets/2d5bd4c9-1c73-4064-905f-00a5c924487a/compare?selectedSessions=3363332f-cdb8-4cfa-b4e7-f8949cad0927




0it [00:00, ?it/s]

Processing question: trav head  stats by year
[36;1m[1;3m[-1:checkpoint][0m [1mState at the end of step -1:
[0m{'messages': []}
[36;1m[1;3m[0:tasks][0m [1mStarting 1 task for step 0:
[0m- [32;1m[1;3m__start__[0m -> {'attempts': 0,
 'change': '',
 'docs_schema': '',
 'execution_choice': False,
 'messages': [HumanMessage(content='trav head  stats by year and dont execute and table name as hdata ', additional_kwargs={}, response_metadata={})],
 'query': '',
 'referenced_values_in_table': '',
 'relevant_sql_queries': '',
 'sequence': '',
 'sql_error': False,
 'sql_query': None,
 'sql_result': '',
 'table_name': None}
[36;1m[1;3m[0:writes][0m [1mFinished step 0 with writes to 13 channels:
[0m- [33;1m[1;3mmessages[0m -> [HumanMessage(content='trav head  stats by year and dont execute and table name as hdata ', additional_kwargs={}, response_metadata={})]
- [33;1m[1;3mquery[0m -> ''
- [33;1m[1;3mexecution_choice[0m -> False
- [33;1m[1;3msql_query[0m -> None
- [3

1it [00:14, 14.54s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

2it [00:25, 12.57s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

3it [00:37, 12.05s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

4it [00:55, 14.46s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

5it [01:12, 15.42s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

6it [01:24, 14.21s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

7it [01:38, 14.34s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

8it [01:55, 15.12s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

9it [02:06, 13.79s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

10it [02:26, 15.74s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

11it [02:37, 14.30s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

12it [02:51, 14.28s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

13it [03:05, 14.12s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

14it [03:20, 14.38s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

15it [03:33, 14.05s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

16it [03:53, 15.61s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

17it [04:04, 14.23s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

18it [04:17, 13.93s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

19it [04:31, 13.99s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

20it [04:47, 14.72s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

21it [05:02, 14.65s/it]

[36;1m[1;3m[5:writes][0m [1mFinished step 5 with writes to 2 channels:
[0m- [33;1m[1;3mmessages[0m -> HumanMessage(content=' Supervisor routed to __end__ agent', additional_kwargs={}, response_metadata={})
- [33;1m[1;3msequence[0m -> ' search sql __end__'
[36;1m[1;3m[5:checkpoint][0m [1mState at the end of step 5:
[0m{'attempts': 0,
 'change': '',
 'docs_schema': 'p_match: INTEGER(identifier) (use this to count different '
                'matches) \n'
                '\n'
                'inns: INTEGER (innings number like 1 or 2 or 3 or 4)\n'
                '\n'
                'bat: STRING (batter currently batting)\n'
                '\n'
                "p_bat: INTEGER (the batter's player id)\n"
                '\n'
                'team_bat: STRING (the team that is currently batting)\n'
                '\n'
                'bowl: STRING (the bowler currently bowling)\n'
                '\n'
                "p_bowl: INTEGER (the bowler's player id)\n"
          

21it [05:02, 14.42s/it]

Evaluation completed: <AsyncExperimentResults sql_agent_07-03-2025_17-02-59-d32e25cc>





In [1]:
#creating datasets

from langsmith import Client
from langsmith.evaluation import evaluate, aevaluate
from openai import AsyncOpenAI
from dotenv import load_dotenv
import json
# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the LangSmith credentials that
# Client() picks up — confirm.
load_dotenv()
langsmith_client = Client()

# Evaluation questions grouped by category. Each key is later stored as the
# "type" of every example built from its list, and each string becomes that
# example's "question".
# NOTE(review): the misspelled / abbreviated names (e.g. "faroo", "trav head",
# "cheapuak") and the stray whitespace inside some strings look deliberate, to
# exercise fuzzy name matching — confirm before "fixing" them.
queries = {
    # 1. Player Search queries
    "player_search": [
        "fazal haq faroo bowling stats for srh",
        "virat Kohli's batting statistics",
        "M.S. Dhoni's career performance",
        "steve Smith's batting average by year",
        "ishant Sharma's bowling records by competition",
        "shadab khan's performance in 2023",
        "trav head  stats by year",
        "r pant career statistics",
    ],

    # 2. Batsman vs Bowler Matchup queries
    "head_to_head": [
        "v Kohli vs James Anderson head-to-head stats",
        "david Warner vs Indian bowlers statistics",
        "r Ashwin's LBW dismissals against left-handers",
        "fast bowlers' performance against rohit sharma",
        "leg spinners' bowling records against Steve Smith",
    ],

    # 3. Leaderboard queries
    "leaderboard": [
        "most caught behind dismissals batters in uppal stadium ",
        "top cover drive players stats in 1-10 overs ",
        "best bowling averages bowlers in hyderabad",
        "highest run-scorers batter in 2023",
    ],

    # 4. Venue Search queries
    "venue_stats": [
        "match statistics at Lord's Cricket Ground",
        "M Chinnaswamy Stadium batting averages in ipl",
        "mcg batting stats by year ",
        "Sydney bowling stats by batter type",
    ],

    # 5. Team Search queries
    "team_stats": [
        "Australia vs New Zealand head-to-head record",
        "csk bowling stats in cheapuak by year wise",
        "rcb batting stats",
        "india win-loss record by venue",
    ]
}
# Register a new LangSmith dataset that will hold the evaluation questions.
dataset = langsmith_client.create_dataset(
    dataset_name="sql_agent_evaluation_0401",
    description="cricmetric style questions",
)

# Flatten the category -> questions mapping into one record per question,
# tagging each record with the category it came from.
evaluation_data = [
    {"question": query, "type": category}
    for category, category_queries in queries.items()
    for query in category_queries
]

# Upload every record as one example of the dataset created above.
for item in evaluation_data:
    langsmith_client.create_example(dataset_id=dataset.id, inputs=item)


In [2]:
# Display the ID of the dataset created in the previous cell.
dataset.id

UUID('07dbb645-2988-41c8-9f85-b3a37f51bdb2')

In [1]:
from langchain_aws import ChatBedrock
import boto3
# Low-level Bedrock runtime client.
# NOTE(review): `bedrock` is never passed to ChatBedrock below, so the chat
# wrapper is not built on top of this client — confirm whether it is needed.
bedrock = boto3.client("bedrock-runtime")

# Chat model wrapper bound to the Claude 3.5 Haiku model ID on Bedrock.
llm = ChatBedrock(model_id="us.anthropic.claude-3-5-haiku-20241022-v1:0")

In [2]:
# Smoke test: send a trivial prompt to check that the model answers.
llm.invoke("hello")

AIMessage(content='Hi there! How are you doing today? Is there anything I can help you with?', additional_kwargs={'usage': {'prompt_tokens': 8, 'completion_tokens': 21, 'total_tokens': 29}, 'stop_reason': 'end_turn', 'thinking': {}, 'model_id': 'us.anthropic.claude-3-5-haiku-20241022-v1:0'}, response_metadata={'usage': {'prompt_tokens': 8, 'completion_tokens': 21, 'total_tokens': 29}, 'stop_reason': 'end_turn', 'thinking': {}, 'model_id': 'us.anthropic.claude-3-5-haiku-20241022-v1:0'}, id='run-5ef459b3-1c52-4178-a34b-38c208cbdcc1-0', usage_metadata={'input_tokens': 8, 'output_tokens': 21, 'total_tokens': 29})

In [5]:
import pandas as pd
# Load odata_2403.csv into a DataFrame.
# NOTE(review): the absolute Windows path only resolves on the author's
# machine. The source line echoed in this cell's output looks like the tail of
# a pandas warning raised by this call (the message itself is not shown) —
# confirm, and consider passing explicit dtypes if it is a mixed-type warning.
df=pd.read_csv(r"C:\Users\adith\Documents\Projects\python-projects\analytics\datasets\odata_2403.csv",index_col=False)

  df=pd.read_csv(r"C:\Users\adith\Documents\Projects\python-projects\analytics\datasets\odata_2403.csv",index_col=False)


In [2]:
import pandas as pd
# Load hdata_2403.csv into a DataFrame. This rebinds `df`, the same name the
# odata cell uses, so whichever cell ran last decides what `df` refers to.
# NOTE(review): the absolute Windows path only resolves on the author's
# machine. The source line echoed in this cell's output looks like the tail of
# a pandas warning raised by this call (the message itself is not shown) —
# confirm.
df = pd.read_csv(r"C:\Users\adith\Documents\Projects\python-projects\analytics\datasets\hdata_2403.csv", index_col=False)

  df = pd.read_csv(r"C:\Users\adith\Documents\Projects\python-projects\analytics\datasets\hdata_2403.csv", index_col=False)


In [6]:
# Print a structural summary of `df`: row count, column names and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2462215 entries, 0 to 2462214
Data columns (total 47 columns):
 #   Column             Dtype  
---  ------             -----  
 0   Unnamed: 0         int64  
 1   fixtureId          int64  
 2   team1              object 
 3   team2              object 
 4   matchDate          object 
 5   format             object 
 6   ground             object 
 7   country            object 
 8   inns               int64  
 9   battingTeam        object 
 10  bowlingTeam        object 
 11  batsman            object 
 12  bowler             object 
 13  batsmanHand        object 
 14  bowlerHand         object 
 15  bowlerType         object 
 16  over               int64  
 17  ball               int64  
 18  dismissalType      object 
 19  dismissedPlayer    object 
 20  shot_angle         float64
 21  shot_magnitude     float64
 22  fielding_position  object 
 23  runs_conceded      float64
 24  runs               int64  
 25  runs_scored       

In [2]:
# Print a structural summary of `df`: row count, column names and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2462215 entries, 0 to 2462214
Data columns (total 47 columns):
 #   Column             Dtype  
---  ------             -----  
 0   Unnamed: 0         int64  
 1   fixtureId          int64  
 2   team1              object 
 3   team2              object 
 4   matchDate          object 
 5   format             object 
 6   ground             object 
 7   country            object 
 8   inns               int64  
 9   battingTeam        object 
 10  bowlingTeam        object 
 11  batsman            object 
 12  bowler             object 
 13  batsmanHand        object 
 14  bowlerHand         object 
 15  bowlerType         object 
 16  over               int64  
 17  ball               int64  
 18  dismissalType      object 
 19  dismissedPlayer    object 
 20  shot_angle         float64
 21  shot_magnitude     float64
 22  fielding_position  object 
 23  runs_conceded      float64
 24  runs               int64  
 25  runs_scored       

In [3]:
# List the distinct values of the 'variation' column (the recorded output
# shows NaN among them, i.e. some rows have no value).
df['variation'].unique()

array(['googly', 'leg spinner', 'stock', 'seaming in', 'quicker',
       'seaming away', 'in-swinging', 'out-swinging', 'off break', 'arm',
       'slower', 'off cutter', 'leg cutter', nan], dtype=object)

In [5]:
# Count rows per distinct 'line' value, most frequent first; missing values
# are left out of the counts by default.
df['line'].value_counts()

line
outside off         685400
off                 107551
down leg             98054
middle               92319
leg                  63576
wide outside off     18568
Name: count, dtype: int64

In [None]:
import json
import codecs

def try_multiple_encodings(file_path):
    """
    Load a JSON file whose text encoding is unknown.

    The file is read once as raw bytes. Each candidate encoding is then tried
    in order, and the first one that both decodes the bytes and yields valid
    JSON wins. Progress is reported with ``print``.

    Args:
        file_path: Path of the file to load.

    Returns:
        The parsed JSON value, or ``None`` when no candidate encoding
        produced valid JSON.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    encodings_to_try = [
        # UTF family. 'utf-8-sig' follows 'utf-8' so that a file starting with
        # a UTF-8 BOM (which json.loads rejects after a plain utf-8 decode)
        # can still be loaded.
        'utf-8', 'utf-8-sig', 'utf-16', 'utf-16le', 'utf-16be',
        'latin-1', 'windows-1252',

        # Python escape-sequence codecs
        'unicode_escape', 'raw_unicode_escape',

        # ASCII plus alternate spellings of the single-byte Western encodings
        # above ('iso-8859-1' is latin-1, 'cp1252' is windows-1252)
        'ascii', 'iso-8859-1', 'cp1252',

        # Additional encoding possibilities
        'mac_roman', 'shift_jis', 'big5', 'euc_jp',
        'iso-8859-15', 'cp850'
    ]

    # Read the bytes a single time instead of re-opening the file for every
    # candidate encoding.
    with open(file_path, 'rb') as file:
        raw_bytes = file.read()

    for encoding in encodings_to_try:
        try:
            content = raw_bytes.decode(encoding)
        except (UnicodeError, LookupError):
            # UnicodeError (the base class, not only UnicodeDecodeError) so
            # that any codec failure moves on to the next candidate instead
            # of aborting the whole search.
            continue

        try:
            parsed_json = json.loads(content)
        except json.JSONDecodeError:
            # Decoding worked, but the resulting text is not valid JSON.
            print(f"Read succeeded with {encoding}, but JSON parsing failed")
            continue

        print(f"Successfully loaded with {encoding} encoding!")
        return parsed_json

    print("Could not load file with any of the attempted encodings")
    return None

def read_raw_bytes(file_path):
    """
    Dump a file's raw content for manual inspection.

    Prints the first 100 bytes as space-separated hex pairs, then the whole
    content decoded as UTF-8 and as Latin-1, with undecodable bytes replaced
    instead of raising. Returns nothing.
    """
    with open(file_path, 'rb') as handle:
        payload = handle.read()

    print("First 100 bytes (hex):")
    print(payload[:100].hex(' '))

    try:
        # Show the same bytes under two decodings to help spot the encoding.
        print("\nAttempting to decode:")
        utf8_text = payload.decode('utf-8', errors='replace')
        print("UTF-8 decode (may show partial content):", utf8_text)
        latin1_text = payload.decode('latin-1', errors='replace')
        print("Latin-1 decode:", latin1_text)
    except Exception as e:
        # Best-effort diagnostic: report the failure instead of aborting.
        print("Decoding error:", e)

# Path of the JSON file to inspect (absolute path on the author's machine).
file_path = r"C:\Users\adith\Documents\placements\gate\linear_algebra_notes\5a9578c3728d3a2c2111538c.json"

# First, dump the raw bytes so the content can be eyeballed.
read_raw_bytes(file_path)

# Then try to decode and parse the file with each candidate encoding.
data = try_multiple_encodings(file_path)

# Preview the parsed value. Compare against None rather than relying on
# truthiness, so a valid but empty document ({} or []) is still reported
# instead of being treated like a failed load.
if data is not None:
    print("\nFirst few keys or items:")
    if isinstance(data, dict):
        print(list(data)[:5])
    elif isinstance(data, list):
        print(data[:5])

In [1]:
import chardet

# Load the whole file as bytes; detection runs on the undecoded content.
with open(r"C:\Users\adith\Documents\placements\gate\linear_algebra_notes\5a9578c3728d3a2c2111538c.json", mode='rb') as f:
    rawdata = f.read()

# Ask chardet for its guess and keep only the encoding name. The recorded
# output of this cell shows None, i.e. no encoding was identified.
result = chardet.detect(rawdata)
encoding = result['encoding']
print(f"Detected encoding: {encoding}")


Detected encoding: None
