In [None]:
import asyncio
import sys
import os
import nest_asyncio
import json
from time import time
from typing import List, Dict
from rich.console import Console
from rich.table import Table

# Make the project's top-level packages (e.g. `agents`) importable from this
# notebook: the project root is taken to be two directory levels above the
# current working directory.
project_root = os.path.dirname(os.path.dirname(os.getcwd()))
if project_root not in sys.path:
    sys.path.append(project_root)

# Project imports must come after the sys.path tweak above.
from agents.sql_with_preprocess.types import AgentState
from langchain_core.messages import HumanMessage
from agents.sql_with_preprocess.main import runworkflow as arun

# Patch asyncio so asyncio.run() can be used from inside the notebook's
# already-running event loop.
nest_asyncio.apply()
# Shared rich console used for all output produced by this cell.
console = Console()

# Earlier free-form test queries, kept for reference only; the categorized
# `queries` dict below is what the batch run actually uses.
# queries = [
#     "Show me batting stats of Dhoni vs. Australia.",
#     "How many runs did Sachin baby score at Eden Gardens,kolkata?",
#     "Give me the top 5 players from India who've taken the most wickets.",
#     "Who has the highest strike rate in T20 matches?",
#     "Fetch me the bowlers with the best economy rate in ODIs.",
#     "Find all matches where Kohli was dismissed for a duck.",
#     "List the grounds in England where Rohit scored centuries.",
#     "Who are the left-handed batsmen in the database?",
#     "Which teams played in the 2011 World Cup final?",
#     "Show me the biggest six-hitters in the IPL."
# ]

# Test queries grouped by category: maps a category name to the list of
# natural-language questions sent to the agent for that category.
# NOTE(review): the abbreviations and misspellings inside the strings look
# intentional (inputs for name matching) -- confirm before "correcting" them.
queries = {
    # 1. Player search queries
    "player_search": [
        "fazal haq faroo bowling stats for srh",
        "virat Kohli's batting statistics",
        "M.S. Dhoni's career performance",
        "steve Smith's batting average by year",
        "ishant Sharma's bowling records by competition",
        "shadab khan's performance in 2023",
        "trav head  stats by year",
        "r pant career statistics",
    ],

    # 2. Batsman vs. bowler matchup queries
    "head_to_head": [
        "v Kohli vs James Anderson head-to-head stats",
        "david Warner vs Indian bowlers statistics",
        "r Ashwin's LBW dismissals against left-handers",
        "fast bowlers' performance against rohit sharma",
        "leg spinners' bowling records against Steve Smith",
    ],

    # 3. Leaderboard queries
    "leaderboard": [
        "most caught behind dismissals batters in uppal stadium ",
        "top cover drive players stats in 1-10 overs ",
        "best bowling averages bowlers in hyderabad",
        "highest run-scorers batter in 2023",
    ],

    # 4. Venue search queries
    "venue_stats": [
        "match statistics at Lord's Cricket Ground",
        "M Chinnaswamy Stadium batting averages in ipl",
        "mcg batting stats by year ",
        "Sydney bowling stats by batter type",
    ],

    # 5. Team search queries
    "team_stats": [
        "Australia vs New Zealand head-to-head record",
        "csk bowling stats in cheapuak by year wise",
        "rcb batting stats",
        "india win-loss record by venue",
    ]
}
async def test_query(query: str) -> Dict:
    """Run one query through the agent workflow and time it.

    Args:
        query: Natural-language question passed to ``arun``.

    Returns:
        A dict with:
          * ``query`` -- the input question,
          * ``response`` -- the generated SQL (``result['sql_query']``) on
            success, or an ``"Error: ..."`` string when the workflow raised,
          * ``search_result`` -- ``result['search_result']`` on success,
            ``None`` on failure,
          * ``time_taken`` -- elapsed wall-clock seconds, rounded to 2 places.
    """
    start_time = time()
    # Default for the failure path, where no workflow result exists to read from.
    search_result = None

    try:
        result = await arun(query)
        sql_query = result['sql_query']
        search_result = result['search_result']
    except Exception as e:
        # One failing query must not abort the whole batch: report the error
        # text in place of the SQL and keep going.
        sql_query = f"Error: {str(e)}"

    # Single exit point, so the elapsed time is always measured after the
    # workflow call has finished (successfully or not).
    return {
        "query": query,
        "response": sql_query,
        "search_result": search_result,
        "time_taken": round(time() - start_time, 2),
    }

async def batch_test(
    queries: Dict[str, List[str]],
    output_path: str = 'sql_agent_test_results_2.0-flash-mistral.json',
):
    """Run every query sequentially, print a summary table and save the results.

    Args:
        queries: Mapping of category name to the list of questions in that
            category.
        output_path: File the collected results are written to as JSON. The
            default is the file name this notebook has always used.

    The JSON file holds a dict keyed by category; each value is the list of
    result dicts returned by ``test_query`` with an added ``category`` key.
    """
    # Create results table
    table = Table(title="Search Agent Test Results")
    table.add_column("Category", style="magenta")
    table.add_column("Query", style="cyan")
    table.add_column("Response", style="green")
    table.add_column("Time (s)", justify="right", style="yellow")

    console.print("\n[bold]Starting batch testing...[/bold]\n")

    results = {}
    for category, category_queries in queries.items():
        console.print(f"\n[bold]{category.upper()} Queries[/bold]")

        category_results = []
        # Queries are awaited one at a time so each timing reflects a single
        # workflow run.
        for query in category_queries:
            result = await test_query(query)
            result['category'] = category
            category_results.append(result)

            table.add_row(
                category,
                result["query"],
                result["response"],
                str(result["time_taken"]),
            )

        results[category] = category_results

    # Display results
    console.print(table)

    # Save results to file
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    # Report the path that was actually written (the old message named a
    # different file than the one being saved).
    console.print(f"\n[bold green]Results saved to {output_path}[/bold green]")

# Run the whole categorized `queries` suite defined earlier in this cell.
# asyncio.run() is called from a notebook cell here, which relies on the
# nest_asyncio.apply() call made during setup.
asyncio.run(batch_test(queries))

In [None]:
# Run the agent against an existing LangSmith dataset.
import sys
import os
# Treat the directory two levels above the current working directory as the
# project root and make it importable (needed for the `agents` import below).
project_root = os.path.dirname(os.path.dirname(os.getcwd()))
if project_root not in sys.path:
    sys.path.append(project_root)
from datetime import datetime
from csv import DictWriter
from langsmith import Client
from langsmith.evaluation import evaluate, aevaluate
from openai import AsyncOpenAI
from agents.sql_with_preprocess.main import runworkflow
import json
# client = AsyncOpenAI()
import nest_asyncio
# Patch asyncio so asyncio.run() can be used from inside the notebook's
# already-running event loop.
nest_asyncio.apply()
from dotenv import load_dotenv
# Load environment variables from a .env file before any client is created.
load_dotenv()

def _append_log_entry(json_file: str, entry: dict) -> None:
    """Append ``entry`` to the JSON list stored in ``json_file``.

    The parent directory is created when missing. A missing or unparsable
    file is treated as an empty list. The new content is serialized in memory
    before the file is opened for writing, so an entry that cannot be
    serialized raises without truncating the existing log.
    """
    os.makedirs(os.path.dirname(json_file) or ".", exist_ok=True)

    existing_data = []
    if os.path.exists(json_file):
        with open(json_file, 'r', encoding='utf-8') as f:
            try:
                existing_data = json.load(f)
            except json.JSONDecodeError:
                existing_data = []

    existing_data.append(entry)
    payload = json.dumps(existing_data, indent=2, ensure_ascii=False)

    with open(json_file, 'w', encoding='utf-8') as f:
        f.write(payload)


async def test_agent():
    """Evaluate the SQL agent workflow on a LangSmith dataset.

    Reads a fixed dataset by id, runs ``runworkflow`` on every example through
    ``aevaluate`` (one example at a time) and appends each question with its
    ``sequence`` and ``search_result`` to ``logs/sql_agent_evaluation.json``.
    A failure of the evaluation as a whole is printed, not raised.
    """
    langsmith_client = Client()
    # Alternative dataset id used previously: "07dbb645-2988-41c8-9f85-b3a37f51bdb2"
    dataset = langsmith_client.read_dataset(dataset_id="2d5bd4c9-1c73-4064-905f-00a5c924487a")

    async def runs(inputs: dict) -> dict:
        """Target function for ``aevaluate``: answer one example and log it."""
        try:
            # NOTE(review): expects the question nested under an 'inputs' key --
            # confirm against the example schema of the dataset in use.
            query = inputs['inputs']["question"]
            print(f"Processing question: {query}")

            result = await runworkflow(query)

            # Read the required field before logging, so a missing 'sequence'
            # is reported as a run failure and not as a JSON-writing problem.
            sequence = result['sequence']
            search_result = result.get('search_result', '')

            # Logging is best-effort: a failure here must not fail the run.
            try:
                _append_log_entry(
                    os.path.join("logs", "sql_agent_evaluation.json"),
                    {
                        "question": query,
                        "sequence": sequence,
                        "search_result": search_result,
                    },
                )
            except Exception as e:
                print(f"Error writing to JSON: {e}")

            return {
                "output": {
                    "sequence": sequence,
                    "search_result": search_result,
                }
            }
        except Exception as e:
            print(f"Error in runs function: {e}")
            raise

    # Timestamp used both in the experiment name and as its version metadata.
    current_time = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")

    try:
        result = await aevaluate(
            runs,
            data=dataset,  # the LangSmith dataset fetched above
            experiment_prefix=f"sql_agent_{current_time}",
            max_concurrency=1,
            metadata={
                "version": f"{current_time}",
                "revision_id": "testing",
                "supervisor": "1.5-flash",
                "search": "mistral-small",
            },
        )
        print("Evaluation completed:", result)
    except Exception as e:
        print(f"Evaluation failed: {e}")

# Start the dataset evaluation; asyncio.run() works in the notebook because
# nest_asyncio.apply() was called during setup.
import asyncio
asyncio.run(test_agent())

in table dir C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata
Loading store for category 'hdata_bat_hand' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_bat_hand'
Loading store for category 'hdata_bat_out' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_bat_out'
Loading store for category 'hdata_bowl_kind' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_bowl_kind'
Loading store for category 'hdata_bowl_style' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_bowl_style'
Loading store for category 'hdata_competition' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_competition'
Loading store for category 'hdata_country' from 'C:\Users\adith\Documents\Projects\python-projects\csql-agent\agents\tables\hdata\hdata_country'
Loading store fo

  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

In [1]:
# Create a LangSmith dataset from the categorized test queries.

from langsmith import Client
from langsmith.evaluation import evaluate, aevaluate
from openai import AsyncOpenAI
from dotenv import load_dotenv
import json
# Load environment variables from a .env file before the client is created.
load_dotenv()
# Client used for all dataset operations in this cell.
langsmith_client = Client()
# Questions to upload, grouped by category: maps a category name to the list
# of natural-language questions in that category.
# NOTE(review): the abbreviations and misspellings inside the strings look
# intentional (inputs for name matching) -- confirm before "correcting" them.
queries = {
    # 1. Player search queries
    "player_search": [
        "fazal haq faroo bowling stats for srh",
        "virat Kohli's batting statistics",
        "M.S. Dhoni's career performance",
        "steve Smith's batting average by year",
        "ishant Sharma's bowling records by competition",
        "shadab khan's performance in 2023",
        "trav head  stats by year",
        "r pant career statistics",
    ],

    # 2. Batsman vs. bowler matchup queries
    "head_to_head": [
        "v Kohli vs James Anderson head-to-head stats",
        "david Warner vs Indian bowlers statistics",
        "r Ashwin's LBW dismissals against left-handers",
        "fast bowlers' performance against rohit sharma",
        "leg spinners' bowling records against Steve Smith",
    ],

    # 3. Leaderboard queries
    "leaderboard": [
        "most caught behind dismissals batters in uppal stadium ",
        "top cover drive players stats in 1-10 overs ",
        "best bowling averages bowlers in hyderabad",
        "highest run-scorers batter in 2023",
    ],

    # 4. Venue search queries
    "venue_stats": [
        "match statistics at Lord's Cricket Ground",
        "M Chinnaswamy Stadium batting averages in ipl",
        "mcg batting stats by year ",
        "Sydney bowling stats by batter type",
    ],

    # 5. Team search queries
    "team_stats": [
        "Australia vs New Zealand head-to-head record",
        "csk bowling stats in cheapuak by year wise",
        "rcb batting stats",
        "india win-loss record by venue",
    ]
}
# Register the dataset that will hold the evaluation questions.
dataset = langsmith_client.create_dataset(
    dataset_name="sql_agent_evaluation_0401",
    description="cricmetric style questions",
)

# Flatten the categorized queries into one payload per question, each tagged
# with the category it came from.
evaluation_data = []
for category, category_queries in queries.items():
    evaluation_data.extend(
        {"question": question, "type": category}
        for question in category_queries
    )

# Upload the payloads one by one, in the order they were collected.
for payload in evaluation_data:
    langsmith_client.create_example(inputs=payload, dataset_id=dataset.id)


In [2]:
# Display the id of the dataset created in the previous cell.
dataset.id

UUID('07dbb645-2988-41c8-9f85-b3a37f51bdb2')

In [1]:
import json
import requests
from bs4 import BeautifulSoup

class MatchNotFoundError(Exception):
    """Raised when ESPNcricinfo responds with HTTP 404 for a match resource."""


class NoScorecardError(Exception):
    """Raised when the match JSON response says the scorecard is not yet available."""


class Match(object):
    """Snapshot of one ESPNcricinfo match, identified by its numeric match id.

    Constructing an instance performs two HTTP requests (the match JSON and
    the match HTML page) and copies a fixed set of fields onto the instance as
    plain attributes. The ``_name()`` helpers compute those attributes from
    ``self.json`` and ``self.comms_json``. Helpers for optional data return
    ``None`` when the field is missing; the others raise on a missing key.
    """

    # Seconds to wait for a response; without a timeout ``requests`` can block
    # indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Errors that mean "this optional field is absent or malformed". Catching
    # only these (instead of a bare ``except``) keeps KeyboardInterrupt and
    # SystemExit from being swallowed.
    _LOOKUP_ERRORS = (KeyError, IndexError, TypeError, ValueError, AttributeError)

    def __init__(self, match_id):
        """Download the data for ``match_id`` and populate the attributes.

        Raises:
            MatchNotFoundError: a match URL answered with HTTP 404.
            NoScorecardError: the JSON endpoint reports no scorecard yet.
        """
        self.match_id = match_id
        self.match_url = "https://www.espncricinfo.com/matches/engine/match/{0}.html".format(str(match_id))
        self.json_url = "https://www.espncricinfo.com/matches/engine/match/{0}.json".format(str(match_id))
        self.headers = {'user-agent': 'Mozilla/5.0'}
        self.json = self.get_json()
        self.html = self.get_html()
        self.comms_json = self.get_comms_json()
        if self.json:
            # Assignment order matters: some helpers read attributes set by
            # earlier lines (event_url, innings, toss_winner).
            self.status = self._status()
            self.match_class = self._match_class()
            self.season = self._season()
            self.description = self._description()
            self.legacy_scorecard_url = self._legacy_scorecard_url()
            self.series = self._series()
            self.series_name = self._series_name()
            self.series_id = self._series_id()
            self.event_url = "http://core.espnuk.org/v2/sports/cricket/leagues/{0}/events/{1}".format(str(self.series_id), str(match_id))
            self.details_url = self._details_url()
            self.officials = self._officials()
            self.current_summary = self._current_summary()
            self.present_datetime_local = self._present_datetime_local()
            self.present_datetime_gmt = self._present_datetime_gmt()
            self.start_datetime_local = self._start_datetime_local()
            self.start_datetime_gmt = self._start_datetime_gmt()
            self.cancelled_match = self._cancelled_match()
            self.rain_rule = self._rain_rule()
            self.date = self._date()
            self.continent = self._continent()
            self.town_area = self._town_area()
            self.town_name = self._town_name()
            self.town_id = self._town_id()
            self.weather_location_code = self._weather_location_code()
            self.match_title = self._match_title()
            self.result = self._result()
            self.ground_id = self._ground_id()
            self.ground_name = self._ground_name()
            self.lighting = self._lighting()
            self.followon = self._followon()
            self.scheduled_overs = self._scheduled_overs()
            self.innings_list = self._innings_list()
            self.innings = self._innings()
            self.latest_batting = self._latest_batting()
            self.latest_bowling = self._latest_bowling()
            self.latest_innings = self._latest_innings()
            self.latest_innings_fow = self._latest_innings_fow()
            self.team_1 = self._team_1()
            self.team_1_id = self._team_1_id()
            self.team_1_abbreviation = self._team_1_abbreviation()
            self.team_1_players = self._team_1_players()
            self.team_1_innings = self._team_1_innings()
            self.team_1_run_rate = self._team_1_run_rate()
            self.team_1_overs_batted = self._team_1_overs_batted()
            self.team_1_batting_result = self._team_1_batting_result()
            self.team_2 = self._team_2()
            self.team_2_id = self._team_2_id()
            self.team_2_abbreviation = self._team_2_abbreviation()
            self.team_2_players = self._team_2_players()
            self.team_2_innings = self._team_2_innings()
            self.team_2_run_rate = self._team_2_run_rate()
            self.team_2_overs_batted = self._team_2_overs_batted()
            self.team_2_batting_result = self._team_2_batting_result()
            if self.status != 'dormant':
                self.home_team = self._home_team()
                self.batting_first = self._batting_first()
                self.match_winner = self._match_winner()
                self.toss_winner = self._toss_winner()
                self.toss_decision = self._toss_decision()
                self.toss_decision_name = self._toss_decision_name()
                self.toss_choice_team_id = self._toss_choice_team_id()
                self.toss_winner_team_id = self._toss_winner_team_id()
                self.espn_api_url = self._espn_api_url()
                # from comms_json
                self.rosters = self._rosters()
                self.all_innings = self._all_innings()

    def __str__(self):
        """Return the match description from the match JSON."""
        return self.json['description']

    def __unicode__(self):
        """Return the match description (same text as ``__str__``)."""
        return self.json['description']

    def __repr__(self):
        return f'{self.__class__.__name__}({self.match_id!r})'

    # ------------------------------------------------------------------
    # Fetching
    # ------------------------------------------------------------------

    def get_json(self):
        """Fetch and decode the match JSON.

        Raises:
            MatchNotFoundError: the endpoint answered with HTTP 404.
            NoScorecardError: the body contains 'Scorecard not yet available'.
        """
        r = requests.get(self.json_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        if r.status_code == 404:
            raise MatchNotFoundError(f"Match {self.match_id} not found at {self.json_url}")
        if 'Scorecard not yet available' in r.text:
            raise NoScorecardError(f"Scorecard not yet available for match {self.match_id}")
        return r.json()

    def get_html(self):
        """Fetch the match page and return it parsed with BeautifulSoup.

        Raises:
            MatchNotFoundError: the page answered with HTTP 404.
        """
        r = requests.get(self.match_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
        if r.status_code == 404:
            raise MatchNotFoundError(f"Match {self.match_id} not found at {self.match_url}")
        return BeautifulSoup(r.text, 'html.parser')

    def get_comms_json(self):
        """Decode the JSON embedded in the page's 16th ``<script>`` tag.

        Returns ``None`` when that tag does not exist, is empty, or does not
        contain valid JSON.
        """
        try:
            # NOTE(review): the position of the data script is hard-coded and
            # will silently yield None if the page layout changes.
            text = self.html.find_all('script')[15].string
            return json.loads(text)
        except self._LOOKUP_ERRORS:
            return None

    # ------------------------------------------------------------------
    # Shared lookup helpers
    # ------------------------------------------------------------------

    def match_json(self):
        """Return the ``match`` section of the match JSON."""
        return self.json['match']

    def _centre_common(self, key):
        """Return ``json['centre']['common'][key]``, or None when absent."""
        try:
            return self.json['centre']['common'][key]
        except self._LOOKUP_ERRORS:
            return None

    def _team_innings(self, index):
        """Return the first innings batted by team ``index`` (0 or 1), or None."""
        try:
            team_id = self.json['team'][index]['team_id']
            return next(
                (inn for inn in self.json['innings'] if inn['batting_team_id'] == team_id),
                None,
            )
        except self._LOOKUP_ERRORS:
            return None

    def _team_innings_value(self, index, key, convert=None):
        """Return field ``key`` of team ``index``'s innings, optionally converted.

        Returns None when the team has no innings, the key is missing, or the
        conversion fails.
        """
        try:
            value = self._team_innings(index)[key]
            return convert(value) if convert is not None else value
        except self._LOOKUP_ERRORS:
            return None

    def _abbreviation_for_team_id(self, team_id):
        """Map a team id to that team's abbreviation.

        Returns None when the id belongs to neither team, instead of guessing
        team 2.
        """
        if team_id == self._team_1_id():
            return self._team_1_abbreviation()
        if team_id == self._team_2_id():
            return self._team_2_abbreviation()
        return None

    def _page_content(self):
        """Return the ``content`` section of the embedded page JSON.

        Raises a lookup error when ``comms_json`` is None or lacks the path;
        callers decide what to fall back to.
        """
        return self.comms_json['props']['pageProps']['data']['pageData']['content']

    def _scorecard_innings_value(self, innings, key):
        """Return ``key`` from scorecard innings ``str(innings)``, or None."""
        try:
            return self._page_content()['scorecard']['innings'][str(innings)][key]
        except self._LOOKUP_ERRORS:
            return None

    # ------------------------------------------------------------------
    # URLs
    # ------------------------------------------------------------------

    def innings_comms_url(self, innings=1, page=1):
        """Return the commentary API URL for the given innings and page."""
        return f"https://hsapi.espncricinfo.com/v1/pages/match/comments?lang=en&leagueId={self.series_id}&eventId={self.match_id}&period={innings}&page={page}&filter=full&liveTest=false"

    def _espn_api_url(self):
        return "https://site.api.espn.com/apis/site/v2/sports/cricket/{0}/summary?event={1}".format(self.series_id, self.match_id)

    def _legacy_scorecard_url(self):
        return "https://static.espncricinfo.com" + self.match_json()['legacy_url']

    def _details_url(self, page=1, number=1000):
        """Return the details URL under ``self.event_url`` (must already be set)."""
        return self.event_url + "/competitions/{0}/details?page_size={1}&page={2}".format(str(self.match_id), str(number), str(page))

    # ------------------------------------------------------------------
    # Match metadata
    # ------------------------------------------------------------------

    def _status(self):
        return self.match_json()['match_status']

    def _match_class(self):
        """Return the international class card, or the general one when blank."""
        international = self.match_json()['international_class_card']
        if international != "":
            return international
        return self.match_json()['general_class_card']

    def _season(self):
        return self.match_json()['season']

    def _description(self):
        return self.json['description']

    def _series(self):
        return self.json['series']

    def _series_name(self):
        """Return the name of the last listed series, or None when unavailable."""
        try:
            return self.json['series'][-1]['series_name']
        except self._LOOKUP_ERRORS:
            return None

    def _series_id(self):
        return self.json['series'][-1]['core_recreation_id']

    def _officials(self):
        return self.json['official']

    # live matches only
    def _current_summary(self):
        return self.match_json().get('current_summary')

    def _present_datetime_local(self):
        return self.match_json()['present_datetime_local']

    def _present_datetime_gmt(self):
        return self.match_json()['present_datetime_gmt']

    def _start_datetime_local(self):
        return self.match_json()['start_datetime_local']

    def _start_datetime_gmt(self):
        return self.match_json()['start_datetime_gmt']

    def _cancelled_match(self):
        """Return False only when the JSON flag is exactly 'N'."""
        return self.match_json()['cancelled_match'] != 'N'

    def _rain_rule(self):
        """Return the rain rule name when the rain rule flag is "1", else None."""
        if self.match_json().get('rain_rule') == "1":
            return self.match_json()['rain_rule_name']
        return None

    def _date(self):
        return self.match_json()['start_date_raw']

    def _continent(self):
        return self.match_json().get('continent_name')

    def _town_area(self):
        return self.match_json().get('town_area')

    def _town_name(self):
        return self.match_json().get('town_name')

    def _town_id(self):
        return self.match_json().get('town_id')

    def _weather_location_code(self):
        return self.match_json().get('weather_location_code')

    def _match_title(self):
        return self.match_json()['cms_match_title']

    def _result(self):
        return self.json['live']['status']

    def _ground_id(self):
        return self.match_json()['ground_id']

    def _ground_name(self):
        return self.match_json()['ground_name']

    def _lighting(self):
        return self.match_json()['floodlit_name']

    def _followon(self):
        """Return True only when the follow-on flag is exactly '1'."""
        return self.match_json().get('followon') == '1'

    def _scheduled_overs(self):
        """Return the scheduled overs as an int, or None when missing/non-numeric."""
        try:
            return int(self.match_json()['scheduled_overs'])
        except self._LOOKUP_ERRORS:
            return None

    # ------------------------------------------------------------------
    # Innings
    # ------------------------------------------------------------------

    def _innings_list(self):
        return self._centre_common('innings_list')

    def _innings(self):
        return self.json['innings']

    def _latest_batting(self):
        return self._centre_common('batting')

    def _latest_bowling(self):
        return self._centre_common('bowling')

    def _latest_innings(self):
        return self._centre_common('innings')

    def _latest_innings_fow(self):
        return self.json['centre'].get('fow')

    # ------------------------------------------------------------------
    # Teams
    # ------------------------------------------------------------------

    def _team_1(self):
        return self.json['team'][0]

    def _team_1_id(self):
        return self._team_1()['team_id']

    def _team_1_abbreviation(self):
        return self._team_1()['team_abbreviation']

    def _team_1_players(self):
        return self._team_1().get('player', [])

    def _team_1_innings(self):
        return self._team_innings(0)

    def _team_1_run_rate(self):
        return self._team_innings_value(0, 'run_rate', float)

    def _team_1_overs_batted(self):
        return self._team_innings_value(0, 'overs', float)

    def _team_1_batting_result(self):
        return self._team_innings_value(0, 'event_name')

    def _team_2(self):
        return self.json['team'][1]

    def _team_2_id(self):
        return self._team_2()['team_id']

    def _team_2_abbreviation(self):
        return self._team_2()['team_abbreviation']

    def _team_2_players(self):
        return self._team_2().get('player', [])

    def _team_2_innings(self):
        return self._team_innings(1)

    def _team_2_run_rate(self):
        return self._team_innings_value(1, 'run_rate', float)

    def _team_2_overs_batted(self):
        return self._team_innings_value(1, 'overs', float)

    def _team_2_batting_result(self):
        return self._team_innings_value(1, 'event_name')

    def _home_team(self):
        """Return the home team's abbreviation, or None if neither team is home."""
        return self._abbreviation_for_team_id(self.match_json()['home_team_id'])

    def _batting_first(self):
        """Return the abbreviation of the team batting first, or None if unknown."""
        return self._abbreviation_for_team_id(self.match_json()['batting_first_team_id'])

    def _match_winner(self):
        """Return the winner's abbreviation, or None when neither team is the winner."""
        return self._abbreviation_for_team_id(self.match_json()['winner_team_id'])

    # ------------------------------------------------------------------
    # Toss
    # ------------------------------------------------------------------

    def _toss_winner(self):
        """Return team 1's id if it won the toss, otherwise team 2's id."""
        if self._team_1_id() == self.match_json()['toss_winner_team_id']:
            return self._team_1_id()
        return self._team_2_id()

    def _toss_decision(self):
        """Return the toss decision code.

        When the JSON value is blank and innings exist, the code is inferred:
        '1' if the toss winner batted in the first innings, '2' otherwise.
        Reads ``self.innings`` and ``self.toss_winner``, which ``__init__``
        sets beforehand.
        """
        recorded = self.match_json()['toss_decision']
        if recorded == '' and len(self.innings) > 0:
            return '1' if self.innings[0]['batting_team_id'] == self.toss_winner else '2'
        return recorded

    def _toss_decision_name(self):
        """Return the toss decision name, inferring 'bat'/'bowl' when blank.

        Uses the same inference (and the same instance attributes) as
        ``_toss_decision``.
        """
        recorded = self.match_json()['toss_decision_name']
        if recorded == '' and len(self.innings) > 0:
            return 'bat' if self.innings[0]['batting_team_id'] == self.toss_winner else 'bowl'
        return recorded

    def _toss_choice_team_id(self):
        return self.match_json()['toss_choice_team_id']

    def _toss_winner_team_id(self):
        return self.match_json()['toss_winner_team_id']

    # ------------------------------------------------------------------
    # comms_json methods
    # ------------------------------------------------------------------

    def _rosters(self):
        """Return ``matchPlayers`` from the page JSON, or None when unavailable."""
        try:
            return self._page_content()['matchPlayers']
        except self._LOOKUP_ERRORS:
            return None

    def _all_innings(self):
        """Return the scorecard innings from the page JSON.

        Falls back to ``json['innings']`` when the page JSON is unavailable.
        """
        try:
            return self._page_content()['scorecard']['innings']
        except self._LOOKUP_ERRORS:
            return self.json['innings']

    def batsmen(self, innings):
        """Return ``inningBatsmen`` for the given innings, or None."""
        return self._scorecard_innings_value(innings, 'inningBatsmen')

    def bowlers(self, innings):
        """Return ``inningBowlers`` for the given innings, or None."""
        return self._scorecard_innings_value(innings, 'inningBowlers')

    def extras(self, innings):
        """Return ``extras`` for the given innings, or None."""
        return self._scorecard_innings_value(innings, 'extras')

    def fows(self, innings):
        """Return ``inningFallOfWickets`` for the given innings, or None."""
        return self._scorecard_innings_value(innings, 'inningFallOfWickets')



In [2]:
# Build a Match for match id 1439902; the constructor fetches the match data
# over HTTP.
mtch=Match(1439902)