# –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ –¥–∞—Ç–∞—Å–µ—Ç–∞

In [1]:
!pip install sentence-transformers -q

In [None]:
from tqdm import tqdm
import pandas as pd

#from sentence_transformers import SentenceTransformer
#from transformers import AutoTokenizer, AutoModelForCausalLM
import numpy as np
#import torch

from sqlglot import parse_one
from sqlglot.diff import ChangeDistiller


from sqlalchemy import create_engine
from prompting import PromptBuilder
from sklearn.utils import shuffle
from sqlalchemy import Connection

### <div class='alert alert-info'>spans.py</div>


In [None]:
from abc import ABC
from dataclasses import dataclass

@dataclass
class Span(ABC):
    pass


@dataclass
class ExtendedSqlSpan(Span):
    NL : str
    sql_gold : str
    sql_pred : str
    df_soft : int
    df_flexible : int
    df_gold_IN_df_pred : bool
    df_pred_IN_df_gold : bool
    df_gold_columns : list[str]
    df_pred_columns : list[str]
    TED : int
    Error : str | None

### <div class='alert alert-info'>table_finder.py</div>

In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from dataclasses import dataclass
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import TfidfVectorizer

@dataclass
class DtoColumn:
    Name : str
    Description : str


@dataclass
class DtoTable:
    Name : str
    Description : str
    Columns : list[DtoColumn]


def prepare_df(df: pd.DataFrame) -> list[DtoTable]:
    tables = []

    for table in tqdm(df['table'].unique()):
        t : pd.DataFrame = df[df['table'] == table]
        columns : list[DtoColumn] = []
        for idx in t.index:
            name = str(t[t.index == idx]['field'][idx]).strip()
            desc = str(t[t.index == idx]['field_description'][idx]).strip()

            column = DtoColumn(name, desc)
            columns.append(column)

        dto_table = DtoTable(table, str(t['table_description'][idx]).strip(), columns)
        tables.append(dto_table)

    return tables


def generate_table_profile(table : DtoTable) -> str:
    profile = []
    
    profile.append(f"–¢–∞–±–ª–∏—Ü–∞: {table.Name}")
    profile.append(f"–û–ø–∏—Å–∞–Ω–∏–µ —Ç–∞–±–ª–∏—Ü—ã: {table.Description}")

    profile.append("–ö–æ–ª–æ–Ω–∫–∏:")
    for col in table.Columns:
        profile.append(f"- {col.Name} - {col.Description}")
    
    return "\n".join(profile)


class TableFinder:
    def __init__(self, tables):
        self.model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
        self.table_profiles = [generate_table_profile(t) for t in tables]
        self.table_embeddings = self.model.encode(self.table_profiles)
        self.tables = tables
    
    def find_tables(self, question: str, top_k: int = 5) :
        question_embedding = self.model.encode(question)
        
        similarities = []
        for emb in self.table_embeddings:
            cos_sim = np.dot(question_embedding, emb) / (
                np.linalg.norm(question_embedding) * np.linalg.norm(emb)
            )
            similarities.append(cos_sim)
        
        sorted_indices = np.argsort(similarities)[::-1]
        return [(self.tables[i], similarities[i]) for i in sorted_indices[:top_k]]


class HybridFinder(TableFinder):
    def __init__(self, tables):
        super().__init__(tables)
        self.tfidf = TfidfVectorizer()
        self.tfidf_matrix = self.tfidf.fit_transform(self.table_profiles)
    
    def find_tables(self, question: str, top_k: int = 5, alpha: float = 0.7, verbose : bool = False):
        semantic_scores = np.array([ex[1] for ex in super().find_tables(question, top_k=len(self.tables))])
        
        question_tfidf = self.tfidf.transform([question])
        keyword_scores = np.dot(question_tfidf, self.tfidf_matrix.T).toarray()[0]
        
        combined_scores = alpha * semantic_scores + (1 - alpha) * keyword_scores
        sorted_indices = np.argsort(combined_scores)[::-1]
        if verbose:
            return [(self.tables[i], combined_scores[i]) for i in sorted_indices[:top_k]]
        else:
            return [self.tables[i] for i in sorted_indices[:top_k]]

### <div class='alert alert-info'>general.py</div>


In [None]:
import numpy as np
from sentence_transformers import util
import pandas as pd
import zipfile
from sqlglot import exp
import sqlglot.optimizer
import re
from pandas.testing import assert_frame_equal, assert_series_equal
from tqdm import tqdm


class ExcelIO(object):
    @staticmethod
    def write_spans(spans : list[Span], path : str):
        asdict = [span.__dict__ for span in spans]
        df = pd.DataFrame(asdict)
        df.to_excel(excel_writer=path, index=False)

    @staticmethod
    def read_excel(path : str):
        df = pd.read_excel(path)
        return df


def find_similar_sentences(sentence_model, target_sentence : str, sentences : list[str], count : int = 3):
    """
    –§—É–Ω–∫—Ü–∏—è –ø–æ–∏—Å–∫–∞ –ø–æ—Ö–æ–∂–∏—Ö –ø–æ —Å–º—ã—Å–ª—É –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏–π –∏–∑ –Ω–∞–±–æ—Ä–∞ `sentences` –¥–ª—è —É–∫–∞–∑–∞–Ω–Ω–æ–≥–æ –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏—è `target_sentence`

    Parameters
    ----------
    sentence_model : Any
        –ú–æ–¥–µ–ª—å, –ø–æ–∑–≤–æ–ª—è—é—â–∞—è –≤–µ–∫—Ç–æ—Ä–∏–∑–æ–≤–∞—Ç—å —Ç–µ–∫—Å—Ç
    target_sentence: str
        –ü—Ä–µ–¥–ª–æ–∂–µ–Ω–∏–µ, –¥–ª—è –∫–æ—Ç–æ—Ä–æ–≥–æ –Ω—É–∂–Ω–æ –Ω–∞–π—Ç–∏ –ø–æ—Ö–æ–∂–∏–µ –ø–æ —Å–º—ã—Å–ª—É –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏—è
    sentences : List[str]
        –ù–∞–±–æ—Ä –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏–π
    count : int
        –ö–æ–ª–∏—á–µ—Å—Ç–≤–æ –æ–∂–∏–¥–∞–µ–º—ã—Ö –ø—Ä–µ–¥–ª–æ–∂–µ–Ω–∏–π
    """

    emb_target = sentence_model.encode(target_sentence)

    sims = []
    for i, sentence in enumerate(sentences):
        emb_sentence = sentence_model.encode(sentence)
        sim = util.pytorch_cos_sim(emb_sentence, emb_target)
        sims.append([i, np.float16(sim.squeeze())])

    nearest = sorted(sims, key=lambda pair : pair[1], reverse=True)
    similar_questions = [sentences[pair[0]] for pair in nearest if pair[1] != 1.0][:count]
    return similar_questions


def find_sql(text : str, start_keyword='SELECT'):
    """
    –§—É–Ω–∫—Ü–∏—è, –∫–æ—Ç–æ—Ä–∞—è –∏—â–µ—Ç –≤ —Å—Ç—Ä–æ–∫–µ `text` –ø–µ—Ä–≤–æ–µ –≤—Ö–æ–∂–¥–µ–Ω–∏–µ —Å–∞–º–æ–≥–æ –¥–ª–∏–Ω–Ω–æ–≥–æ, –ø—Ä–∞–≤–∏–ª—å–Ω–æ–≥–æ SQL –∑–∞–ø—Ä–æ—Å–∞
    """

    matches = re.search(f'({start_keyword}).*', text, flags=re.IGNORECASE|re.DOTALL)
    if not matches:
        return ''

    begin_sql = matches.group()
    splitted = begin_sql.split()

    maybe_sql = ''
    last_success_pos = 0
    for i, word in enumerate(splitted):
        maybe_sql += f' {word}'
        try:
            sqlglot.transpile(maybe_sql)
            last_success_pos = i
        except:
            pass

    found_sql = ' '.join(splitted[:last_success_pos + 1])
    return found_sql



def table_similarity(dataframe1 : pd.DataFrame, dataframe2 : pd.DataFrame, mode : str) -> int:
    """
    –§—É–Ω–∫—Ü–∏—è —Å—Ä–∞–≤–Ω–µ–Ω–∏—è –¥–≤—É—Ö —Ç–∞–±–ª–∏—Ü

    Parameters
    ----------
    dataframe1 : pd.DataFrame
        –ü–µ—Ä–≤–∞—è —Ç–∞–±–ª–∏—Ü–∞
    dataframe2 : pd.DataFrame
        –í—Ç–æ—Ä–∞—è —Ç–∞–±–ª–∏—Ü–∞
    mode : str
        –†–µ–∂–∏–º —Å—Ä–∞–≤–Ω–µ–Ω–∏—è. –î–æ–ø—É—Å—Ç–∏–º—ã —Ä–µ–∂–∏–º—ã soft, strict, flexible
    """

    # if dataframe1.columns.shape != dataframe2.columns.shape:
    #     return False
    # if not (dataframe1.columns == dataframe2.columns).all():
    #     return False
    
    match mode:
        case 'soft':
            return int(subset_df(dataframe1, dataframe2) and subset_df(dataframe2, dataframe1))
        case 'strict':
            return int(dataframe1.equals(dataframe2))
        case 'flexible':
            hash_1 = set(pd.util.hash_pandas_object(dataframe1, index=False))
            hash_2 = set(pd.util.hash_pandas_object(dataframe2, index=False))
            intersection = hash_1 & hash_2
            union = hash_1 | hash_2

            return len(intersection) / len(union) if len(union) != 0 else 1
        case _:
            raise Exception('Incorrect mode value')
     


def unzip_file(path, path_to):
    with zipfile.ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall(path_to)



def schema_parse(sql : str, structure_dict : dict):
    """
    –§—É–Ω–∫—Ü–∏—è, –≤—ã—Ç—è–≥–∏–≤–∞—é—â–∞—è –≤—Å–µ –Ω–∞–∑–≤–∞–Ω–∏—è —Ç–∞–±–ª–∏—Ü –∏ —Å—Ç–æ–ª–±—Ü–æ–≤, –∫–æ—Ç–æ—Ä—ã–µ —É–ø–æ–º—è–Ω—É—Ç—ã –≤ –∑–∞–ø—Ä–æ—Å–µ `sql`

    Parameters
    ----------
    sql : str
        SQL –∑–∞–ø—Ä–æ—Å
    table_structure : List[dict]
        –°—Ç—Ä—É–∫—Ç—É—Ä–∞ —Ç–∞–±–ª–∏—Ü—ã, –∫–æ—Ç–æ—Ä–∞—è –º–æ–∂–µ—Ç –±—ã—Ç—å –ø–æ–ª—É—á–µ–Ω–∞ –ø—Ä–∏ –ø–æ–º–æ—â–∏ —Ñ—É–Ω–∫—Ü–∏–∏ `structure_from_connection`
    """

    optimized_sql = sqlglot.optimizer.optimize(
        sqlglot.parse_one(sql),
        schema=structure_dict
    )

    buckets = {table.name : set(structure_dict[table.name].keys()) for table in optimized_sql.find_all(exp.Table)}
    # for column in optimized_sql.find_all(exp.Column):
    #     table_of_col = column.table
    #     buckets[table_of_col].add(column.name)

    as_default = []
    for k, v in buckets.items():
        as_default.append({'table_name' : k, 'columns' : list(v)})

    return as_default


def normalize_table(
    df: pd.DataFrame
) -> pd.DataFrame:
    """
    Normalizes a dataframe by:
    1. sorting columns in alphabetical order
    2. sorting rows using values from first column to last
    3. resetting index
    """
    sorted_df = df.reindex(sorted(df.columns), axis=1)
    sorted_df = sorted_df.sort_values(by=list(sorted_df.columns))
    sorted_df = sorted_df.reset_index(drop=True)

    return sorted_df


def subset_df(
    df_sub: pd.DataFrame,
    df_super: pd.DataFrame,
    verbose: bool = False,
) -> bool:
    
    if df_sub.empty:
        return True  
    
    df_super_temp = df_super.copy(deep=True)
    matched_columns = []
    for col_sub_name in df_sub.columns:
        col_match = False
        for col_super_name in df_super_temp.columns:
            col_sub = df_sub[col_sub_name].sort_values().reset_index(drop=True)
            col_super = (
                df_super_temp[col_super_name].sort_values().reset_index(drop=True)
            )
            try:
                assert_series_equal(
                    col_sub, col_super, check_dtype=False, check_names=False
                )
                col_match = True
                matched_columns.append(col_super_name)
                df_super_temp = df_super_temp.drop(columns=[col_super_name])
                break
            except AssertionError:
                continue
        if col_match == False:
            if verbose:
                print(f"no match for {col_sub_name}")
            return False
    df_sub_normalized = normalize_table(df_sub)

    df_super_matched = df_super[matched_columns].rename(
        columns=dict(zip(matched_columns, df_sub.columns))
    )
    df_super_matched = normalize_table(df_super_matched)

    try:
        assert_frame_equal(df_sub_normalized, df_super_matched, check_dtype=False)
        return True
    except AssertionError:
        return False
    


def dto_tables_from_dataframe(df: pd.DataFrame) -> list[DtoTable]: 
    tables = []

    for table in tqdm(df['table'].unique()):
        t : pd.DataFrame = df[df['table'] == table]
        columns : list[DtoColumn] = []
        for idx in t.index:
            name = str(t[t.index == idx]['field'][idx]).strip()
            desc = str(t[t.index == idx]['field_description'][idx]).strip()

            column = DtoColumn(name, desc)
            columns.append(column)

        dto_table = DtoTable(table, str(t['table_description'][idx]).strip(), columns)
        tables.append(dto_table)

    return tables

### <div class='alert alert-info'>dataset.py</div>


In [None]:
import pandas as pd
from sqlalchemy import text, Connection, inspect


class IterableDataFrame:
    """
    –ö–ª–∞—Å—Å, –ø–æ–∑–≤–æ–ª—è—é—â–∏–π –∏—Ç–µ—Ä–∏—Ä–æ–≤–∞—Ç—å—Å—è –≤ —Ç–∞–±–ª–∏—Ü–µ —Ç–∏–ø–∞ `pd.DataFrame`
    """

    def __init__(self, df : pd.DataFrame):
        self.df = df
        self.__series = {}
        for idx in self.df.index:
            sample = {
                column : self.df[self.df.index == idx][column][idx] for column in self.df.keys()
            }
            self.__series[idx] = sample

    def __len__(self):
        return self.df.shape[0]

    def __as_list(self):
        return list(self.__series.values())
    
    def __iter__(self):
        return iter(self.__as_list())

    def __getitem__(self, index):
        return self.__as_list()[index]
    
    def at_index(self, index):
        return self.__series[index]


def tables_from_connection(conn : Connection):
    """
    –§—É–Ω–∫—Ü–∏—è, –≤–æ–∑–≤—Ä–∞—â–∞—é—â–∞—è —Å–ø–∏—Å–æ–∫ –Ω–∞–∑–≤–∞–Ω–∏–π –≤—Å–µ—Ö —Ç–∞–±–ª–∏—Ü –¥–ª—è –¥–∞–Ω–Ω–æ–≥–æ —Å–æ–µ–¥–∏–Ω–µ–Ω–∏—è `conn`

    Parameters
    ----------
    conn : sqlalchemy.Connection
        –°–æ–µ–¥–∏–Ω–µ–Ω–∏–µ —Å –±–∞–∑–æ–π –¥–∞–Ω–Ω—ã—Ö
    """

    master = pd.DataFrame(conn.execute(text('SELECT * FROM sqlite_master')).fetchall())
    tables = list(master[master['type'] == 'table']['name'])
    return tables


def structure_from_connection(conn : Connection):
    """
    –§—É–Ω–∫—Ü–∏—è, –≤–æ–∑–≤—Ä–∞—â–∞—é—â–∞—è —Å–ø–∏—Å–æ–∫ —Å–ª–æ–≤–∞—Ä–µ–π –≤–∏–¥–∞ {table_name, columns}, –≥–¥–µ table_name - str, –∞ columns - List[str]

    Parameters
    ----------
    conn : sqlalchemy.Connection
        –°–æ–µ–¥–∏–Ω–µ–Ω–∏–µ —Å –±–∞–∑–æ–π –¥–∞–Ω–Ω—ã—Ö
    """

    tables = tables_from_connection(conn)
    structure = []
    for table in tables:
        columns = pd.DataFrame(conn.execute(text(f'SELECT * FROM "{table}"')).fetchall()).columns.to_list()
        structure.append(
            {
                'table_name' : table,
                'columns' : columns
            })
        
    return structure


def structure_from_connection_dict(conn : Connection):
    """
    –§—É–Ω–∫—Ü–∏—è, –≤–æ–∑–≤—Ä–∞—â–∞—é—â–∞—è —Å–ª–æ–≤–∞—Ä—å —Å–ª–æ–≤–∞—Ä–µ–π –≤–∏–¥–∞ {"Table" : {"Col" : "INT", ...}}

    Parameters
    ----------
    conn : sqlalchemy.Connection
        –°–æ–µ–¥–∏–Ω–µ–Ω–∏–µ —Å –±–∞–∑–æ–π –¥–∞–Ω–Ω—ã—Ö
    """

    tables = tables_from_connection(conn)
    structure = {}
    for table in tables:
        columns = inspect(conn).get_columns(table)
        columns_meta = {column['name'] : column['type'] for column in columns}
        structure[table] = columns_meta

    return structure


def prepare_column_names(conn : Connection):
    """
    –§—É–Ω–∫—Ü–∏—è, –æ–±—Ä–∞–±–∞—Ç—ã–≤–∞—é—â–∞—è –±–∞–∑—É –¥–∞–Ω–Ω—ã—Ö –∏–∑ —Å–æ–µ–¥–∏–Ω–µ–Ω–∏—è `conn`. –§—É–Ω–∫—Ü–∏—è –ø–µ—Ä–µ–∏–º–µ–Ω–æ–≤—ã–≤–∞–µ—Ç –Ω–∞–∑–≤–∞–Ω–∏—è –≤—Å–µ—Ö —Ç–∞–±–ª–∏—Ü –∏ –∏—Ö —Å—Ç–æ–ª–±—Ü–æ–≤, 
    –∫–æ—Ç–æ—Ä—ã–µ —Å–æ–¥–µ—Ä–∂–∞—Ç whitespace –∏ punctuation —Å–∏–º–≤–æ–ª—ã. –í–æ–∑–≤—Ä–∞—â–∞–µ—Ç True, –µ—Å–ª–∏ –ø–µ—Ä–µ–∏–º–µ–Ω–æ–≤—ã–≤–∞–Ω–∏–µ –ø—Ä–æ—à–ª–æ —É—Å–ø–µ—à–Ω–æ

    Parameters
    ----------
    conn : sqlalchemy.Connection
        –°–æ–µ–¥–∏–Ω–µ–Ω–∏–µ —Å –±–∞–∑–æ–π –¥–∞–Ω–Ω—ã—Ö
    """
    
    structure = structure_from_connection(conn)
    for table in structure:
        for column in table['columns']:
            new_name = str.lower(''.join([char for char in column if str.isalnum(char)]))
            if new_name != column:
                conn.execute(text(
                    f'''ALTER TABLE "{table['table_name']}" RENAME COLUMN "{column}" TO "{new_name}"'''
                ))

        new_table_name = str.lower(''.join([char for char in table['table_name'] if str.isalnum(char)]))
        if new_table_name != table['table_name']:
            conn.execute(text(f'''ALTER TABLE "{table['table_name']}" RENAME TO "{new_table_name}"'''))

    return True

### <div class='alert alert-info'>prompting.py</div>


In [None]:
import pandas as pd
import numpy as np
import sqlalchemy

class PromptBuilder:
    """
    –ö–ª–∞—Å—Å, –æ—Ç–≤–µ—á–∞—é—â–∏–π –∑–∞ —Å–æ–∑–¥–∞–Ω–∏–µ –ø—Ä–æ–º–ø—Ç–∞ –Ω–∞ –æ—Å–Ω–æ–≤–µ —É–∫–∞–∑–∞–Ω–Ω—ã—Ö —Ñ–∏—á–µ–π
    """

    def __init__(self):
        self.__prompt = ''
        self.schema_linking = False


    def add_schema_linking(self, table_structure=None):
        """
        –ú–µ—Ç–æ–¥, –¥–æ–±–∞–≤–ª—è—é—â–∏–π —Ä–µ–∂–∏–º –∏—Å–ø–æ–ª—å–∑–æ–≤–∞–Ω–∏—è —Ñ–∏—á–∏ Schema Linking. 
        
        Parameters
        ----------
        table_structure : Any
            –°—Ç—Ä—É–∫—Ç—É—Ä–∞ —Ç–∞–±–ª–∏—Ü—ã, –∫–æ—Ç–æ—Ä–∞—è –º–æ–∂–µ—Ç –±—ã—Ç—å –ø–æ–ª—É—á–µ–Ω–∞ —Å –ø–æ–º–æ—â—å—é —Ñ—É–Ω–∫—Ü–∏–∏ `structure_from_connection`
        """

        self.table_structure = table_structure
        self.schema_linking = True
        return self


    def add_few_shot(self, 
                     queries : IterableDataFrame, 
                     target_question : str, 
                     sentence_model, 
                     count : int = 1):
        """
        –ú–µ—Ç–æ–¥, –æ—Ç–≤–µ—á–∞—é—â–∏–π –∑–∞ –¥–æ–±–∞–≤–ª–µ–Ω–∏–µ —Ñ–∏—á–∏ Few-Shot –≤ –ø—Ä–æ–º–ø—Ç

        Parameters
        ----------

        sentence_model : Any
            –ú–æ–¥–µ–ª—å, –ø–æ–∑–≤–æ–ª—è—é—â–∞—è –≤–µ–∫—Ç–æ—Ä–∏–∑–æ–≤–∞—Ç—å —Ç–µ–∫—Å—Ç
        target_question : str
            –í–æ–ø—Ä–æ—Å, –¥–ª—è –∫–æ—Ç–æ—Ä–æ–≥–æ –Ω—É–∂–Ω–æ –Ω–∞–π—Ç–∏ –ø–æ—Ö–æ–∂–∏–µ –ø–æ —Å–º—ã—Å–ª—É –≤–æ–ø—Ä–æ—Å—ã
        queries : IterableDataFrame
            –ù–∞–±–æ—Ä –≤–æ–ø—Ä–æ—Å–æ–≤ –∏ –∑–∞–ø—Ä–æ—Å–æ–≤, —Å—Ä–µ–¥–∏ –∫–æ—Ç–æ—Ä—ã—Ö –Ω—É–∂–Ω–æ –Ω–∞–π—Ç–∏ –±–ª–∏–∂–∞–π—à–∏–µ –ø–æ —Å–º—ã—Å–ª—É –≤–æ–ø—Ä–æ—Å—ã. –û–±—ä–µ–∫—Ç –¥–æ–ª–∂–µ–Ω —è–≤–ª—è—Ç—å—Å—è –º–∞—Ç—Ä–∏—Ü–µ–π Nx2
        count : int
        """

        questions = [sample['question'] for sample in queries]

        input_examples = []
        similar = find_similar_sentences(sentence_model, target_question, questions, count)
        for sample in queries:
            curr_qs = sample['question']
            if curr_qs in similar:
                input_examples.append([curr_qs, sample['query']])

        few_shot_template = ''
        for ex in input_examples:
            few_shot_template += f'Q: {ex[0]}\n'
            few_shot_template += f'A: {ex[1]}\n'

        self.__prompt += few_shot_template + '\n'
        return self
    

    def add_schema_template_from_connection(self, db_conn : sqlalchemy.Connection):
        """
        –ú–µ—Ç–æ–¥, –æ—Ç–≤–µ—á–∞—é—â–∏–π –∑–∞ –¥–æ–±–∞–≤–ª–µ–Ω–∏–µ —Ñ–∏—á–∏ Schema Template –≤ –ø—Ä–æ–º–ø—Ç —á–µ—Ä–µ–∑ —Å–æ–µ–¥–∏–µ–Ω–µ–Ω–∏–µ —Å –ë–î

        Parameters
        ----------
        db_conn : sqlalchemy.Connection
            –°–æ–µ–¥–∏–Ω–µ–Ω–∏–µ —Å –±–∞–∑–æ–π –¥–∞–Ω–Ω—ã—Ö
        """

        if self.schema_linking:
            structure = self.table_structure
        else:
            structure = structure_from_connection(db_conn)

        schema_template = ''
        for table in structure:
            schema_template += f"{table['table_name']}({', '.join(table['columns'])});\n"

        self.__prompt += schema_template + '\n'
        return self


    def add_schema_template_from_dto_tables(self, dto_tables : list[DtoTable]):
        """
        –ú–µ—Ç–æ–¥, –æ—Ç–≤–µ—á–∞—é—â–∏–π –∑–∞ –¥–æ–±–∞–≤–ª–µ–Ω–∏–µ —Ñ–∏—á–∏ Schema Template –≤ –ø—Ä–æ–º–ø—Ç —á–µ—Ä–µ–∑ —Å–ø–∏—Å–æ–∫ –æ–±—ä–µ–∫—Ç–æ–≤ —Ç–∏–ø–∞ DtoTable

        Parameters
        ----------
        db_conn : sqlalchemy.Connection
            –°–æ–µ–¥–∏–Ω–µ–Ω–∏–µ —Å –±–∞–∑–æ–π –¥–∞–Ω–Ω—ã—Ö
        """
        structure = []
        for dto_table in dto_tables:
            columns = [column.Name for column in dto_table.Columns]
            structure.append({
                'table_name' : dto_table.Name,
                'columns' : columns
            })

        schema_template = ''
        for table in structure:
            schema_template += f"{table['table_name']}({', '.join(table['columns'])});\n"

        self.__prompt += schema_template + '\n'
        return self


    def add_cell_value_referencing(self, db_conn : sqlalchemy.Connection, count=1):
        """
        –ú–µ—Ç–æ–¥, –æ—Ç–≤–µ—á–∞—é—â–∏–π –∑–∞ –¥–æ–±–∞–≤–ª–µ–Ω–∏–µ —Ñ–∏—á–∏ Cell Value Referencing –≤ –ø—Ä–æ–º–ø—Ç

        Parameters
        ----------
        db_conn : sqlalchemy.Connection
            –°–æ–µ–¥–∏–Ω–µ–Ω–∏–µ —Å –±–∞–∑–æ–π –¥–∞–Ω–Ω—ã—Ö
        count : int
            –û–∂–∏–¥–∞–µ–º–æ–µ –∫–æ–ª–∏—á–µ—Å—Ç–≤–æ –ø—Ä–∏–º–µ—Ä–æ–≤ –¥–ª—è –¥–æ–±–∞–≤–ª–µ–Ω–∏—è. –ü–æ —É–º–æ–ª—á–∞–Ω–∏—é —Ä–∞–≤–Ω–æ 1
        """

        if self.schema_linking:
            tables = [table['table_name'] for table in self.table_structure]
        else:
            tables = tables_from_connection(db_conn)

        data_information = []
        for table in tables:
            if self.schema_linking:
                instance = [bucket for bucket in self.table_structure if bucket['table_name'] == table][0]
                pd_table = pd.read_sql(f'SELECT * FROM {table}', db_conn)[instance['columns']]
            else:
                pd_table = pd.read_sql(f'SELECT * FROM {table}', db_conn)
            
            indexes = np.random.randint(0, pd_table.shape[0], size=count)
            series = [pd_table[pd_table.index == idx].to_numpy() for idx in indexes]

            data_information.append({
                'table_name' : table,
                'examples' : [f"[{', '.join(map(str,list(ser.reshape(ser.shape[1]))))}]" for ser in series]
            })

        value_template = ''
        for data in data_information:
            value_template += f"{data['table_name']}({', '.join(data['examples'])});\n"

        self.__prompt += value_template + '\n'
        return self


    def add_message(self, message : str):
        self.__prompt += message + '\n'
        return self


    def build_prompt(self):
        return self.__prompt

### <div class='alert alert-info'>models-evaluation.ipynb</div>


In [7]:
engine = create_engine('sqlite:////kaggle/input/main-database/main_database.sqlite', echo=False)
conn = engine.connect()

In [8]:
prepare_column_names(conn) # –£—Å—Ç—Ä–∞–Ω—è–µ—Ç –ø—Ä–æ–±–µ–ª—ã –≤ –Ω–∞–∑–≤–∞–Ω–∏–∏ —Å—Ç–æ–ª–±—Ü–æ–≤
queries = IterableDataFrame(pd.read_excel('/kaggle/input/main-database/NLSQL.xlsx'))

In [None]:
meta_df = pd.read_excel('table-main.xlsx')
meta_tables = dto_tables_from_dataframe(meta_df)

In [None]:
finder = HybridFinder(meta_tables)

# –ü—Ä–µ–ø—Ä–æ—Ü–µ—Å—Å–∏–Ω–≥ –ø—Ä–æ–º–ø—Ç–∞

In [9]:
sentence_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
class HuggingFaceModelInference:
    def __init__(self, path):
        self.path = path
        self.evaluated = False
        self.is_downloaded = False


    def __load_model(self):
        self.tokenizer = AutoTokenizer.from_pretrained(self.path, trust_remote_code=True)
        self.model = AutoModelForCausalLM.from_pretrained(
                    self.path,
                    torch_dtype=torch.float16,
                    device_map="auto",
                    max_memory={0: "10GiB", 1: "10GiB"},  
                    offload_folder="./offload", 
                    trust_remote_code=True
                    )

    def __inference(self, prompt):
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        with torch.inference_mode():  
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device) 
            generate_ids = self.model.generate(
                            **inputs,
                            max_length=2048,
                            num_return_sequences=1,
                            temperature=0.2, 
                            top_p=0.95,
                            do_sample=True,
                            use_cache=True 
                            )
    
            output = self.tokenizer.decode(
                    generate_ids[0, inputs.input_ids.shape[1]:],
                    skip_special_tokens=True
                    )
            
        return output
    

    def evaluate(self, queries : IterableDataFrame, connection : Connection):
        if not self.is_downloaded:
            self.__load_model()
            self.is_downloaded = True

        self.model.eval()

        logger : list[ExtendedSqlSpan] = []
        summary = 0
        for query in tqdm(queries):
            question = query['question']
            gold_sql = query['query']

            found_tables = finder.find_tables(question, alpha=0.4, top_k=1)

            prompt = PromptBuilder()\
                .add_message('### You are an expert SQL developer with deep knowledge of database optimization, correct syntax, and efficient query design. Your task is to generate accurate, performant SQL queries based on the provided input.')\
                .add_message("### Table schema:")\
                .add_schema_template_from_dto_tables(found_tables)\
                .add_message("### Examples of data")\
                .add_cell_value_referencing(conn, count=1)\
                .add_message(f"### Your task: {question}")\
                .build_prompt()
            

            output = self.__inference(prompt)
            pred_sql = find_sql(output, start_keyword='SELECT')
            transpiled_sql = sqlglot.transpile(pred_sql, write=sqlglot.Dialects.SQLITE)
            
            sql_span = self.__make_excel_span(question,
                                                transpiled_sql, 
                                                gold_sql, 
                                                connection)
            
            summary += sql_span.df_flexible
            logger.append(sql_span)
        
        self.summary = summary
        self.queries_count = len(queries)
        self.logger = logger
        self.evaluated = True


    def accuracy(self):
        """
        –ó–Ω–∞—á–µ–Ω–∏–µ –º–µ—Ç—Ä–∏–∫–∏ Accuracy –¥–ª—è –ø–æ—Å–ª–µ–¥–Ω–µ–≥–æ –∑–∞–ø—É—Å–∫–∞ –º–æ–¥–µ–ª–∏
        """

        if not self.evaluated:
            raise Exception('Model was not been evaluated')
        
        return self.summary / self.queries_count
    

    def __make_excel_span(self,
                    question : str,
                    pred_sql : str,
                    gold_sql : str, 
                    connection : Connection) -> ExtendedSqlSpan:
        
        df_gold = pd.read_sql(gold_sql, connection)

        try:
            df_pred = pd.read_sql(pred_sql, connection)
            
            span_df_soft        = table_similarity(df_pred, df_gold, mode='soft')
            span_df_flexible    = table_similarity(df_pred, df_gold, mode='flexible')
            span_gold_IN_pred   = subset_df(df_gold, df_pred)
            span_pred_IN_gold   = subset_df(df_pred, df_gold)
            span_pred_columns   = df_pred.columns.to_list()
            span_ted            = self.__ted_compare(pred_sql, gold_sql)
            span_error          = None
        except Exception as exception:
            span_df_soft        = .0
            span_df_flexible    = .0
            span_gold_IN_pred   = False
            span_pred_IN_gold   = False
            span_pred_columns   = []
            span_ted            = self.__ted_compare(pred_sql, gold_sql)
            span_error          = exception

        sql_span = ExtendedSqlSpan(
                NL                 =question,
                sql_gold           =gold_sql,
                sql_pred           =pred_sql,
                df_soft            =span_df_soft,
                df_flexible        =span_df_flexible,
                df_pred_IN_df_gold =span_pred_IN_gold,
                df_gold_IN_df_pred =span_gold_IN_pred,
                df_gold_columns    =df_gold.columns.to_list(),
                df_pred_columns    =span_pred_columns,
                TED                =span_ted,
                Error              =span_error
            )
        
        return sql_span

    def __ted_compare(self, sql1 : str, sql2 : str):
        """
        –ö–æ–º–ø–æ—Ä–∞—Ç–æ—Ä –¥–ª—è –¥–≤—É—Ö –¥–µ—Ä–µ–≤—å–µ–≤
        """
        
        try:
            exp1 = parse_one(sql1)
            exp2 = parse_one(sql2)
        except:
            return .0

        distiller = ChangeDistiller()
        _ = distiller.diff(exp1, exp2)
        return distiller._dice_coefficient(exp1, exp2)


    def TED(self):
        """
        –ó–Ω–∞—á–µ–Ω–∏–µ –º–µ—Ç—Ä–∏–∫–∏ Tree Edit Distance –¥–ª—è –ø–æ—Å–ª–µ–¥–Ω–µ–≥–æ –∑–∞–ø—É—Å–∫–∞ –º–æ–¥–µ–ª–∏
        """

        if not self.evaluated:
            raise Exception('Model was not been evaluated')
        
        summary = 0
        for span in self.logger:
            summary += self.__ted_compare(span.sql_pred, span.sql_gold)

        return summary / self.queries_count

## 1. SQLCoder 7b

In [11]:
#sqlcoder = HuggingFaceModelInference('defog/sqlcoder-7b-2')
#sqlcoder.evaluate(shuffle(queries), conn)

In [12]:
#conn.rollback()

In [13]:
#ExcelIO.write_spans(sqlcoder.logger, 'out.xlsx')
#sqlcoder.accuracy(), sqlcoder.TED()

In [14]:
#sqlcoder.accuracy()

In [15]:
#sqlcoder.logger

## DeepSeek 6.7b

In [None]:
deepseek = HuggingFaceModelInference('deepseek-ai/deepseek-coder-6.7b-instruct')
deepseek.evaluate(shuffle(queries), conn) 

In [28]:
ExcelIO.write_spans(deepseek.logger, 'out.xlsx')
deepseek.accuracy(), deepseek.TED()

(0.7906976744186046, 0.8952091523033922)

## 3. Chat2DB 7b

In [18]:
# chat2db = HuggingFaceModelInference('Chat2DB/Chat2DB-SQL-7B')
# chat2db.evaluate(shuffle(queries), conn)

In [19]:
#ExcelIO.write_spans(chat2db.logger, 'out.xlsx')
#chat2db.accuracy(), chat2db.TED()

## 5. DuckDB-NSQL 7b

In [20]:
#duckdb = HuggingFaceModelInference('motherduckdb/DuckDB-NSQL-7B-v0.1')
#duckdb.evaluate(shuffle(queries), conn)

In [21]:
#ExcelIO.write_spans(duckdb.logger, 'out.xlsx')
#duckdb.accuracy(), duckdb.TED()

In [22]:
# dump_inference('DuckDB-NSQL-7B-v0.1', duckdb.exec_time, duckdb.sql_similarity(), duckdb.accuracy())

## –ü—Ä–æ—á–µ–µ

In [23]:
from numba import cuda
import gc
#cuda.devices.gpus[0].reset()
#cuda.devices.gpus[1].reset()
#gc.collect()

In [1]:
%%capture
!pip install unsloth

In [2]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048
dtype = None 
load_in_4bit = True 

ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
ü¶• Unsloth Zoo will now patch everything to make training faster!


In [15]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/gemma-7b-bnb-4bit", # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    device_map='auto'
)

==((====))==  Unsloth 2025.3.19: Fast Gemma patching. Transformers: 4.50.3.
   \\   /|    Tesla P100-PCIE-16GB. Num GPUs = 1. Max memory: 15.888 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 6.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [16]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.3.19 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [17]:
prompt = PromptBuilder()\
        .add_message('### –¢–µ–±–µ –±—É–¥–µ—Ç –¥–∞–Ω –Ω–µ–∫–æ—Ç–æ—Ä—ã–π –≤–æ–ø—Ä–æ—Å, –Ω–∞ –æ—Å–Ω–æ–≤–∞–Ω–∏–∏ –∫–æ—Ç–æ—Ä–æ–≥–æ —É–∂–µ –¥—Ä—É–≥–∞—è –º–æ–¥–µ–ª—å –ø–æ—Ç–æ–º —Å–≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç SQL –∑–∞–ø—Ä–æ—Å. –¢–≤–æ—è –∂–µ –∑–∞–¥–∞—á–∞ –∏—Å–ø—Ä–∞–≤–∏—Ç—å –≤—Å–µ–≤–æ–∑–º–æ–∂–Ω—ã–µ –Ω–µ–æ–ø—Ä–µ–¥–µ–ª–µ–Ω–Ω–æ—Å—Ç–∏ –≤ —ç—Ç–æ–º –≤–æ–ø—Ä–æ—Å–µ, –∫–æ—Ç–æ—Ä—ã–µ –º–æ–≥—É—Ç –ø–æ–≤–ª–∏—è—Ç—å –Ω–∞ –∫–æ–¥–æ–≥–µ–Ω–µ—Ä–∞—Ü–∏—é. –ù–∞–ø—Ä–∏–º–µ—Ä:')\
        .add_message('### –ó–∞–¥–∞—á–∞ –Ω–∞–ø–∏—Å–∞—Ç—å –∏—Å–ø—Ä–∞–≤–ª–µ–Ω–Ω—ã–π –≤–æ–ø—Ä–æ—Å –∏ —Ç–æ–ª—å–∫–æ. –ï—Å–ª–∏ –≤ –≤–æ–ø—Ä–æ—Å–µ –≤—Å—ë –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–Ω–æ, —Ç–æ –∏—Å–ø—Ä–∞–≤–ª—è—Ç—å –µ–≥–æ –Ω–µ –Ω—É–∂–Ω–æ. –ü–æ–º–∏–º–æ —ç—Ç–æ–≥–æ —Ç–µ–±–µ –±—É–¥–µ—Ç —Ç–∞–∫ –∂–µ –¥–∞–Ω–∞ —Å—Ö–µ–º–∞ –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö')\
        .add_message('### –°—Ö–µ–º–∞ –±–∞–∑ –¥–∞–Ω–Ω—ã—Ö')\
        .add_schema_template(conn)\
        .add_message('### –û—Ç–≤–µ—Ç –≤—ã–≤–µ–¥–∏ –≤ —Ñ–æ—Ä–º–∞—Ç–µ [START] –æ—Ç–≤–µ—Ç [END]')\
        .add_message(f'### –í–æ–ø—Ä–æ—Å: {queries[10]["question"]}')\
        .add_message('### –ò—Å–ø—Ä–∞–≤–ª–µ–Ω–Ω—ã–π –≤–æ–ø—Ä–æ—Å:')\
        .build_prompt()

In [20]:
print(prompt)

### –¢–µ–±–µ –±—É–¥–µ—Ç –¥–∞–Ω –Ω–µ–∫–æ—Ç–æ—Ä—ã–π –≤–æ–ø—Ä–æ—Å, –Ω–∞ –æ—Å–Ω–æ–≤–∞–Ω–∏–∏ –∫–æ—Ç–æ—Ä–æ–≥–æ —É–∂–µ –¥—Ä—É–≥–∞—è –º–æ–¥–µ–ª—å –ø–æ—Ç–æ–º —Å–≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç SQL –∑–∞–ø—Ä–æ—Å. –¢–≤–æ—è –∂–µ –∑–∞–¥–∞—á–∞ –∏—Å–ø—Ä–∞–≤–∏—Ç—å –≤—Å–µ–≤–æ–∑–º–æ–∂–Ω—ã–µ –Ω–µ–æ–ø—Ä–µ–¥–µ–ª–µ–Ω–Ω–æ—Å—Ç–∏ –≤ —ç—Ç–æ–º –≤–æ–ø—Ä–æ—Å–µ, –∫–æ—Ç–æ—Ä—ã–µ –º–æ–≥—É—Ç –ø–æ–≤–ª–∏—è—Ç—å –Ω–∞ –∫–æ–¥–æ–≥–µ–Ω–µ—Ä–∞—Ü–∏—é. –ù–∞–ø—Ä–∏–º–µ—Ä:
### –ó–∞–¥–∞—á–∞ –Ω–∞–ø–∏—Å–∞—Ç—å –∏—Å–ø—Ä–∞–≤–ª–µ–Ω–Ω—ã–π –≤–æ–ø—Ä–æ—Å –∏ —Ç–æ–ª—å–∫–æ. –ï—Å–ª–∏ –≤ –≤–æ–ø—Ä–æ—Å–µ –≤—Å—ë –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–Ω–æ, —Ç–æ –∏—Å–ø—Ä–∞–≤–ª—è—Ç—å –µ–≥–æ –Ω–µ –Ω—É–∂–Ω–æ. –ü–æ–º–∏–º–æ —ç—Ç–æ–≥–æ —Ç–µ–±–µ –±—É–¥–µ—Ç —Ç–∞–∫ –∂–µ –¥–∞–Ω–∞ —Å—Ö–µ–º–∞ –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö
### –°—Ö–µ–º–∞ –±–∞–∑ –¥–∞–Ω–Ω—ã—Ö
–æ—Å—Ç–∞—Ç–∫–∏2024(–∞—Ä—Ç–∏–∫—É–ª, –Ω–æ–º–µ–Ω–∫–ª–∞—Ç—É—Ä–∞, –µ–¥, 01042024, 02042024, 03042024, 04042024, 05042024, 06042024, 07042024, 08042024, 09042024, 10042024, 11042024, 12042024, 13042024, 14042024, 15042024, 16042024, 17042024, 18042024, 1904202

In [18]:
model.device

device(type='cuda', index=0)

In [19]:
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(prompt, return_tensors = "pt").to(model.device)

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
print(tokenizer.batch_decode(outputs)[0])

<bos>### –¢–µ–±–µ –±—É–¥–µ—Ç –¥–∞–Ω –Ω–µ–∫–æ—Ç–æ—Ä—ã–π –≤–æ–ø—Ä–æ—Å, –Ω–∞ –æ—Å–Ω–æ–≤–∞–Ω–∏–∏ –∫–æ—Ç–æ—Ä–æ–≥–æ —É–∂–µ –¥—Ä—É–≥–∞—è –º–æ–¥–µ–ª—å –ø–æ—Ç–æ–º —Å–≥–µ–Ω–µ—Ä–∏—Ä—É–µ—Ç SQL –∑–∞–ø—Ä–æ—Å. –¢–≤–æ—è –∂–µ –∑–∞–¥–∞—á–∞ –∏—Å–ø—Ä–∞–≤–∏—Ç—å –≤—Å–µ–≤–æ–∑–º–æ–∂–Ω—ã–µ –Ω–µ–æ–ø—Ä–µ–¥–µ–ª–µ–Ω–Ω–æ—Å—Ç–∏ –≤ —ç—Ç–æ–º –≤–æ–ø—Ä–æ—Å–µ, –∫–æ—Ç–æ—Ä—ã–µ –º–æ–≥—É—Ç –ø–æ–≤–ª–∏—è—Ç—å –Ω–∞ –∫–æ–¥–æ–≥–µ–Ω–µ—Ä–∞—Ü–∏—é. –ù–∞–ø—Ä–∏–º–µ—Ä:
### –ó–∞–¥–∞—á–∞ –Ω–∞–ø–∏—Å–∞—Ç—å –∏—Å–ø—Ä–∞–≤–ª–µ–Ω–Ω—ã–π –≤–æ–ø—Ä–æ—Å –∏ —Ç–æ–ª—å–∫–æ. –ï—Å–ª–∏ –≤ –≤–æ–ø—Ä–æ—Å–µ –≤—Å—ë –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–Ω–æ, —Ç–æ –∏—Å–ø—Ä–∞–≤–ª—è—Ç—å –µ–≥–æ –Ω–µ –Ω—É–∂–Ω–æ. –ü–æ–º–∏–º–æ —ç—Ç–æ–≥–æ —Ç–µ–±–µ –±—É–¥–µ—Ç —Ç–∞–∫ –∂–µ –¥–∞–Ω–∞ —Å—Ö–µ–º–∞ –±–∞–∑—ã –¥–∞–Ω–Ω—ã—Ö
### –°—Ö–µ–º–∞ –±–∞–∑ –¥–∞–Ω–Ω—ã—Ö
–æ—Å—Ç–∞—Ç–∫–∏2024(–∞—Ä—Ç–∏–∫—É–ª, –Ω–æ–º–µ–Ω–∫–ª–∞—Ç—É—Ä–∞, –µ–¥, 01042024, 02042024, 03042024, 04042024, 05042024, 06042024, 07042024, 08042024, 09042024, 10042024, 11042024, 12042024, 13042024, 14042024, 15042024, 16042024, 17042024, 18042024, 19