<a href="https://colab.research.google.com/github/emredeveloper/Text-to-Sql/blob/main/Text_to_Sql_Streamlit_%2B_Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!wget https://huggingface.co/omeryentur/phi-3-sql/resolve/main/phi-3-sql.Q4_K_M.gguf

In [None]:
!pip install --upgrade --quiet  transformers --quiet
!pip install langchain langchain_community --quiet

In [7]:
from langchain_community.llms import LlamaCpp

In [None]:
client=LlamaCpp(model_path="/content/phi-3-sql.Q4_K_M.gguf")


In [9]:
from langchain.prompts.prompt import PromptTemplate

In [10]:
from langchain.sql_database import SQLDatabase

BU örnek database'i direkt githubdan indirip localde çalıştırabilirsiniz.

In [11]:
db_path="sqlite:///example.db"
db=SQLDatabase.from_uri(database_uri=db_path)

In [12]:
db.get_table_names()

  warn_deprecated(


['courses', 'students']

In [13]:
db._sample_rows_in_table_info=0

In [14]:
table_info=db.get_table_info()
table_info

'\nCREATE TABLE courses (\n\tid INTEGER, \n\tname TEXT NOT NULL, \n\tdescription TEXT, \n\tPRIMARY KEY (id)\n)\n\n\nCREATE TABLE students (\n\tid INTEGER, \n\tname TEXT NOT NULL, \n\tage INTEGER NOT NULL, \n\tPRIMARY KEY (id)\n)'

In [15]:
template="""
<|system|>
{table_info}

<|user|>
{question}
<|sql|>
"""

In [16]:
from langchain.schema import HumanMessage

In [20]:
question="Courses containing Introduction"

prompt=PromptTemplate.from_template(template)

In [None]:
res=client(prompt.format(table_info=table_info,question=question))


In [22]:
sql_query=res
sql_query

"SELECT * FROM courses WHERE description LIKE '%Introduction%' OR name LIKE '%Introduction%';\n"

In [23]:
db.run(sql_query)


"[(2, 'Physics', 'Introduction to Physics'), (4, 'Biology', 'Introduction to Biology'), (6, 'Geography', 'Introduction to Geography'), (9, 'Computer Science', 'Introduction to Computer Science')]"

In [24]:
db.run("SELECT * FROM courses")

"[(1, 'Mathematics', 'Basic Mathematics Course'), (2, 'Physics', 'Introduction to Physics'), (3, 'Chemistry', 'Basic Chemistry Course'), (4, 'Biology', 'Introduction to Biology'), (5, 'History', 'World History Overview'), (6, 'Geography', 'Introduction to Geography'), (7, 'Literature', 'World Literature Overview'), (8, 'Art', 'Art History and Techniques'), (9, 'Computer Science', 'Introduction to Computer Science'), (10, 'Music', 'Fundamentals of Music')]"

In [25]:
import pandas as pd

In [28]:
from sqlalchemy import create_engine
engine = create_engine("sqlite:///example.db")
with engine.connect() as connection:
  df = pd.read_sql_query(sql_query, connection,)

In [29]:
df

Unnamed: 0,id,name,description
0,2,Physics,Introduction to Physics
1,4,Biology,Introduction to Biology
2,6,Geography,Introduction to Geography
3,9,Computer Science,Introduction to Computer Science


In [34]:
%%writefile app.py

import os
import requests
import hashlib
import pandas as pd
import streamlit as st
from concurrent.futures import ThreadPoolExecutor
from langchain_community.llms import LlamaCpp
from langchain.prompts.prompt import PromptTemplate
from langchain.sql_database import SQLDatabase
from sqlalchemy import create_engine
import logging

logging.basicConfig(level=logging.ERROR)  # Set logging level

def calculate_md5(file_path):
    """Calculates the MD5 hash of a file."""
    hash_md5 = hashlib.md5()
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

def download_file(url, filename, expected_md5):
    """Downloads a file and verifies its integrity."""
    try:
        response = requests.get(url, stream=True)
        if response.status_code == 200:
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)

            if calculate_md5(filename) != expected_md5:
                st.error("Downloaded file is corrupted. Please try again.")
                os.remove(filename)
            else:
                st.success("Download complete and verified!")
        else:
            st.error(f"Failed to download file: {response.status_code}")
    except Exception as e:
        st.error(f"Error downloading file: {e}")

@st.cache_resource(ttl=3600)  # Cache the model for an hour
def load_model(model_file):
    """Loads the LlamaCpp model, ensuring it's a valid .gguf file."""
    try:
        if not model_file.endswith(".gguf"):
            st.error("Invalid model file format. Please provide a .gguf file.")
            return None

        client = LlamaCpp(model_path=model_file, temperature=0)
        return client
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None

def get_database():
    try:
        db_path = "sqlite:///example.db"
        db = SQLDatabase.from_uri(database_uri=db_path)
        db._sample_rows_in_table_info = 0
        engine = create_engine(db_path)
        return db, engine
    except Exception as e:
        st.error(f"Error connecting to database: {e}")
        return None, None

def main():
    st.title("SQL Query Interface")

    # User guide
    with st.expander("User Guide"):
        st.write("""
        This interface allows you to query an SQL database using natural language.
        - Enter your query in the input box and press 'Query' to get the results.
        - The tables and their first 5 rows are displayed upon loading the page.
        """)

    # Retrieve database and engine
    db, engine = get_database()

    if db and engine:
        # Display tables and contents upon page load
        table_names = db.get_table_names()
        if table_names:
            st.write("Tables:")
            tabs = st.tabs(table_names)
            for tab, table_name in zip(tabs, table_names):
                with tab:
                    st.write(f"Table: {table_name}")
                    query = f"SELECT * FROM {table_name} LIMIT 5"  # Limit to 5 rows for display
                    try:
                        with engine.connect() as connection:
                            df = pd.read_sql_query(query, connection)
                        st.write(df)
                    except Exception as e:
                        st.error(f"Error retrieving data from {table_name}: {e}")
        else:
            st.write("No tables found in the database.")

        question = st.text_area("Enter your query:", value="Courses containing Introduction")
        if st.button("Query"):
            model_file = "phi-3-sql.Q4_K_M.gguf"
            model_url = "https://huggingface.co/omeryentur/phi-3-sql/blob/main/phi-3-sql.Q4_K_M.gguf"
            expected_md5 = "d41d8cd98f00b204e9800998ecf8427e"  # Replace with the actual MD5 hash of the model file

            # Download the model file if it doesn't exist
            if not os.path.exists(model_file):
                st.write(f"Downloading {model_file}...")
                download_file(model_url, model_file, expected_md5)

            # Load the model
            client = load_model(model_file)
            if client:
                # Retrieve table info
                table_info = db.get_table_info()

                # Define the SQL prompt template
                template="""
                <|system|>
                {table_info}

                <|user|>
                {question}
                <|sql|>
                """

                # Create the prompt with the query
                prompt = PromptTemplate.from_template(template)
                prompt_text = prompt.format(table_info=table_info, question=question)

                try:
                    # Get SQL query from LLM
                    res = client(prompt_text)
                    sql_query = res.strip()
                    print(prompt_text)
                    with engine.connect() as connection:
                        df = pd.read_sql_query(sql_query, connection)

                    st.write(f"SQL Query: {sql_query}")
                    st.write("Result:")
                    st.write(df)
                except Exception as e:
                    st.error(f"Error executing query: {e}")
        else:
            st.write("Please enter your query and press 'Query' to get results.")

        # Add New Data to Database section
        st.subheader("Add New Data to Database")
        new_data = st.text_area("Enter new data (SQL INSERT statement):", "")
        if st.button("Add Data"):
            if new_data.strip():
                try:
                    with engine.connect() as connection:
                        connection.execute(new_data)
                    st.success("Data added successfully!")
                except Exception as e:
                    st.error(f"Error adding data: {e}")
            else:
                st.warning("Please enter a valid SQL INSERT statement.")

    else:
        st.error("Database connection not established.")

    # Button to clear cache
    if st.button("Clear Cache"):
        st.cache_data.clear()
        st.cache_resource.clear()
        st.success("Cache cleared!")

if __name__ == "__main__":
    main()

Overwriting app.py


In [None]:
!streamlit run /content/app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com