In [1]:
# Use the %pip magic so packages install into the running kernel's environment
# and the line keeps working in a multi-line cell (bare `pip ...` relies on
# automagic). NOTE(review): the postgresql+psycopg2 URL used further down also
# needs the psycopg2 driver -- confirm it is installed in this environment.
%pip install pandas sqlalchemy nltk

Collecting sqlalchemy
  Downloading SQLAlchemy-2.0.36-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.7 kB)
Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting click (from nltk)
  Downloading click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl.metadata (40 kB)
Collecting tqdm (from nltk)
  Downloading tqdm-4.67.0-py3-none-any.whl.metadata (57 kB)
Downloading SQLAlchemy-2.0.36-cp312-cp312-macosx_11_0_arm64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m39.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m37.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl (284 kB)
Downloading click-8.1.7-py3-none-any.whl (97 kB)
Downloading tqdm-4.67.0-py3-none

In [2]:
from sqlalchemy import create_engine
import pandas as pd

def upload_csv_to_databases(
    csv_path,
    sqlite_url='sqlite:///sqlite_db.db',
    postgres_url='postgresql+psycopg2://user:password@localhost/postgres_db',
):
    """Load a CSV file and write it to ``uploaded_data`` in SQLite and PostgreSQL.

    Parameters
    ----------
    csv_path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.
    sqlite_url : str, optional
        SQLAlchemy URL of the SQLite target database.
    postgres_url : str, optional
        SQLAlchemy URL of the PostgreSQL target database. The default carries
        placeholder credentials; pass a real URL instead of editing the code.

    Notes
    -----
    An existing ``uploaded_data`` table is replaced (``if_exists='replace'``)
    and the DataFrame index is not written (``index=False``).
    """
    # Load CSV into Pandas
    df = pd.read_csv(csv_path)

    engines = []
    try:
        # Create both engines before the first write, so a failure to set up
        # either target leaves both databases untouched.
        for url in (sqlite_url, postgres_url):
            engines.append(create_engine(url))

        # Store data in both databases
        for engine in engines:
            df.to_sql('uploaded_data', engine, if_exists='replace', index=False)
    finally:
        # Release pooled connections; otherwise every call leaves two
        # connection pools behind in a long-lived kernel.
        for engine in engines:
            engine.dispose()

    print("Data uploaded successfully to both databases.")

In [None]:
def explore_database(db_engine):
    """Print the first five rows of the ``uploaded_data`` table.

    Parameters
    ----------
    db_engine
        Object whose ``connect()`` returns a context-managed connection with
        an ``execute`` method (a SQLAlchemy engine in this notebook).
    """
    # SQLAlchemy 2.x refuses to execute plain strings; raw SQL has to be
    # wrapped in text(). Imported here so this cell does not depend on the
    # import cell being edited as well.
    from sqlalchemy import text

    with db_engine.connect() as connection:
        result = connection.execute(text("SELECT * FROM uploaded_data LIMIT 5;"))
        print("Sample Data:")
        for row in result:
            print(row)

In [None]:
import random

# Templates for the example queries; {column} receives a quoted identifier and
# {value} a random integer.
QUERY_PATTERNS = [
    "SELECT * FROM uploaded_data WHERE {column} > {value};",
    "SELECT {column} FROM uploaded_data ORDER BY {column} DESC;",
    "SELECT COUNT(*) FROM uploaded_data WHERE {column} = '{value}';"
]

def generate_sample_queries(columns):
    """Build one example SQL query per entry in ``QUERY_PATTERNS``.

    Parameters
    ----------
    columns : sequence
        Candidate column names; one is picked at random for each pattern.

    Returns
    -------
    list of str
        One query per pattern, or an empty list when ``columns`` is empty.

    Notes
    -----
    Column names are wrapped in double quotes (embedded quotes doubled) so
    names containing spaces, upper-case letters or reserved words still yield
    valid SQL. The comparison value is a random integer from 1 to 100.
    """
    # len() rather than truthiness: a pandas Index has no unambiguous truth value.
    if len(columns) == 0:
        return []

    sample_queries = []
    for pattern in QUERY_PATTERNS:
        column = random.choice(columns)
        value = random.randint(1, 100)
        quoted_column = '"' + str(column).replace('"', '""') + '"'
        sample_queries.append(pattern.format(column=quoted_column, value=value))
    return sample_queries

In [None]:
# Query templates keyed by clause name; {column} is filled in by
# generate_query_with_clause.
CLAUSE_PATTERNS = {
    "groupby": "SELECT {column}, COUNT(*) FROM uploaded_data GROUP BY {column};",
    "orderby": "SELECT * FROM uploaded_data ORDER BY {column} DESC;"
}

def generate_query_with_clause(clause, columns):
    """Return an example query for ``clause`` using a randomly chosen column.

    Parameters
    ----------
    clause : str
        Key into ``CLAUSE_PATTERNS`` (``"groupby"`` or ``"orderby"``).
    columns : sequence
        Candidate column names; one is picked at random.

    Returns
    -------
    str
        The formatted query, or ``"Clause not recognized."`` when ``clause``
        is not a key of ``CLAUSE_PATTERNS``.

    Notes
    -----
    The column name is wrapped in double quotes (embedded quotes doubled) so
    names containing spaces, upper-case letters or reserved words still yield
    valid SQL.
    """
    template = CLAUSE_PATTERNS.get(clause)
    if template is None:
        return "Clause not recognized."

    column = random.choice(columns)
    quoted_column = '"' + str(column).replace('"', '""') + '"'
    return template.format(column=quoted_column)

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

# Tokenizer data: recent NLTK releases (3.9.x is what this notebook installs)
# load 'punkt_tab' in word_tokenize, while older releases load 'punkt', so
# fetch both. nltk.download reports an unknown resource id without raising.
nltk.download('punkt')
nltk.download('punkt_tab')
# Stop-word lists read through nltk.corpus.stopwords.
nltk.download('stopwords')

def parse_natural_language_query(user_input):
    """Map a free-text request onto one of the ``CLAUSE_PATTERNS`` templates.

    Parameters
    ----------
    user_input : str
        Text typed by the user; matching is case-insensitive.

    Returns
    -------
    str
        ``CLAUSE_PATTERNS["groupby"]`` when the text contains "group by",
        ``CLAUSE_PATTERNS["orderby"]`` when it contains "order by" (checked in
        that order), otherwise a "could not understand" message. The returned
        template is not formatted here, so any ``{column}`` placeholder in it
        is still present.
    """
    # The previous NLTK tokenise / stop-word pass was removed: its result was
    # never read, it reloaded the stop-word list once per token, and it could
    # raise LookupError when the tokenizer data was missing.
    #
    # Collapse runs of whitespace so "group   by" or a tab between the words
    # still matches.
    normalized = " ".join(user_input.lower().split())

    # Simple pattern matching
    if "group by" in normalized:
        return CLAUSE_PATTERNS["groupby"]
    if "order by" in normalized:
        return CLAUSE_PATTERNS["orderby"]
    return "Could not understand the query. Try again."

In [None]:
def main():
    """Interactive entry point: upload a CSV or preview the stored table.

    Prompts for an action. "upload" asks for a CSV path and hands it to
    ``upload_csv_to_databases``. "explore" asks which database to read and
    passes a fresh engine to ``explore_database``; any answer other than
    "sqlite" selects PostgreSQL. Any other action prints a short notice.
    """
    print("Welcome to ChatDB!")
    choice = input("Upload CSV or Explore Database? (upload/explore): ").strip().lower()

    if choice == "upload":
        csv_path = input("Enter the path to your CSV file: ")
        upload_csv_to_databases(csv_path)
    elif choice == "explore":
        db_choice = input("Choose database (sqlite/postgres): ").strip().lower()
        # The engines built inside upload_csv_to_databases are local to that
        # function, so the engine to explore has to be created here; the URLs
        # match the ones used for the upload.
        if db_choice == "sqlite":
            db_url = 'sqlite:///sqlite_db.db'
        else:
            db_url = 'postgresql+psycopg2://user:password@localhost/postgres_db'
        db_engine = create_engine(db_url)
        try:
            explore_database(db_engine)
        finally:
            # Release the connection pool once the preview is done.
            db_engine.dispose()
    else:
        print("Unrecognized choice. Please enter 'upload' or 'explore'.")