### Import libraries

In [1]:
import os
import pandas as pd
import pyodbc, struct
from azure import identity
from faker import Faker
from typing import Union
from fastapi import FastAPI
from pydantic import BaseModel
from sqlalchemy import create_engine

from dotenv import load_dotenv

# Load settings from the local .env file into the process environment.
load_dotenv(override=True)

# Every setting below is mandatory: a missing variable raises KeyError.
connection_string = os.environ["AZURE_SQL_CONNECTIONSTRING"]

SQL_SERVER_USERNAME = os.environ["SQL_SERVER_USERNAME"]
SQL_SERVER_ENDPOINT = os.environ["SQL_SERVER_ENDPOINT"]
SQL_SERVER_PASSWORD = os.environ["SQL_SERVER_PASSWORD"]
SQL_SERVER_DATABASE = os.environ["SQL_SERVER_DATABASE"]

# Same ODBC driver, spelled with braces for pyodbc and without for SQLAlchemy.
driver = "{ODBC Driver 18 for SQL Server}"
sqlalchemy_driver = "ODBC Driver 18 for SQL Server"

## Creating a connection to the SQL Server
# The ODBC connection string is assembled from its individual key=value
# settings, joined with ';' (no trailing separator).
odbc_settings = (
    f'DRIVER={driver}',
    f'SERVER={SQL_SERVER_ENDPOINT}',
    'PORT=1433',
    f'DATABASE={SQL_SERVER_DATABASE}',
    f'UID={SQL_SERVER_USERNAME}',
    f'PWD={SQL_SERVER_PASSWORD}',
)
conn = pyodbc.connect(';'.join(odbc_settings))
cursor = conn.cursor()

#https://github.com/Azure-Samples/openai/blob/main/End_to_end_Solutions/AOAISearchDemo/scripts/prepopulate/populate_sql.py

#### Reading input files

In [2]:
relative_path = "../../../../data/processed/files/"
files = os.listdir(relative_path)

# Read every entry of the folder as parquet, then stack the pieces into a
# single frame with a fresh 0..n-1 index.
frames = [pd.read_parquet(relative_path + file) for file in files]
df = pd.concat(frames).reset_index(drop=True)

# Turn each stored vector into a plain Python list via its .tolist() method.
for vector_column in ('title_vector', 'content_vector'):
    df[vector_column] = df[vector_column].apply(lambda x: x.tolist())

# Expose the chunk identifier under the generic 'id' column as well.
df['id'] = df['chunk_id']

# Pipeline name taken from the first row of the combined frame.
CONTAINER_ID = df['preprocessing_pipeline'][0]

#### Save CSV files as tables

#### Create other SQL Tables

In [61]:
# DDL for the six tables, sent to the server as one batch. Referenced
# tables (Customers, Products, Merchants, Sales) are declared before the
# tables whose FOREIGN KEY clauses point at them (Stock, Sales, Sales_Detail).
CREATE_TABLES_SQL = """
CREATE TABLE Customers (
  cust_id INTEGER,
  cust_name VARCHAR(1000),
  cust_email VARCHAR(1000),
  cust_phone VARCHAR(1000),
  cust_address VARCHAR(1000),
  PRIMARY KEY (cust_id)
);

CREATE TABLE Products (
  prod_id INTEGER,
  prod_name VARCHAR(1000),
  price FLOAT,
  category VARCHAR(1000),
  PRIMARY KEY (prod_id)
);

CREATE TABLE Merchants (
  merchant_id INTEGER,
  merchant_name VARCHAR(1000),
  merchant_region VARCHAR(1000),
  merchant_address VARCHAR(1000),
  PRIMARY KEY (merchant_id)
);

CREATE TABLE Stock (
  prod_id INTEGER,
  merchant_id INTEGER,
  stock INTEGER,
  PRIMARY KEY (prod_id, merchant_id),
  FOREIGN KEY (merchant_id) REFERENCES Merchants(merchant_id),
  FOREIGN KEY (prod_id) REFERENCES Products(prod_id)
);

CREATE TABLE Sales (
    sale_id INTEGER,
    cust_id INTEGER,
    merchant_id INTEGER,
    date DATETIME,
    total_price FLOAT,
    PRIMARY KEY (sale_id),
    FOREIGN KEY (cust_id) REFERENCES Customers(cust_id),
    FOREIGN KEY (merchant_id) REFERENCES Merchants(merchant_id)
);

CREATE TABLE Sales_Detail (
  sales_id INTEGER,
  prod_id INTEGER,
  quantity INTEGER,
  PRIMARY KEY (sales_id, prod_id),
  FOREIGN KEY (sales_id) REFERENCES Sales(sale_id),
  FOREIGN KEY (prod_id) REFERENCES Products(prod_id)
);
"""

# Run the batch, then persist the new tables.
cursor.execute(CREATE_TABLES_SQL)
cursor.commit()

#### Inserting data into the tables

In [None]:
def CSVToSQL(table_name, file_location, connection=None):
    """Insert every row of a CSV file into an existing SQL table.

    The CSV header row supplies the column names of the INSERT statement,
    so it must match the column names of the target table.

    Args:
        table_name: Name of the destination table. It is interpolated into
            the SQL text verbatim, so it must come from trusted code.
        file_location: Path of the CSV file, as accepted by ``pd.read_csv``.
        connection: Optional DB-API connection that uses ``?`` parameter
            markers. When omitted, the module-level ``conn`` / ``cursor``
            pair is used, which is the original behaviour.

    Notes:
        * Values are sent as native Python scalars and missing cells as
          ``None`` (SQL NULL). Iterating with ``iterrows`` would hand the
          driver NumPy scalars for all-numeric files and NaN for blanks.
        * Column names from the CSV header are also interpolated verbatim
          (identifiers cannot be bound as parameters); only load trusted
          files.
    """
    # Read the CSV file
    df = pd.read_csv(file_location)

    if connection is None:
        # Fall back to the handles created at module level.
        db_conn, db_cursor, owns_cursor = conn, cursor, False
    else:
        db_conn, db_cursor, owns_cursor = connection, connection.cursor(), True

    column_list = ', '.join(df.columns)
    placeholders = ', '.join('?' for _ in df.columns)
    insert_query = (
        f"INSERT INTO {table_name} ({column_list}) VALUES ({placeholders})"
    )

    # itertuples keeps each column's own dtype and yields Python scalars;
    # NaN/NaT placeholders for empty cells are mapped to None.
    rows = [
        tuple(None if pd.isna(value) else value for value in record)
        for record in df.itertuples(index=False, name=None)
    ]

    try:
        # Skip executemany for a header-only file: some drivers reject an
        # empty parameter sequence.
        if rows:
            db_cursor.executemany(insert_query, rows)
        db_conn.commit()
    finally:
        # Only close a cursor this function opened itself.
        if owns_cursor:
            db_cursor.close()

In [62]:
relative_path = "../../../../data/processed/structured/"
files = os.listdir(relative_path)

# (target table, CSV path suffix) pairs, in load order. Tables that are
# referenced by FOREIGN KEY constraints are filled before the tables that
# reference them.
csv_sources = (
    ("Customers", '/customers.csv'),
    ("Products", '/products.csv'),
    ("Merchants", '/merchants.csv'),
    ("Stock", '/stock.csv'),
    ("Sales", '/sales.csv'),
    ("Sales_Detail", '/sales_detail.csv'),
)
for target_table, csv_suffix in csv_sources:
    CSVToSQL(target_table, relative_path + csv_suffix)

cursor.commit()