Local Setup Instructions

In [None]:
%pip install pyodbc

Once you have the environment ready, use the following Python code to create a SQL Server database (PersonSearchDB), set up a Persons table, and populate it with 1 million realistic random records (first names, last names, cities, states, and email addresses on the randommail.com domain for privacy). After testing, a cleanup script can safely remove the database and table.

In [17]:
import pyodbc
import random

# Server and database names are module-level so they can be set once and
# shared by every helper defined below.
server = 'localhost'
db_name='PersonSearchDB'

# Connect to 'master' because the target database may not exist yet.
master_conn_str = f'DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={server};DATABASE=master;Trusted_Connection=yes;'

# Open the connection in autocommit mode: SQL Server does not allow
# CREATE DATABASE inside a user transaction, and pyodbc starts one
# implicitly unless autocommit is enabled.
master_conn = pyodbc.connect(master_conn_str, autocommit=True)
try:
    master_cursor = master_conn.cursor()

    # Check if the database exists, if not, create it
    create_db_query = f"IF DB_ID('{db_name}') IS NULL CREATE DATABASE {db_name};"
    master_cursor.execute(create_db_query)

    master_cursor.close()
finally:
    # Always release the master connection, even if database creation fails.
    master_conn.close()

# Function to return SQL Server connection
def get_sql_connection(db_name):
    """
    Opens a new pyodbc connection to the given database on the configured server.

    The connection uses the ODBC Driver 17 for SQL Server with Windows
    (trusted) authentication against the module-level `server`. The database
    is not created here; it must already exist. The caller owns the returned
    connection and is responsible for closing it.

    Parameters:
    db_name (str): The name of the database to connect to.

    Returns:
    pyodbc.Connection: A connection object to the specified database.
    """
    # Assemble the ODBC connection string from its individual settings.
    settings = (
        "DRIVER={ODBC Driver 17 for SQL Server}",
        f"SERVER={server}",
        f"DATABASE={db_name}",
        "Trusted_Connection=yes",
    )
    connection_string = ";".join(settings) + ";"
    return pyodbc.connect(connection_string)

# Function to execute a SQL query
def execute_SQL_Query(db_name, query, params=None):
    """
    Executes a single SQL statement (or batch) on the given database and commits it.

    A fresh connection is obtained from get_sql_connection for every call and
    is always closed before returning, whether or not the statement succeeds.
    The statement is executed exactly once; if it raises, nothing is committed
    and the exception propagates to the caller.

    NOTE: the connection is not opened in autocommit mode, so the statement
    runs inside a transaction. Statements that SQL Server refuses to run in a
    transaction (such as CREATE DATABASE) need an autocommit connection instead.

    Parameters:
    db_name (str): The name of the database.
    query (str): The SQL query to execute.
    params (tuple): Parameters to pass to the query (optional).

    Returns:
    None
    """
    conn = get_sql_connection(db_name)
    try:
        cursor = conn.cursor()
        try:
            # Run the statement once, binding parameters only when supplied.
            if params:
                cursor.execute(query, params)
            else:
                cursor.execute(query)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Closing without a commit discards any uncommitted work.
        conn.close()

# Create the database if it does not exist yet, this time through
# execute_SQL_Query against 'master'. The same guarded statement already ran
# above on the autocommit master connection, so the IF DB_ID(...) IS NULL
# check is expected to make this call a no-op.
execute_SQL_Query('master',f"IF DB_ID('{db_name}') IS NULL CREATE DATABASE {db_name};")

# Sample pools of Indian first names, last names, cities, and state codes
# from which the random person records are drawn.
first_names = [
    'Rahul', 'Anjali', 'Amit', 'Pooja', 'Rajesh',
    'Sneha', 'Vikram', 'Neha', 'Suresh', 'Sunita',
]
last_names = [
    'Sharma', 'Patel', 'Gupta', 'Mehta', 'Jain',
    'Agarwal', 'Reddy', 'Singh', 'Kumar', 'Verma',
]
cities = [
    'Mumbai', 'Delhi', 'Bangalore', 'Chennai', 'Hyderabad',
    'Ahmedabad', 'Kolkata', 'Pune', 'Jaipur', 'Lucknow',
]
states = [
    'MH', 'DL', 'KA', 'TN', 'TS',
    'GJ', 'WB', 'MH', 'RJ', 'UP',
]

def random_name():
    """
    Picks a random first name and a random last name from the sample lists.

    Returns:
    tuple: A (first_name, last_name) pair of strings.
    """
    # Draw the first name before the last name, one random choice each.
    chosen_first = random.choice(first_names)
    chosen_last = random.choice(last_names)
    return chosen_first, chosen_last

def random_email(first_name, last_name):
    """
    Builds a placeholder email address of the form first.last@randommail.com.

    Parameters:
    first_name (str): The person's first name.
    last_name (str): The person's last name.

    Returns:
    str: The lower-cased address on the randommail.com domain.
    """
    local_part = ".".join((first_name.lower(), last_name.lower()))
    return local_part + "@randommail.com"


# Function to create the table and insert records in bulk
def setup_database_and_bulk_insert_data(db_name, record_count=1000, batch_size=100):
    """
    Inserts randomly generated person records into the Persons table in batches.

    The Persons table must already exist in the target database; this function
    only generates rows and inserts them. Each batch is sent as a single
    INSERT INTO ... SELECT ... UNION ALL ... statement via execute_SQL_Query.
    Exactly `record_count` rows are generated in total: the final batch is
    shortened when `record_count` is not a multiple of `batch_size`.

    Parameters:
    db_name (str): The name of the database containing the Persons table.
    record_count (int): Total number of rows to insert.
    batch_size (int): Maximum number of rows per INSERT statement.

    Returns:
    None

    Raises:
    ValueError: If batch_size is less than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # `cities` and `states` are index-aligned (e.g. Mumbai/MH, Pune/MH), so
    # pick them as a pair to keep every generated city in its own state.
    locations = list(zip(cities, states))

    def sql_literal(text):
        # Values are embedded as string literals; double any single quote so
        # a value such as "O'Brien" cannot terminate the literal early.
        return "'" + text.replace("'", "''") + "'"

    # Insert records in batches
    for batch_start in range(0, record_count, batch_size):
        # The last batch may be smaller than batch_size.
        rows_in_batch = min(batch_size, record_count - batch_start)

        values = []
        for _ in range(rows_in_batch):
            first_name, last_name = random_name()
            preferred_name = first_name  # Assume preferred name is the first name
            city, state = random.choice(locations)
            email = random_email(first_name, last_name)
            row = (first_name, last_name, preferred_name, city, state, email)
            values.append("SELECT " + ", ".join(sql_literal(field) for field in row))

        # Create bulk insert query using INSERT INTO ... SELECT
        insert_query = '''
        INSERT INTO Persons (FirstName, LastName, PreferredName, City, State, Email)
        ''' + " UNION ALL ".join(values)

        execute_SQL_Query(db_name, insert_query)

# (Re)create the Persons table using execute_SQL_Query.
# The batch first drops any existing Persons table, so rerunning this cell
# discards previously inserted rows before the table is created again.
create_table_query = '''
IF OBJECT_ID('Persons', 'U') IS NOT NULL DROP TABLE Persons;
CREATE TABLE Persons (
    FirstName NVARCHAR(50),
    LastName NVARCHAR(50),
    PreferredName NVARCHAR(50),
    City NVARCHAR(50),
    State NVARCHAR(50),
    Email NVARCHAR(100)
);
'''
execute_SQL_Query(db_name, create_table_query)

# Populate the table: request 1,000,000 generated records, sent to the
# server in batches of 10,000 rows per INSERT statement.
setup_database_and_bulk_insert_data(db_name, record_count=1000000, batch_size=10000)