In [16]:
import sqlite3
import csv
import os

# Path to the SQLite database file.
# NOTE(review): absolute, machine-specific path; adjust it when running elsewhere.
db_path = 'D:\\code\\uom_explore\\database\\voc_lab.db'

# Close a database connection, reporting (never raising) any failure
def close_db_connection(conn):
    """Close ``conn`` when it is truthy; do nothing otherwise.

    Any exception raised while checking or closing the connection is caught
    and printed, so no error is propagated to the caller.

    Args:
        conn: An object exposing ``close()``, or ``None``.
    """
    try:
        if not conn:
            return
        conn.close()
    except Exception as close_error:
        print(f"Error closing the database connection: {close_error}")

# Function to delete the database file if it exists
def delete_db_file(db_path):
    """Delete the file at ``db_path`` if it exists.

    A confirmation is printed after a successful deletion. A missing file is
    not an error and produces no output. Any other failure is printed rather
    than raised, so the caller always continues.

    Args:
        db_path: Path of the database file to remove.
    """
    try:
        # Attempt the removal directly rather than testing os.path.exists()
        # first: the file could vanish between the check and the removal.
        os.remove(db_path)
    except FileNotFoundError:
        # Nothing to delete.
        return
    except PermissionError as e:
        print(f"PermissionError: {e}. Ensure no other process is using the file.")
    except Exception as e:
        print(f"Error deleting the database file: {e}")
    else:
        print(f"Deleted existing database file: {db_path}")

# Release a connection left open by a previous run of this notebook, if any.
# Opening and immediately closing a *new* connection cannot release a handle
# held elsewhere, and connecting creates the file when it is missing, which made
# the "Deleted existing database file" message appear even for a fresh setup.
close_db_connection(globals().get('conn'))
conn = None

# Now, attempt to delete the database file
delete_db_file(db_path)

# Connect to SQLite database (this will create a new database file)
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Create the ExperimentData table
    cursor.execute('''
CREATE TABLE IF NOT EXISTS ExperimentData (
    experiment_batch TEXT NOT NULL,
    experiment_id TEXT NOT NULL,
    channel_id INTEGER NOT NULL,
    heater_setting INTEGER NOT NULL,
    timestamp INTEGER NOT NULL,
    sensor_value REAL NOT NULL
)
''')

    # Commit the transaction
    conn.commit()
finally:
    # Always close the connection, even if table creation failed, so the
    # database file is not left locked by this kernel.
    conn.close()

Deleted existing database file: D:\code\uom_explore\database\voc_lab.db


# Write local data to DB

In [18]:
import os
import re
import sqlite3
import csv

# Path to the SQLite database file
db_path = 'D:\\code\\uom_explore\\database\\voc_lab.db'

# Open the database. The INSERT statements issued in this cell target the
# ExperimentData table, which this cell does not create, so the table must
# already exist in the file.
conn = sqlite3.connect(db_path)
# Module-level cursor: process_row() reads this global when inserting rows.
cursor = conn.cursor()

# Function to read CSV and insert data into the database
def insert_data_from_csv(experiment_batch, experiment_id, channel_id, file_path):
    """Read one CSV file and pass each data row to ``process_row``.

    The first row is treated as data only when its first two fields parse as
    ``int`` and its third as ``float``; otherwise it is taken to be a header
    and skipped. Empty files and blank lines are ignored.

    Args:
        experiment_batch: Batch label forwarded with every row.
        experiment_id: Experiment identifier forwarded with every row.
        channel_id: Channel number forwarded with every row.
        file_path: Path of the CSV file to read.

    Raises:
        ValueError: Propagated from ``process_row`` when a data row is
            malformed (it is no longer mistaken for a header and dropped).
    """
    # newline='' lets the csv module handle line endings itself, as its
    # documentation recommends.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)
        first_row = next(reader, None)
        if first_row is None:
            # Empty file: nothing to insert.
            return

        # Decide whether the first row is data *before* inserting it, so that
        # an error raised during insertion is not swallowed as "header".
        try:
            int(first_row[0])
            int(first_row[1])
            float(first_row[2])
            first_row_is_data = True
        except (ValueError, IndexError):
            first_row_is_data = False

        if first_row_is_data:
            process_row(experiment_batch, experiment_id, channel_id, first_row)

        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines (e.g. a trailing newline).
                continue
            process_row(experiment_batch, experiment_id, channel_id, row)

def process_row(experiment_batch, experiment_id, channel_id, row):
    """Insert a single CSV row into the ExperimentData table.

    ``row`` must hold exactly three fields: heater setting, timestamp and
    sensor value. The first two are converted with ``int`` and the last with
    ``float`` before being written through the module-level ``cursor``.
    """
    heater_raw, timestamp_raw, sensor_raw = row
    insert_sql = '''
    INSERT INTO ExperimentData (experiment_batch, experiment_id, channel_id, heater_setting, timestamp, sensor_value)
    VALUES (?, ?, ?, ?, ?, ?)
    '''
    values = (
        experiment_batch,
        experiment_id,
        channel_id,
        int(heater_raw),
        int(timestamp_raw),
        float(sensor_raw),
    )
    cursor.execute(insert_sql, values)

# Pull the channel number out of a file name with a regex
def extract_channel_id(file_name):
    """Return the integer following the first ``c<digits>`` in ``file_name``.

    Returns ``None`` when the name contains no ``c`` immediately followed by
    one or more digits.
    """
    found = re.search(r'c(\d+)', file_name)
    if found is None:
        return None
    return int(found.group(1))

# Walk each batch folder under the root and load every channel CSV it contains
def process_folders(root_folder):
    """Import every channel CSV found one level below ``root_folder``.

    Each immediate subdirectory is treated as a batch and its name is used as
    the batch label. Inside it, every entry whose name ends in ``.csv`` and
    yields a channel id via ``extract_channel_id`` is passed to
    ``insert_data_from_csv``; the file name without extension serves as the
    experiment id. All other entries are skipped.
    """
    for batch_name in os.listdir(root_folder):
        batch_dir = os.path.join(root_folder, batch_name)
        if not os.path.isdir(batch_dir):
            continue

        for file_name in os.listdir(batch_dir):
            if not file_name.endswith('.csv'):
                continue

            channel = extract_channel_id(file_name)
            if channel is None:
                continue

            stem, _extension = os.path.splitext(file_name)
            full_path = os.path.join(batch_dir, file_name)
            insert_data_from_csv(batch_name, stem, channel, full_path)

# Root folder containing all batch folders
root_folder = 'D:\\code\\uom_explore\\raw_data\\2024_07_23'

try:
    # Process all folders and insert data into the database
    process_folders(root_folder)

    # Commit only after every file was imported successfully
    conn.commit()
finally:
    # Always close the connection, even when the import fails part-way, so the
    # database file is not left locked by this kernel. Uncommitted inserts are
    # discarded on close.
    conn.close()


# Deduplicate

In [13]:
def deduplicate_data():
    """Delete duplicate rows from ExperimentData via the module-level ``cursor``.

    Rows are grouped by (experiment_batch, experiment_id, heater_setting,
    timestamp); within each group only the row with the smallest rowid is
    kept. The caller is responsible for committing.
    """
    dedupe_sql = '''
    DELETE FROM ExperimentData
    WHERE rowid NOT IN (
        SELECT MIN(rowid)
        FROM ExperimentData
        GROUP BY experiment_batch, experiment_id, heater_setting, timestamp
    )
    '''
    cursor.execute(dedupe_sql)

In [14]:
# Path to your database file
db_path = 'D:\\code\\uom_explore\\database\\voc_lab.db'

conn = sqlite3.connect(db_path)
try:
    # deduplicate_data() reads this module-level cursor
    cursor = conn.cursor()

    deduplicate_data()

    # Commit the transaction
    conn.commit()
finally:
    # Always close the connection, even if deduplication fails, so the
    # database file is not left locked by this kernel.
    conn.close()