In [6]:
import os
import re
import mysql.connector
import csv
import logging

# Configure logging: DEBUG level so every file and row decision is visible.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# MySQL database connection configuration.
# NOTE(review): 'your_password' is a placeholder; replace it locally and
# avoid committing real credentials with this script.
db_config = {
    'user': 'root',
    'password': 'your_password',
    'host': 'localhost',
    'database': 'experiment_db',
}

# Connect to MySQL database. `conn` and `cursor` are module-level and are
# used by the insert helpers and by the final commit/close below.
try:
    conn = mysql.connector.connect(**db_config)
    cursor = conn.cursor()
    logging.info('Connected to MySQL database')
except mysql.connector.Error as err:
    logging.error(f'Error: {err}')
    # The `exit` builtin is injected by the `site` module for interactive
    # use and is not guaranteed to exist in programs; raising SystemExit
    # is the portable way to stop with a non-zero status.
    raise SystemExit(1)

# Function to read CSV and insert data into the database
def insert_data_from_csv(experiment_batch, experiment_id, channel_id, file_path):
    """Read one CSV file and insert its data rows into the database.

    The first row is probed: when its first three fields parse as ``int``,
    ``int`` and ``float`` it is treated as data and inserted; when parsing
    fails it is treated as a header and skipped. Every following row is
    handed to ``process_row``.

    Args:
        experiment_batch: Batch identifier stored with every inserted row.
        experiment_id: Experiment identifier stored with every inserted row.
        channel_id: Channel identifier stored with every inserted row.
        file_path: Path of the CSV file to read.

    Returns:
        None. An empty file is logged and skipped instead of raising.
    """
    logging.debug(f'Reading file: {file_path}')
    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation recommends for files passed to csv.reader.
    with open(file_path, 'r', newline='') as file:
        reader = csv.reader(file)

        # A default avoids StopIteration escaping on a zero-row file.
        first_row = next(reader, None)
        if first_row is None:
            logging.warning(f'File is empty, nothing to insert: {file_path}')
            return

        # Check if the first row contains non-numeric values
        try:
            int(first_row[0])
            int(first_row[1])
            float(first_row[2])
        except ValueError:
            # First row is header, skip it and process the rest
            logging.debug('First row is a header, skipping it')
        except IndexError:
            # Too few columns to be either a data row or a usable header.
            logging.warning(f'First row has fewer than 3 columns, skipping it: {first_row}')
        else:
            # First row is valid data. It is inserted outside the probe's
            # try body so an error raised while inserting is never mistaken
            # for "this row is a header".
            process_row(experiment_batch, experiment_id, channel_id, first_row)

        for row in reader:
            process_row(experiment_batch, experiment_id, channel_id, row)

def process_row(experiment_batch, experiment_id, channel_id, row):
    """Insert a single CSV row into the ``ExperimentData`` table.

    ``row`` must hold exactly three fields: heater setting, timestamp and
    sensor value, converted with ``int``, ``int`` and ``float`` respectively.
    Rows that do not fit that shape (blank lines, wrong column count,
    non-numeric text) are logged and skipped so that one bad line does not
    abort the whole import. Database errors are likewise logged and skipped.

    Args:
        experiment_batch: Batch identifier stored with the row.
        experiment_id: Experiment identifier stored with the row.
        channel_id: Channel identifier stored with the row.
        row: Sequence of the three raw string fields read from the CSV.

    Returns:
        None.
    """
    # Validate and convert before touching the database: both a wrong
    # column count (unpacking) and non-numeric text (int/float) raise
    # ValueError, which the database error handler below would not catch.
    try:
        heater_setting, timestamp, sensor_value = row
        converted = (int(heater_setting), int(timestamp), float(sensor_value))
    except ValueError as err:
        logging.error(f'Skipping malformed row {row}: {err}')
        return

    try:
        cursor.execute('''
        INSERT INTO ExperimentData (experiment_batch, experiment_id, channel_id, heater_setting, timestamp, sensor_value)
        VALUES (%s, %s, %s, %s, %s, %s)
        ''', (experiment_batch, experiment_id, channel_id, *converted))
        logging.debug(f'Inserted row: {row}')
    except mysql.connector.Error as err:
        logging.error(f'Error inserting row {row}: {err}')

# Function to extract channel_id from filename using regex
def extract_channel_id(file_name):
    """Return the channel number embedded in a file name, or None.

    The channel is taken from the first occurrence of the letter ``c``
    immediately followed by one or more digits; those digits are returned
    as an ``int``. A warning is logged when no such pattern is present.
    """
    found = re.search(r'c(\d+)', file_name)
    if found is None:
        logging.warning(f'Could not extract channel_id from file name: {file_name}')
        return None

    digits = found.group(1)
    logging.debug(f'Extracted channel_id: {digits} from file name: {file_name}')
    return int(digits)

# Function to loop through all folders and files
def process_folders(root_folder):
    """Import every eligible CSV file found one level below ``root_folder``.

    Each directory directly inside ``root_folder`` is treated as a batch and
    its name is used as the batch identifier. Inside a batch, a file is
    imported when its name ends with ``.csv`` and does not contain
    ``_BME680``; the file name without extension becomes the experiment id
    and the channel id is parsed from the file name. Files without a
    parsable channel id are logged and skipped.
    """
    logging.info(f'Starting to process root folder: {root_folder}')

    for batch_folder in os.listdir(root_folder):
        batch_folder_path = os.path.join(root_folder, batch_folder)
        # Only directories count as batches; loose files are ignored.
        if not os.path.isdir(batch_folder_path):
            continue

        logging.info(f'Processing batch folder: {batch_folder_path}')
        for entry in os.listdir(batch_folder_path):
            logging.debug(f'Found file: {entry}')

            wanted = entry.endswith('.csv') and '_BME680' not in entry
            if not wanted:
                logging.debug(f'Skipping file: {entry}')
                continue

            entry_path = os.path.join(batch_folder_path, entry)
            logging.debug(f'Processing CSV file: {entry_path}')
            experiment_id, _extension = os.path.splitext(entry)

            channel_id = extract_channel_id(entry)
            if channel_id is None:
                logging.warning(f'No channel_id found for file: {entry_path}')
                continue

            insert_data_from_csv(batch_folder, experiment_id, channel_id, entry_path)

# Root folder containing all batch folders
root_folder = 'D:\\code\\uom_explore\\raw_data\\2024_07_26'

# Process all folders and insert data into the database. The work is done in
# a single transaction: it is committed only when the whole walk finishes,
# rolled back when an unexpected error escapes (for example a missing root
# folder or an unreadable file), and the connection is closed either way so
# it is never leaked.
try:
    process_folders(root_folder)
    conn.commit()
except Exception:
    logging.exception('Import failed, rolling back uncommitted rows')
    conn.rollback()
    raise
finally:
    conn.close()
    logging.info('MySQL connection closed')


2024-07-29 11:28:15,380 - INFO - Connected to MySQL database
2024-07-29 11:28:15,382 - INFO - Starting to process root folder: D:\code\uom_explore\raw_data\exp_efficiency_test_async_1
2024-07-29 11:28:15,451 - INFO - MySQL connection closed
