In [None]:
import os
import json
import csv
import config  # Import configuration for paths and MongoDB connection
import chardet  # For detecting file encoding
from dataIngestionToMongodb import dbIngestion

In [1]:
_MAX_SERVICES = 32              # columns "Serv Label1" .. "Serv Label32"
_MAX_DATA_SERVICES = 15         # columns "Data Serv Label1" .. "Data Serv Label15"
_BEARINGS = range(0, 360, 10)   # one column per 10 degrees: "0", "10", ..., "350"


def _clean_text(value):
    """Return `value` as a stripped string; None becomes an empty string."""
    if value is None:
        return ""
    return value.strip() if isinstance(value, str) else str(value).strip()


def _pop_text(record, key):
    """Remove `key` from `record`; return its stripped text, or None if blank/missing."""
    return _clean_text(record.pop(key, None)) or None


def _pop_float(record, key):
    """Remove `key` from `record`; return it as a float, or None if blank/missing/invalid."""
    text = _clean_text(record.pop(key, None))
    if not text:
        return None
    try:
        return float(text)
    except ValueError:
        # Report and continue so one bad cell does not abort the whole file.
        print(f"Invalid float value in field '{key}': {text}")
        return None


def clean_record(record, file_type):
    """
    Cleans and structures data for the `txparamsdab` or `txantennadab` collection.

    Args:
        record: Mapping of column name -> raw cell value for one row.
            It is not modified; a shallow copy is cleaned instead.
        file_type: 'txparamsdab', 'txantennadab', or anything else
            (other types only get `id` normalisation and empty-field removal).

    Returns:
        A new dict containing the restructured record with every field whose
        value is None, "" or [] removed.
    """
    # Work on a copy so the caller's dict is left untouched.
    record = dict(record)

    # Normalise `id`; a missing/blank id becomes "" and is dropped by the final filter.
    record["id"] = _clean_text(record.get("id"))

    if file_type == 'txparamsdab':
        # Fold the numbered service columns into a single list of sub-documents.
        service_labels = []
        for i in range(1, _MAX_SERVICES + 1):
            label_key = f"Serv Label{i}"
            sid_key = f"SId {i} (Hex)"
            lsn_key = f"LSN {i} (Hex)"
            label = _clean_text(record.get(label_key))
            if not label:
                # Slot unused: leave its columns alone (blank ones are filtered below).
                continue
            service_labels.append({
                "service_label": label,
                "sid": _clean_text(record.pop(sid_key, None)),
                "lsn": _clean_text(record.pop(lsn_key, None)),
            })
            record.pop(label_key, None)
        record["service_labels"] = service_labels

        # NOTE(review): data-service columns are discarded even when populated —
        # confirm that data services are intentionally excluded from the output.
        for i in range(1, _MAX_DATA_SERVICES + 1):
            record.pop(f"Data Serv Label{i}", None)
            record.pop(f"Data SId {i} (Hex)", None)

    elif file_type == 'txantennadab':
        # Collect the per-bearing columns into one nested mapping; blank or
        # unparseable cells are kept as None so the bearing is still listed.
        directions = {}
        for bearing in _BEARINGS:
            key = str(bearing)
            if key in record:
                directions[key] = _pop_float(record, key)
        record["directional_data"] = directions

        # Rename the remaining antenna columns, removing the raw column each time.
        record["in_use_ae_ht"] = _pop_float(record, "In-Use Ae Ht")
        record["in_use_erp_total"] = _pop_text(record, "In-Use ERP Total")
        record["dir_max_erp"] = _pop_text(record, "Dir Max ERP")
        record["latitude"] = _pop_float(record, "Lat")
        record["longitude"] = _pop_float(record, "Long")
        # Popped (not just read) so the raw "Site Height" column is not duplicated.
        record["site_height"] = _pop_float(record, "Site Height")

    # Drop fields that carry no information (None, empty string, empty list).
    return {
        k: v for k, v in record.items()
        if not (v is None or v == "" or v == [])
    }

In [None]:
def determine_file_type(file_name):
    """
    Classifies a file by looking for a known collection name in its base name.

    Only the final path component is inspected, case-insensitively. Returns
    "txparamsdab" or "txantennadab" when that text occurs in the name
    (checked in that order), otherwise "unknown".
    """
    base_name = os.path.basename(file_name).lower()
    # Order matters: the first matching type wins.
    for known_type in ("txparamsdab", "txantennadab"):
        if known_type in base_name:
            return known_type
    return "unknown"

In [None]:
def fileReader(csv_file, outFilename):
    """
    Reads a CSV file and converts it to JSON, ensuring proper formatting.

    The file's encoding is detected with chardet, each non-empty row is
    stripped and passed through `clean_record`, and the resulting list of
    records is written to `outFilename` as indented JSON.

    Args:
        csv_file: Path of the CSV file to read.
        outFilename: Path of the JSON file to create (overwritten if present).

    Returns:
        `outFilename` on success; None if the file has no header row or any
        error occurs (the error is printed, not raised).
    """
    try:
        print(f"Reading CSV file: {csv_file}...")
        file_type = determine_file_type(os.path.basename(csv_file))

        # Detect encoding of the file from its raw bytes
        with open(csv_file, 'rb') as f:
            raw_data = f.read()
        detected_encoding = chardet.detect(raw_data)['encoding']
        print(f"Detected encoding for {csv_file}: {detected_encoding}")
        # chardet reports None when it cannot decide (e.g. an empty file).
        encoding = detected_encoding or 'utf-8'

        # newline='' lets the csv module handle line endings itself, which is
        # required for quoted fields that contain embedded newlines.
        with open(csv_file, 'r', encoding=encoding, newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            if not reader.fieldnames:
                print(f"Error: No headers found in {csv_file}.")
                return None

            # Clean and validate each row
            records = []
            for row in reader:
                # DictReader stores cells beyond the header count under the key None.
                extras = row.pop(None, None)
                if extras:
                    print(
                        f"Warning: ignoring {len(extras)} extra value(s) "
                        f"on line {reader.line_num} of {csv_file}."
                    )
                # Short rows are padded with None; treat those cells as blank.
                cleaned_row = {k.strip(): (v or "").strip() for k, v in row.items()}
                if any(cleaned_row.values()):  # Skip rows that are empty after stripping
                    records.append(clean_record(cleaned_row, file_type))

        print(f"Converting {csv_file} to JSON...")
        # Write to JSON file
        with open(outFilename, 'w', encoding='utf-8') as jsonfile:
            json.dump(records, jsonfile, indent=2)
            print(f"JSON file created: {outFilename}")
        return outFilename
    except Exception as e:
        # Best-effort conversion: report the failure and let the caller decide.
        print(f"Error processing {csv_file}: {e}")
        return None

In [2]:
def process_user_file():
    """
    Processes a user-specified CSV file and converts it to JSON.

    Prompts for a file name, looks it up in `config.CSV_DIR`, and writes the
    converted output to `config.JSON_DIR` under the lower-cased base name with
    its extension replaced by `.json`.

    Returns:
        True if the file was converted, False if it was not found or the
        conversion failed.
    """
    file_name = input("Enter the name of the CSV file to process (including extension): ").strip()
    csv_file = os.path.join(config.CSV_DIR, file_name)

    # Verify the input names a regular file (blank input would otherwise
    # resolve to the CSV directory itself, which "exists").
    if not file_name or not os.path.isfile(csv_file):
        print(f"Error: File '{file_name}' not found in {config.CSV_DIR}.")
        return False

    # Determine output JSON file path. Only the final extension is swapped, so
    # the result always ends in ".json" and can never reuse the input's name.
    stem, _ = os.path.splitext(os.path.basename(csv_file).lower())
    outFilename = os.path.join(config.JSON_DIR, f"{stem}.json")

    # Process the file
    print(f"Processing file: {csv_file}")
    processed_file = fileReader(csv_file, outFilename)
    if processed_file:
        print(f"File processed successfully. JSON saved to: {processed_file}")
        return True

    print(f"Failed to process the file: {csv_file}")
    return False

In [3]:
def process_json_files():
    """
    Processes all JSON files and inserts them into MongoDB.

    Checks `config.JSON_DIR` for at least one `.json` file and, if any is
    present, hands the directory and `config.DB_NAME` to `dbIngestion`.
    Returns None in every case; progress is reported via print.
    """
    print(f"Looking for JSON files in directory: {config.JSON_DIR}...")

    # A missing directory simply means nothing has been converted yet;
    # os.listdir would raise FileNotFoundError on it.
    if not os.path.isdir(config.JSON_DIR):
        print(f"JSON directory not found: {config.JSON_DIR}")
        return

    has_json = any(name.endswith('.json') for name in os.listdir(config.JSON_DIR))
    if not has_json:
        print("No JSON files found.")
        return

    # Ingest processed JSON files (dbIngestion works on the whole directory)
    print("\nStarting ingestion of JSON files into MongoDB...")
    dbIngestion(config.JSON_DIR, config.DB_NAME)
    print("Ingestion complete.")

In [None]:
print("Starting CSV to JSON conversion process...")

# Convert one file per iteration until the user declines to continue,
# then ingest everything that was produced.
while True:
    success = process_user_file()

    # Ask whether there is another file to convert
    another_file = input("\nDo you want to process another file? (yes/no): ").strip().lower()
    if another_file in ('yes', 'y'):
        continue

    print("\nNo more files to process.")
    # Hand the converted JSON over to MongoDB
    print("\nStarting data ingestion to MongoDB...")
    process_json_files()
    print("Data ingestion complete.")
    break

print("\nProcess complete!")

Starting CSV to JSON conversion process...
