In [13]:
import sqlite3
import pandas as pd

## Step 1: Create SQLite Database

In [14]:
# Step 1: Create SQLite Database and Tables
def create_database():
    """Create the SQLite database file and its two (empty) tables.

    Opens ``formative_week2.db`` in the current working directory (SQLite
    creates the file if it does not exist) and issues
    ``CREATE TABLE IF NOT EXISTS`` for ``TxAntennaDAB`` and ``TxParamsDAB``,
    so calling the function again leaves already-existing tables untouched.

    The connection is always closed, even if one of the statements raises,
    so a failure here does not leave an open handle on the database file.

    Returns:
        None. A confirmation message is printed on success.

    Raises:
        sqlite3.Error: if the database cannot be opened or a statement fails.
    """
    conn = sqlite3.connect('formative_week2.db')  # Create SQLite database
    try:
        cursor = conn.cursor()

        # Create TxAntennaDAB table
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS TxAntennaDAB (
                id INTEGER PRIMARY KEY,               -- Unique identifier
                NGR TEXT,                             -- National Grid Reference
                Longitude_Latitude TEXT,              -- Longitude and Latitude as a string
                Site_Height INTEGER,                  -- Site Height in meters
                In_Use_Ae_Ht REAL,                    -- In-Use Antenna Height in meters
                In_Use_ERP_Total REAL,                -- In-Use Effective Radiated Power (ERP)
                Dir_Max_ERP REAL                      -- Directional Max ERP
            )
        ''')

        # Create TxParamsDAB table.
        # NOTE: SQLite only enforces FOREIGN KEY constraints on connections
        # that have run `PRAGMA foreign_keys = ON`; by default the clause
        # below is declarative only.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS TxParamsDAB (
                id INTEGER PRIMARY KEY,               -- Unique identifier
                Date TEXT,                            -- Date of entry
                Ensemble TEXT,                        -- Ensemble group
                Licence TEXT,                         -- Licence ID
                Ensemble_Area TEXT,                   -- Name of Ensemble Area
                EID TEXT,                             -- Multiplex Block ID (e.g., C19A)
                Transmitter_Area TEXT,                -- Transmitter Area
                Site TEXT,                            -- Site name
                Freq REAL,                            -- Frequency in MHz
                Block TEXT,                           -- Frequency Block
                FOREIGN KEY (id) REFERENCES TxAntennaDAB (id) -- Link to Antenna table
            )
        ''')

        conn.commit()
    finally:
        # Release the file handle whether or not table creation succeeded.
        conn.close()
    print("Database and tables created successfully.")

## Step 2: Load CSV Files and Clean Data

In [15]:
def load_and_clean_data(antenna_csv, params_csv):
    """Load the two CSV files and reshape them to the database column layout.

    Both files are read as ISO-8859-1. Source headers containing spaces,
    hyphens, dots or slashes are renamed to the underscore-style names used
    by the ``TxAntennaDAB`` / ``TxParamsDAB`` table definitions, and every
    column not present in those definitions is dropped.

    Args:
        antenna_csv: Path (or file-like object) of the antenna CSV.
        params_csv: Path (or file-like object) of the parameters CSV.

    Returns:
        tuple: ``(df_antenna, df_params)`` with columns in schema order.

    Raises:
        KeyError: if an expected column is missing from either CSV.
    """
    # Load Antenna data
    df_antenna = pd.read_csv(antenna_csv, encoding='ISO-8859-1')
    df_antenna = df_antenna.rename(columns={
        'Site Height': 'Site_Height',
        'In-Use Ae Ht': 'In_Use_Ae_Ht',
        'In-Use ERP Total': 'In_Use_ERP_Total',
        'Dir Max ERP': 'Dir_Max_ERP',
        # The table schema names this column Longitude_Latitude; keeping the
        # raw header would put a '/' into the SQL column name.
        'Longitude/Latitude': 'Longitude_Latitude',
    })
    # Keep only the columns defined for the TxAntennaDAB table
    antenna_columns = [
        'id', 'NGR', 'Longitude_Latitude', 'Site_Height',
        'In_Use_Ae_Ht', 'In_Use_ERP_Total', 'Dir_Max_ERP',
    ]
    df_antenna = df_antenna[antenna_columns]

    # Load Params data
    df_params = pd.read_csv(params_csv, encoding='ISO-8859-1')
    df_params = df_params.rename(columns={
        'Ensemble Area': 'Ensemble_Area',
        'Transmitter Area': 'Transmitter_Area',
        'Freq.': 'Freq'
    })
    # Keep only the columns defined for the TxParamsDAB table
    params_columns = [
        'id', 'Date', 'Ensemble', 'Licence', 'Ensemble_Area',
        'EID', 'Transmitter_Area', 'Site', 'Freq', 'Block',
    ]
    df_params = df_params[params_columns]

    return df_antenna, df_params

##  Step 3: Save Data to Database

In [16]:
def save_data_to_database(df_antenna, df_params):
    """Write the cleaned DataFrames into ``formative_week2.db``.

    Each frame is written with ``if_exists='replace'``: an existing
    ``TxAntennaDAB`` / ``TxParamsDAB`` table is dropped and recreated from
    the DataFrame's own columns, so re-running this step never duplicates
    rows. The DataFrame index is not stored.

    The connection is closed even if one of the writes fails, so an error
    does not leave the database file locked by a dangling handle.

    Args:
        df_antenna: Rows for the ``TxAntennaDAB`` table.
        df_params: Rows for the ``TxParamsDAB`` table.

    Returns:
        None. A confirmation message is printed on success.
    """
    conn = sqlite3.connect('formative_week2.db')
    try:
        # Save DataFrames to database
        df_antenna.to_sql('TxAntennaDAB', conn, if_exists='replace', index=False)
        df_params.to_sql('TxParamsDAB', conn, if_exists='replace', index=False)
    finally:
        conn.close()
    print("Data saved to SQLite database successfully.")

## Step 4: Filter and Add Multiplex Data

In [17]:
def filter_and_add_multiplex():
    """Remove excluded antenna rows and derive the ``DAB_Multiplex`` column.

    Two changes are applied to ``formative_week2.db``:

    1. Rows of ``TxAntennaDAB`` whose ``NGR`` is one of the excluded grid
       references are deleted.
    2. ``TxParamsDAB`` is read into a DataFrame, a ``DAB_Multiplex`` column
       is added holding the first multiplex code found in ``EID`` (missing
       where none of the codes occurs), and the table is rewritten with
       ``if_exists='replace'``.

    The connection is closed even if a step fails.

    Returns:
        None. Progress messages are printed after each step.
    """
    exclude_ngrs = ['NZ02553847', 'SE213515', 'NT05399374', 'NT252675908']
    multiplexes = ['C18A', 'C18F', 'C188']

    conn = sqlite3.connect('formative_week2.db')
    try:
        cursor = conn.cursor()

        # Filter out unwanted NGR values in TxAntennaDAB. One '?' is generated
        # per value so the statement stays in sync with the list.
        placeholders = ','.join('?' * len(exclude_ngrs))
        cursor.execute(
            f'DELETE FROM TxAntennaDAB WHERE NGR IN ({placeholders})',
            exclude_ngrs,
        )
        conn.commit()
        print("Unwanted NGR values removed from TxAntennaDAB table.")

        # Add Multiplex information to TxParamsDAB
        df_params = pd.read_sql('SELECT * FROM TxParamsDAB', conn)

        # Build the capture group from the list above instead of repeating the
        # codes in a second, hand-written regex.
        multiplex_pattern = '(' + '|'.join(multiplexes) + ')'
        df_params['DAB_Multiplex'] = df_params['EID'].str.extract(multiplex_pattern, expand=False)

        # Save updated Params table
        df_params.to_sql('TxParamsDAB', conn, if_exists='replace', index=False)
    finally:
        conn.close()
    print("DAB_Multiplex column added and saved to TxParamsDAB table.")


## Step 5: Execution

In [19]:
# Input CSV locations; these are relative paths, so they resolve against the
# working directory the notebook kernel was started in.
antenna_csv_path = "Data sets/TxAntennaDAB.csv"  # Replace with your file path
params_csv_path = "Data sets/TxParamsDAB.csv"    # Replace with your file path

# Run the pipeline in order: save_data_to_database consumes the frames
# returned by load_and_clean_data, and filter_and_add_multiplex then reads
# the tables that save_data_to_database wrote.
create_database()
df_antenna, df_params = load_and_clean_data(antenna_csv_path, params_csv_path)
save_data_to_database(df_antenna, df_params)
filter_and_add_multiplex()

Database and tables created successfully.
Data saved to SQLite database successfully.
Unwanted NGR values removed from TxAntennaDAB table.
DAB_Multiplex column added and saved to TxParamsDAB table.


## Step 6: Validation

In [26]:
# Connect to the SQLite database
# NOTE: `conn` is a module-level connection that check_data_integrity,
# check_data_join and check_dab_multiplex all read as a global, and that the
# final validation cell closes. This cell must therefore be run before any of
# those checks are called.
conn = sqlite3.connect('formative_week2.db')

###  Data Integrity Check

In [22]:
# Step 1: Data Integrity Check
def check_data_integrity():
    """Report table sizes and confirm the excluded NGR rows are gone.

    Reads through the module-level ``conn``. Prints the row count of
    ``TxAntennaDAB`` and ``TxParamsDAB``, then looks for any antenna row
    whose ``NGR`` is still one of the excluded grid references and prints
    either the offending rows or a success message.
    """
    cur = conn.cursor()

    # Row counts, one table at a time
    print("Checking row counts:")
    for table in ('TxAntennaDAB', 'TxParamsDAB'):
        cur.execute(f'SELECT COUNT(*) FROM {table}')
        total = cur.fetchone()[0]
        print(f"{table} row count: {total}")

    # Any excluded NGR still present means the filter step did not work
    print("\nChecking NGR filtering:")
    excluded = ['NZ02553847', 'SE213515', 'NT05399374', 'NT252675908']
    marks = ','.join('?' for _ in excluded)
    cur.execute(f'''
        SELECT * FROM TxAntennaDAB 
        WHERE NGR IN ({marks})
    ''', excluded)
    leftovers = cur.fetchall()

    if not leftovers:
        print("NGR filtering successful. No excluded NGRs found.")
        return
    print(f"Filtering failed for {len(leftovers)} rows: {leftovers}")

### Data Join Check

In [23]:
# Step 2: Data Join Check
def check_data_join():
    """Print a ten-row sample of TxAntennaDAB joined to TxParamsDAB on ``id``.

    The query runs on the module-level ``conn`` and the result is loaded
    into a pandas DataFrame purely for display.
    """
    print("\nChecking data join:")
    join_sql = '''
        SELECT a.id, a.NGR, p.Date, p.EID
        FROM TxAntennaDAB AS a
        JOIN TxParamsDAB AS p
        ON a.id = p.id
        LIMIT 10
    '''
    # Let pandas run the query and format the sample as a table
    print(pd.read_sql_query(join_sql, conn))

### DAB_Multiplex Column Verification

In [24]:
# Step 3: DAB_Multiplex Column Verification
def check_dab_multiplex():
    """Print up to ten TxParamsDAB rows that have a non-null ``DAB_Multiplex``.

    Shows ``EID`` next to ``DAB_Multiplex`` so the derived value can be
    compared with its source. Runs on the module-level ``conn``.
    """
    print("\nChecking DAB_Multiplex column:")
    sample_sql = '''
        SELECT EID, DAB_Multiplex
        FROM TxParamsDAB
        WHERE DAB_Multiplex IS NOT NULL
        LIMIT 10
    '''
    # Let pandas run the query and format the sample as a table
    print(pd.read_sql_query(sample_sql, conn))

In [27]:
# Execute all checks
# All three functions read the module-level `conn`, so they must run before
# the connection is closed below.
check_data_integrity()  # Verify row counts and filtering
check_data_join()       # Verify join integrity between the two tables
check_dab_multiplex()   # Verify the DAB_Multiplex column

# Close the database connection
# After this point the checks cannot be called again until the cell that
# creates `conn` has been re-run.
conn.close()
print("Database connection closed.")

Checking row counts:
TxAntennaDAB row count: 1210
TxParamsDAB row count: 1214

Checking NGR filtering:
NGR filtering successful. No excluded NGRs found.

Checking data join:
       id         NGR        Date   EID
0  745392  NO76418994  23/11/2001  C19A
1  745393  NJ76043299  22/11/2001  C19A
2  745394  NJ98315700  21/11/2001  C19A
3  745395  NJ94270253  24/05/2016  C19A
4  745396  NS29181617  04/04/2002  C19B
5  745397  NS55753413  03/04/2002  C19B
6  745398  NX21179812  29/11/2016  C19B
7  745399  NS21494835  03/04/2002  C19B
8  745400  SU64314792  30/11/2022  C1DF
9  745401  SO03450129  30/01/2014  CE15

Checking DAB_Multiplex column:
    EID DAB_Multiplex
0  C18A          C18A
1  C18A          C18A
2  C18A          C18A
3  C18A          C18A
4  C18A          C18A
5  C18A          C18A
6  C18A          C18A
7  C18A          C18A
8  C18F          C18F
9  C18F          C18F
Database connection closed.


## EOF