In [1]:
import numpy as np
import pandas as pd

In [2]:
antenna_df = pd.read_csv("data_set/TxAntennaDAB.csv")
params_df = pd.read_csv("data_set/TxParamsDAB.csv", encoding='latin-1')

In [3]:
antenna_df.columns.intersection(params_df.columns)

Index(['id'], dtype='object')

In [4]:
intersection = np.intersect1d(antenna_df.columns, params_df.columns)
params_int = params_df[intersection]
antenna_int = antenna_df[intersection]
comb = pd.concat([params_int, antenna_int], axis=1)

mask = ~(params_int == antenna_int).all(axis=1)
different_rows = params_int[mask]

# if the rows, that are common in both data frames do intersect with different data
# the data is deemed invalid and processing cannot continue
assert (len(different_rows) == 0)  # validation

merge_df = pd.merge(antenna_df, params_df, left_index=True, right_index=True, suffixes=('_dab', '_params'))
merge_df = merge_df.drop('id_params', axis=1) # drop duplicate params column

# Strip leading and trailing spaces from column names
merge_df.columns = merge_df.columns.str.strip()

# Strip leading and trailing spaces from all string values in the DataFrame
merge_df = merge_df.map(lambda x: x.strip() if isinstance(x, str) else x)
merge_df


Unnamed: 0,id_dab,NGR,Longitude/Latitude,Site Height,In-Use Ae Ht,In-Use ERP Total,Dir Max ERP,0,10,20,...,Data Serv Label11,Data SId 11 (Hex),Data Serv Label12,Data SId 12 (Hex),Data Serv Label13,Data SId 13 (Hex),Data Serv Label14,Data SId 14 (Hex),Data Serv Label15,Data SId 15 (Hex)
0,745392,NO76418994,002W23 24 57N00 00,325,230,"2.000,000",43.0,1.1,1.7,2.4,...,,,,,,,,,,
1,745393,NJ76043299,002W24 01 57N23 12,245,138,"8.510,009",140.0,0.1,0.5,1.0,...,,,,,,,,,,
2,745394,NJ98315700,002W01 48 57N36 11,225,35,"1.000,000",55.0,0.1,0.0,0.0,...,,,,,,,,,,
3,745395,NJ94270253,002W05 46 57N06 49,87,41,"1.199,999",355.0,0.2,0.5,1.0,...,,,,,,,,,,
4,745396,NS29181617,004W41 59 55N24 35,273,34,"4.000,000",70.0,0.4,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,746601,SJ669343,002W29 37 52N54 18,100,24,"0.554,794",65.0,1.7,5.7,1.6,...,,,,,,,,,,
1210,746602,SJ26613178,003W05 31 52N52 42,305,27,"0.640,000",100.0,3.8,3.0,2.0,...,,,,,,,,,,
1211,746603,SO92279444,002W06 56 52N32 52,225,32,"0.600,000",180.0,1.6,1.8,2.0,...,,,,,,,,,,
1212,746604,SJ62890828,002W33 01 52N40 15,396,48,"1.954,474",,3.7,5.5,4.3,...,,,,,,,,,,


In [5]:
# Outputs should not include any data from DAB radio stations that have the following ‘NGR’: NZ02553847, SE213515, NT05399374 and NT252675908
exclude_ngr = ["NZ02553847", "SE213515", "NT05399374", "NT252675908"]
merge_df = merge_df[~merge_df['NGR'].isin(exclude_ngr)]

# Extract relevant columns
# Based on the requirements of the document, these are the columns necessary for processing.
req_columns = ["id_dab", "NGR", "EID", "Site", "Site Height", "In-Use Ae Ht", "In-Use ERP Total", "Date", "Freq.",
               "Block", "Serv Label1", "Serv Label2", "Serv Label3", "Serv Label4", "Serv Label10"]
req_df = merge_df[req_columns]

# remove all rows where data is beyond 3 standard deviations
numeric_cols = ["Site Height", "In-Use Ae Ht","Freq."]
for col in numeric_cols:
    mean = req_df[col].mean()
    std = req_df[col].std()
    req_df = req_df[~((req_df[col] < mean - 3 * std) | (req_df[col] > mean + 3 * std))]

# remove rows with a NaN value is the requisite fields
req_df = req_df.dropna()

req_df


Unnamed: 0,id_dab,NGR,EID,Site,Site Height,In-Use Ae Ht,In-Use ERP Total,Date,Freq.,Block,Serv Label1,Serv Label2,Serv Label3,Serv Label4,Serv Label10
1,745393,NJ76043299,C19A,Meldrum,245,138,"8.510,009",22/11/2001,220.352,11C,Northsound 1,Northsound 2,Waves Radio,BBC Scotland,talkSPORT Scot
2,745394,NJ98315700,C19A,Mormond Hill DAB,225,35,"1.000,000",21/11/2001,220.352,11C,Northsound 1,Northsound 2,Waves Radio,BBC Scotland,talkSPORT Scot
3,745395,NJ94270253,C19A,Redmoss,87,41,"1.199,999",24/05/2016,220.352,11C,Northsound 1,Northsound 2,Waves Radio,BBC Scotland,talkSPORT Scot
9,745401,SO03450129,CE15,Aberdare,282,43,"0.200,000",30/01/2014,225.648,12B,BBC Radio 1,BBC Radio 2,BBC Radio 3,BBC Radio 4,BBC AsianNetwork
10,745402,SO24411270,CE15,Abergavenny,433,47,"0.488,619",11/09/2008,225.648,12B,BBC Radio 1,BBC Radio 2,BBC Radio 3,BBC Radio 4,BBC AsianNetwork
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209,746601,SJ669343,C18B,Market Drayton,100,24,"0.554,794",16/06/2017,218.640,11B,Free Radio Wolv,BBC Shropshire,BBC WM,Capital,Absolute C Rock
1210,746602,SJ26613178,C18B,Rhosfach Farm,305,27,"0.640,000",07/12/2016,218.640,11B,Free Radio Wolv,BBC Shropshire,BBC WM,Capital,Absolute C Rock
1211,746603,SO92279444,C18B,Sedgley Beacon Lane,225,32,"0.600,000",28/04/2016,218.640,11B,Free Radio Wolv,BBC Shropshire,BBC WM,Capital,Absolute C Rock
1212,746604,SJ62890828,C18B,The Wrekin,396,48,"1.954,474",17/01/2001,218.640,11B,Free Radio Wolv,BBC Shropshire,BBC WM,Capital,Absolute C Rock


In [6]:
# Step 1: Extract DAB multiplex information into new columns
dab_multiplexes = ['C18A', 'C18F', 'C188']
for mux in dab_multiplexes:
    req_df.loc[:, [mux]] = req_df['EID'] == mux

req_df.loc[:,['Power (kW)']] = req_df['In-Use ERP Total'].str.replace('.', '', regex=False).str.replace(',', '.', regex=False).astype(float)
req_df.loc[:,['Aerial height (m)']] = req_df['In-Use Ae Ht'].astype(float)

# Step 3: Filter data by multiplex and include relevant information
result = {}
for mux in dab_multiplexes:
    result[mux] = req_df[req_df[mux]].copy()  # Filter rows where this multiplex is True
    result[mux].drop(columns=dab_multiplexes, inplace=True)
    result[mux].drop(columns=['In-Use ERP Total','In-Use Ae Ht'], inplace=True) # Drop duplicate/irrelevant columns

In [7]:
result["C18A"]

Unnamed: 0,id_dab,NGR,EID,Site,Site Height,Date,Freq.,Block,Serv Label1,Serv Label2,Serv Label3,Serv Label4,Serv Label10,Power (kW),Aerial height (m)
758,746150,NT51617605,C18A,Athelstaneford,175,25/05/2016,229.072,12D,Forth One,FORTH 2,Heart,HITS RADIO UK,Magic Soul,1000.0,18.0
759,746151,NS82836457,C18A,Black Hill,275,10/10/2000,229.072,12D,Forth One,FORTH 2,Heart,HITS RADIO UK,Magic Soul,2399.999,155.0
760,746152,NT25056945,C18A,Braid Hills,202,02/05/2007,229.072,12D,Forth One,FORTH 2,Heart,HITS RADIO UK,Magic Soul,977.237,43.0
761,746153,NT23338724,C18A,CRAIGKELLY,181,11/10/2000,229.072,12D,Forth One,FORTH 2,Heart,HITS RADIO UK,Magic Soul,5999.997,78.0
762,746154,NS72138846,C18A,Earls Hill - DAB,410,12/05/2016,229.072,12D,Forth One,FORTH 2,Heart,HITS RADIO UK,Magic Soul,740.0,49.0
763,746155,NT25117350,C18A,EDINBURGH CASTLE,121,28/03/2018,229.072,12D,Forth One,FORTH 2,Heart,HITS RADIO UK,Magic Soul,27.2,18.0
765,746157,NT25265908,C18A,PENICUIK,248,12/10/2000,229.072,12D,Forth One,FORTH 2,Heart,HITS RADIO UK,Magic Soul,2500.0,43.0


In [8]:
result["C18F"]

Unnamed: 0,id_dab,NGR,EID,Site,Site Height,Date,Freq.,Block,Serv Label1,Serv Label2,Serv Label3,Serv Label4,Serv Label10,Power (kW),Aerial height (m)
841,746233,SE23723509,C18F,Beecroft Hill,149,15/05/2001,229.072,12D,Grt Hits Leeds,Smooth UK,Absolute C Rock,Magic Chilled,Absolute Country,977.237,44.0
844,746236,SE283268,C18F,Morley,120,14/11/2001,229.072,12D,Grt Hits Leeds,Smooth UK,Absolute C Rock,Magic Chilled,Absolute Country,1000.0,59.0


In [9]:
result["C188"]

Unnamed: 0,id_dab,NGR,EID,Site,Site Height,Date,Freq.,Block,Serv Label1,Serv Label2,Serv Label3,Serv Label4,Serv Label10,Power (kW),Aerial height (m)
1190,746582,NZ25156398,C188,Cale Cross House,15,23/11/2017,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,300.0,62.0
1191,746583,NZ26434233,C188,DURHAM,100,27/07/2016,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,300.0,27.0
1192,746584,NZ21676488,C188,Fenham,120,15/11/2000,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,1499.999,35.0
1193,746585,NZ40955565,C188,Hendon,10,27/07/2016,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,800.0,42.0
1194,746586,NZ21486744,C188,Kenton TE,120,19/09/2016,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,100.0,21.0
1195,746587,NZ21818645,C188,MORPETH,60,26/04/2016,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,1100.001,50.0
1196,746588,NZ03586532,C188,NEWTON,165,26/05/2016,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,600.0,45.0
1197,746589,NU224152,C188,Peppermoor Farm,107,25/08/2016,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,1199.999,27.0
1198,746590,NZ06555323,C188,SHOTLEYFIELD,199,05/04/2016,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,300.0,53.0
1200,746592,NZ3501071334,C188,Whitley Bay,40,30/06/2016,220.352,11C,METRO Radio,Grt Hits N East,Smooth Radio,HITS RADIO UK,Magic Soul,100.0,26.0


The client initially needs information to generate the following and output the results using appropriate representation:
- Produce the mean, mode and median for the ‘In-Use ERP Total’ from the extracted DAB multiplexes extracted earlier: C18A, C18F, C188
    - For ‘Site Height’ more than 75
    - For ‘Date’ from 2001 onwards

In [10]:
# Step 1: Filter data for Site Height > 75 and Date >= 2001
stat_results = {}

# Step 2: Extract relevant data for each multiplex
for mux in result:
    df = result[mux]
    df['Date'] = pd.to_datetime(df['Date'], format='%d/%m/%Y')

    filter_df = {
        "Site Height > 75": df[(df['Site Height'] > 75)],
        "Date >= 2001": df[df['Date'].dt.year >= 2001],
    }

    data_key = "Power (kW)"

    for filter_name in filter_df:
        filter_data = filter_df[filter_name]
        # Calculate mean, mode, and median for "Power (kW)"
        if not df.empty:
            # Store results
            if mux not in stat_results:
                stat_results[mux] = {}
            stat_results[mux][filter_name] = {
                'Mean': filter_data[data_key].mean(),
                'Median': filter_data[data_key].median(),
                'Mode': filter_data[data_key].mode().tolist()  # Mode can be multiple values
            }
        else:
            stat_results[mux] = "No data matches the conditions"
# stat_results

# # Step 3: Output results
for mux, stats in stat_results.items():
    print(f"Statistics for {mux}:")
    if isinstance(stats, str):
        print(stats)
        print()
        continue
    for filter_data, st in stats.items():
        print(f"\tFor {filter_data}")
        print(f"\t\tMean: {st['Mean'].round(3)}(kW), Median: {st['Median'].round(3)}(kW), Mode: {', '.join(f'{x:.3f}(kW)' for x in st['Mode'])}")
    print()

Statistics for C18A:
	For Site Height > 75
		Mean: 1949.205(kW), Median: 1000.0(kW), Mode: 27.200(kW), 740.000(kW), 977.237(kW), 1000.000(kW), 2399.999(kW), 2500.000(kW), 5999.997(kW)
	For Date >= 2001
		Mean: 686.109(kW), Median: 858.618(kW), Mode: 27.200(kW), 740.000(kW), 977.237(kW), 1000.000(kW)

Statistics for C18F:
	For Site Height > 75
		Mean: 988.618(kW), Median: 988.618(kW), Mode: 977.237(kW), 1000.000(kW)
	For Date >= 2001
		Mean: 988.618(kW), Median: 988.618(kW), Mode: 977.237(kW), 1000.000(kW)

Statistics for C188:
	For Site Height > 75
		Mean: 666.666(kW), Median: 450.0(kW), Mode: 300.000(kW)
	For Date >= 2001
		Mean: 533.333(kW), Median: 300.0(kW), Mode: 300.000(kW)

