<h1 align="center">Interesting Data - France</h1>

## Radio installations > 5 W

### Inspect API to find latest data files

In [1]:
import requests

# Identifier of the dataset on data.gouv.fr
dataset_id = "551d4ff3c751df55da0cd89f"

# Fetch dataset metadata
api_url = f"https://www.data.gouv.fr/api/1/datasets/{dataset_id}/"
try:
    # Without a timeout, requests waits indefinitely on an unresponsive server
    # and the cell never finishes.
    response = requests.get(api_url, timeout=30)
except requests.RequestException as error:
    response = None
    print(f"Failed to fetch dataset metadata: {error}")

if response is not None and response.status_code == 200:
    metadata = response.json()
    # Display all resources
    resources = metadata.get("resources", [])
    if resources:
        for resource in resources:
            # .get() keeps the listing going when a resource lacks one of the fields
            print(f"Title: {resource.get('title')}")
            print(f"URL: {resource.get('url')}")
            print(f"Format: {resource.get('format')}")
            print("="*40)
    else:
        print("No resources found in the dataset.")
elif response is not None:
    print(f"Failed to fetch dataset metadata. HTTP Status Code: {response.status_code}")

Title: Tables supports antennes emetteurs bandes Octobre 2024
URL: https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241106-155941/20241031-export-etalab-data.zip
Format: zip
Title: Tables de reference Novembre 2024
URL: https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241106-155830/20241031-export-etalab-ref.zip
Format: zip
Title: Tables de reference Octobre 2024
URL: https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241004-154513/tables-de-reference-octobre-2024.zip
Format: zip
Title: Tables supports antennes emetteurs bandes Octobre 2024
URL: https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241004-154402/tables-supports-antennes-emetteurs-bandes-octobre-2024.zip
Format: zip
Title: Tables de reference Septembre 2024
URL: https://static.data.gouv.fr/resou

### Get files from API

In [None]:
import requests
import zipfile
import io
import os

# List of ZIP file URLs
zip_urls = [
    "https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241106-155941/20241031-export-etalab-data.zip",
    "https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241106-155830/20241031-export-etalab-ref.zip",
    # Add more URLs here
]

# Create a directory to store extracted files
output_dir = "data/"
os.makedirs(output_dir, exist_ok=True)

# URLs that could not be downloaded or extracted, reported once the loop is done
failed_urls = []

for index, zip_url in enumerate(zip_urls, start=1):
    print(f"Processing file {index} of {len(zip_urls)}: {zip_url}")
    try:
        # Without a timeout, a stalled connection would block the cell forever.
        response = requests.get(zip_url, timeout=120)
        if response.status_code != 200:
            print(f"Failed to download the ZIP file: {zip_url}")
            failed_urls.append(zip_url)
            continue
        # The archive is unpacked straight from memory; nothing but the
        # extracted members is written to disk.
        with zipfile.ZipFile(io.BytesIO(response.content)) as z:
            z.extractall(output_dir)
            print(f"Extracted files from {zip_url}: {z.namelist()}")
    except Exception as e:
        # Best effort: report the problem and carry on with the next archive.
        print(f"An error occurred while processing {zip_url}: {e}")
        failed_urls.append(zip_url)

print("All files processed.")
if failed_urls:
    print(f"{len(failed_urls)} of {len(zip_urls)} files could not be downloaded or extracted.")

Processing file 1 of 2: https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241106-155941/20241031-export-etalab-data.zip
Extracted files from https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241106-155941/20241031-export-etalab-data.zip: ['SUP_STATION.txt', 'SUP_EMETTEUR.txt', 'SUP_ANTENNE.txt', 'SUP_BANDE.txt', 'SUP_SUPPORT.txt']
Processing file 2 of 2: https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241106-155830/20241031-export-etalab-ref.zip
Extracted files from https://static.data.gouv.fr/resources/donnees-sur-les-installations-radioelectriques-de-plus-de-5-watts-1/20241106-155830/20241031-export-etalab-ref.zip: ['SUP_EXPLOITANT.txt', 'SUP_NATURE.txt', 'SUP_PROPRIETAIRE.txt', 'SUP_TYPE_ANTENNE.txt']
All files processed.


### Import TXT files into DataFrames

In [2]:
import pandas as pd
import os

In [3]:
def create_support_df(data_dir):
    """
    Build a DataFrame of supports with decimal coordinates and an antenna count.

    Reads SUP_SUPPORT.txt, SUP_ANTENNE.txt, SUP_NATURE.txt and
    SUP_PROPRIETAIRE.txt from ``data_dir`` (';'-separated, ISO-8859-1),
    converts the degree/minute/second coordinate columns of the support table
    into decimal ``latitude`` / ``longitude`` columns, counts the rows of the
    antenna table per ``SUP_ID`` and joins the structure-type and owner-type
    lookup tables.

    Parameters:
        data_dir (str): Path to the directory containing .txt files.

    Returns:
        pd.DataFrame: One row per row of SUP_SUPPORT.txt, with renamed columns,
        decimal coordinates and an integer ``antenna_count`` (0 when no antenna
        row references the support).
    """
    def read_table(filename):
        # Every table is read with the same separator and encoding.
        return pd.read_csv(os.path.join(data_dir, filename), delimiter=";", encoding="ISO-8859-1")

    def dms_to_decimal(degrees, minutes, seconds, direction):
        # Works on whole columns at once instead of one Python call per row.
        decimal = degrees + (minutes / 60) + (seconds / 3600)
        # Southern and western hemispheres are negative in decimal degrees.
        return decimal.where(~direction.isin(['S', 'W']), -decimal)

    coordinate_columns = [
        'COR_NB_DG_LAT', 'COR_NB_MN_LAT', 'COR_NB_SC_LAT', 'COR_CD_NS_LAT',
        'COR_NB_DG_LON', 'COR_NB_MN_LON', 'COR_NB_SC_LON', 'COR_CD_EW_LON'
    ]

    # Load support data and replace the DMS columns by decimal degrees
    sup_support = read_table("SUP_SUPPORT.txt")
    sup_support = sup_support.assign(
        latitude=dms_to_decimal(
            sup_support['COR_NB_DG_LAT'], sup_support['COR_NB_MN_LAT'],
            sup_support['COR_NB_SC_LAT'], sup_support['COR_CD_NS_LAT']
        ),
        longitude=dms_to_decimal(
            sup_support['COR_NB_DG_LON'], sup_support['COR_NB_MN_LON'],
            sup_support['COR_NB_SC_LON'], sup_support['COR_CD_EW_LON']
        ),
    ).drop(columns=coordinate_columns)

    # Count antenna rows per support
    sup_antenne = read_table("SUP_ANTENNE.txt")
    antenna_counts = sup_antenne.groupby('SUP_ID').size().reset_index(name='antenna_count')

    # Left join keeps supports without antennas; their count becomes 0
    support_df = sup_support.merge(antenna_counts, on='SUP_ID', how='left')
    support_df['antenna_count'] = support_df['antenna_count'].fillna(0).astype(int)

    # Attach the human-readable structure type and owner type labels
    sup_nature = read_table("SUP_NATURE.txt")
    sup_proprietaire = read_table("SUP_PROPRIETAIRE.txt")
    support_df = (
        support_df
        .merge(sup_nature, on="NAT_ID", how="left")  # Join on structure type
        .merge(sup_proprietaire, on="TPO_ID", how="left")  # Join on owner type
    )

    # Rename columns for easier use (latitude, longitude and antenna_count
    # already carry their final names)
    return support_df.rename(columns={
        'SUP_ID': 'support_id',
        'STA_NM_ANFR': 'station_name_anfr',
        'NAT_ID': 'structure_type_id',
        'NAT_LB_NOM': 'structure_type_name',
        'SUP_NM_HAUT': 'support_height',
        'TPO_ID': 'owner_type_id',
        'TPO_LB': 'owner_type_name',
        'ADR_LB_LIEU': 'address_place',
        'ADR_LB_ADD1': 'address_line_1',
        'ADR_LB_ADD2': 'address_line_2',
        'ADR_LB_ADD3': 'address_line_3',
        'ADR_NM_CP': 'postal_code',
        'COM_CD_INSEE': 'insee_code',
    })

In [4]:
def create_antennas_df(data_dir):
    """
    Build a DataFrame of antennas enriched with location, operator, antenna
    type and band/emitter information.

    Reads SUP_SUPPORT.txt, SUP_ANTENNE.txt, SUP_STATION.txt,
    SUP_EXPLOITANT.txt, SUP_TYPE_ANTENNE.txt, SUP_BANDE.txt and
    SUP_EMETTEUR.txt from ``data_dir`` (';'-separated, ISO-8859-1).

    Joins performed (all left joins starting from the antenna table):
      * support coordinates, on ``SUP_ID``;
      * operator id, on ``STA_NM_ANFR`` through SUP_STATION.txt, then operator
        name on ``ADM_ID``. Support ids and operator ids are unrelated keys, so
        the operator table is never matched against ``SUP_ID``;
      * antenna type label, on ``TAE_ID``;
      * bands and emitters, on ``STA_NM_ANFR`` plus ``AER_ID`` when both sides
        carry it, so an antenna only receives the emitters attached to it
        rather than every emitter of its station.

    Parameters:
        data_dir (str): Path to the directory containing .txt files.

    Returns:
        pd.DataFrame: Antenna rows (repeated once per matching band) with
        renamed columns.

    Raises:
        KeyError: If SUP_STATION.txt lacks 'STA_NM_ANFR' or 'ADM_ID', or if the
            station key is missing from the antenna or band/emitter data.
        FileNotFoundError: If one of the expected files is absent.
    """
    def read_table(filename, **read_options):
        # Every table is read with the same separator and encoding.
        return pd.read_csv(
            os.path.join(data_dir, filename), delimiter=";", encoding="ISO-8859-1", **read_options
        )

    def dms_to_decimal(degrees, minutes, seconds, direction):
        # Works on whole columns at once instead of one Python call per row.
        decimal = degrees + (minutes / 60) + (seconds / 3600)
        # Southern and western hemispheres are negative in decimal degrees.
        return decimal.where(~direction.isin(['S', 'W']), -decimal)

    def consolidate_station_key(frame):
        # A merge of two tables that both carry STA_NM_ANFR leaves an _x/_y
        # pair; fold it back into a single column, preferring the left side.
        if 'STA_NM_ANFR_x' in frame.columns and 'STA_NM_ANFR_y' in frame.columns:
            merged_key = frame['STA_NM_ANFR_x'].combine_first(frame['STA_NM_ANFR_y'])
            frame = frame.assign(STA_NM_ANFR=merged_key).drop(columns=['STA_NM_ANFR_x', 'STA_NM_ANFR_y'])
        return frame

    # Load all required data files
    sup_support = read_table("SUP_SUPPORT.txt")
    sup_antenne = read_table("SUP_ANTENNE.txt")
    sup_station = read_table("SUP_STATION.txt")
    sup_exploitant = read_table("SUP_EXPLOITANT.txt")
    sup_type_antenne = read_table("SUP_TYPE_ANTENNE.txt")
    sup_bande = read_table("SUP_BANDE.txt", low_memory=False)
    sup_emetteur = read_table("SUP_EMETTEUR.txt", low_memory=False)

    # SUP_STATION.txt is expected to map each station to its operator id
    # (per the published table description of the dataset); fail loudly if not.
    if not {'STA_NM_ANFR', 'ADM_ID'}.issubset(sup_station.columns):
        raise KeyError("SUP_STATION.txt must provide 'STA_NM_ANFR' and 'ADM_ID' columns.")

    # Only the columns needed downstream: id, decimal coordinates, station key
    support_locations = pd.DataFrame({
        'SUP_ID': sup_support['SUP_ID'],
        'latitude': dms_to_decimal(
            sup_support['COR_NB_DG_LAT'], sup_support['COR_NB_MN_LAT'],
            sup_support['COR_NB_SC_LAT'], sup_support['COR_CD_NS_LAT']
        ),
        'longitude': dms_to_decimal(
            sup_support['COR_NB_DG_LON'], sup_support['COR_NB_MN_LON'],
            sup_support['COR_NB_SC_LON'], sup_support['COR_CD_EW_LON']
        ),
        'STA_NM_ANFR': sup_support['STA_NM_ANFR'],
    })

    # Merge antenna data with support data
    # NOTE(review): if SUP_SUPPORT.txt lists a support once per hosted station,
    # this join repeats antenna rows for shared supports — confirm against the data.
    antennas_df = consolidate_station_key(
        sup_antenne.merge(support_locations, on="SUP_ID", how="left")
    )

    # Merge band and emitter data
    sup_bande_emetteur = consolidate_station_key(
        sup_bande.merge(sup_emetteur, on="EMR_ID", how="left")
    )

    if 'STA_NM_ANFR' not in antennas_df.columns or 'STA_NM_ANFR' not in sup_bande_emetteur.columns:
        raise KeyError("'STA_NM_ANFR' column missing in either antennas_df or sup_bande_emetteur.")

    # One operator per station; dropping duplicates guarantees the join below
    # cannot multiply antenna rows.
    station_operator = sup_station[['STA_NM_ANFR', 'ADM_ID']].drop_duplicates(subset='STA_NM_ANFR')

    # Merge operator and antenna type data
    antennas_df = (
        antennas_df
        .merge(station_operator, on="STA_NM_ANFR", how="left")  # Station -> operator id
        .merge(sup_exploitant, on="ADM_ID", how="left")  # Operator id -> operator name
        .merge(sup_type_antenne, on="TAE_ID", how="left")  # Join on antenna type
    )

    # Join with band/emitter data. Matching on the station alone pairs every
    # antenna with every emitter of the station; when the emitter table names
    # its antenna (AER_ID), use it to keep only the real pairs.
    join_keys = ['STA_NM_ANFR']
    if 'AER_ID' in antennas_df.columns and 'AER_ID' in sup_bande_emetteur.columns:
        join_keys.append('AER_ID')
    antennas_df = antennas_df.merge(sup_bande_emetteur, on=join_keys, how="left")

    # Rename columns for easier use (latitude and longitude already carry
    # their final names)
    return antennas_df.rename(columns={
        'SUP_ID': 'support_id',
        'STA_NM_ANFR': 'station_name_anfr',
        'AER_ID': 'antenna_element_id',
        'TAE_ID': 'antenna_type_id',
        'AER_NB_DIMENSION': 'antenna_dimension',
        'AER_FG_RAYON': 'antenna_radius_flag',
        'AER_NB_AZIMUT': 'antenna_azimuth',
        'AER_NB_ALT_BAS': 'antenna_altitude_bottom',
        'ADM_ID': 'operator_id',
        'ADM_LB_NOM': 'operator_name',
        'TAE_LB': 'antenna_type',
        'BAN_ID': 'band_id',
        'EMR_ID': 'emitter_id',
        'BAN_NB_F_DEB': 'frequency_start',
        'BAN_NB_F_FIN': 'frequency_end',
        'BAN_FG_UNITE': 'frequency_unit',
        'EMR_LB_SYSTEME': 'emitter_system',
        'EMR_DT_SERVICE': 'emitter_service_date',
    })

In [5]:
# Folder holding the extracted .txt tables
data_directory = "data"

# Build the support-level and the antenna-level frames from that folder
support_df = create_support_df(data_dir=data_directory)
antenna_df = create_antennas_df(data_dir=data_directory)

# Display the first few rows of the cleaned DataFrame
#print(support_df.head())

### View on map

In [30]:
import folium
import pandas as pd
from matplotlib import colors, colormaps
import branca

def _apply_map_filters(dataframe, filters):
    """Return the rows of `dataframe` that satisfy every entry of `filters` (a dict or None)."""
    for column, value in (filters or {}).items():
        if column == 'postal_code' and isinstance(value, int) and len(str(value)) <= 2:
            # A one- or two-digit postal code is treated as a prefix:
            # 54 selects every code from 54000 up to (not including) 55000.
            lower_bound = value * 1000
            upper_bound = (value + 1) * 1000
            in_range = (dataframe['postal_code'] >= lower_bound) & (dataframe['postal_code'] < upper_bound)
            dataframe = dataframe[in_range]
        else:
            # General filter for exact matches
            dataframe = dataframe[dataframe[column] == value]
    return dataframe


def _build_popup_html(row, columns, width):
    """Render the selected `columns` of `row` as the HTML body of a popup `width` pixels wide."""
    import html
    import numbers

    lines = []
    for col in columns:
        value = row[col]
        if isinstance(value, numbers.Real):
            formatted_value = f"{value:,.2f}"
        else:
            # Values come straight from the data files; escape them so that
            # characters such as '<' or '&' cannot break the popup markup.
            formatted_value = html.escape(str(value))
        lines.append(f"<b>{html.escape(str(col))}:</b> {formatted_value}<br>")
    return "<div style='width:{}px;'>".format(width) + "".join(lines) + "</div>"


def display_map(dataframe, filters=None, popup_list=None, popup_width=200, gradient_column=None, sample_size=0, tile_layers=None):
    """
    Displays an interactive map with CircleMarkers representing data points, multiple tile layers, and a color gradient legend.

    The input DataFrame is never modified; filtering, sampling and cleaning
    all work on derived frames. Rows without a latitude or longitude are left
    off the map (and out of the statistics box).

    Parameters:
    - dataframe (pd.DataFrame): DataFrame containing the data (including 'latitude' and 'longitude').
    - filters (dict or None): Column name -> value to match exactly. An integer of one or two
      digits under 'postal_code' is treated as a prefix (54 keeps 54000 <= code < 55000).
    - popup_list (list or None): List of columns to include in the popup. If None, include all columns.
    - popup_width (int): Width of the popup in pixels.
    - gradient_column (str or None): Column name to determine the color gradient. Values that
      cannot be converted to numbers are dropped.
    - sample_size (int): Number of points to sample (if 0, don't sample).
    - tile_layers (list or None): List of tile layer names to include (default: ['OpenStreetMap', 'CartoDB Positron', 'CartoDB Dark_Matter']).

    Returns:
    - folium.Map: A Folium map object. When no row survives the filters or the gradient
      cleaning, a warning is printed and a bare map centered on France is returned.
    """
    france_center = [46.603354, 1.888334]  # Approximate center of France

    def empty_map():
        # Fallback returned whenever there is nothing left to plot
        return folium.Map(location=france_center, zoom_start=6, tiles="CartoDB Positron")

    # Apply filters if provided
    dataframe = _apply_map_filters(dataframe, filters)

    # Check if the DataFrame is empty after filtering
    if dataframe.empty:
        print("Warning: No data available after applying filters. Please check your filter criteria.")
        return empty_map()

    # folium refuses marker locations containing NaN, so such rows cannot be drawn
    dataframe = dataframe.dropna(subset=['latitude', 'longitude'])

    # Sample data if sample_size is greater than 0
    if sample_size > 0 and len(dataframe) > sample_size:
        dataframe = dataframe.sample(sample_size, random_state=42)

    norm = None
    colormap = None
    gradient_legend = None
    if gradient_column:
        # Work on a copy: the frame may still be the caller's object (no filter,
        # no sampling) or a slice of it, and neither should be written to.
        dataframe = dataframe.copy()
        dataframe[gradient_column] = pd.to_numeric(dataframe[gradient_column], errors='coerce')
        dataframe = dataframe.dropna(subset=[gradient_column])  # Drop rows with non-numeric values

        if dataframe.empty:
            print(f"Warning: No valid data available after cleaning the gradient column '{gradient_column}'.")
            return empty_map()

        gradient_min = dataframe[gradient_column].min()
        gradient_max = dataframe[gradient_column].max()
        norm = colors.Normalize(vmin=gradient_min, vmax=gradient_max)
        colormap = colormaps.get_cmap("viridis")

        # Sample the colormap evenly over [0, 1] so the legend shows the same
        # gradient as the markers (two end colors alone would be interpolated
        # linearly and would not follow viridis).
        legend_steps = 9
        legend_colors = [
            colors.rgb2hex(colormap(step / (legend_steps - 1)))
            for step in range(legend_steps)
        ]
        gradient_legend = branca.colormap.LinearColormap(
            colors=legend_colors,
            vmin=gradient_min,
            vmax=gradient_max,
            caption=f'Color Gradient: {gradient_column}'
        )

    # Center the map on France; tiles are added below as switchable layers
    m = folium.Map(location=france_center, zoom_start=6, tiles=None)

    # Add tile layers
    if tile_layers is None:
        tile_layers = ['OpenStreetMap', 'CartoDB Positron', 'CartoDB Dark_Matter']  # Default layers
    for tile in tile_layers:
        try:
            folium.TileLayer(tile, name=tile).add_to(m)
        except ValueError as e:
            print(f"Skipping unsupported tile layer '{tile}': {e}")

    # Resolve the popup columns once, outside the marker loop
    popup_columns = list(dataframe.columns) if popup_list is None else popup_list

    # Add CircleMarkers to the map
    for _, row in dataframe.iterrows():
        if gradient_column:
            gradient_value = row[gradient_column]
            hex_color = colors.rgb2hex(colormap(norm(gradient_value))[:3])
            tooltip = f"<b>{gradient_column}:</b> {gradient_value:,.2f}"
        else:
            hex_color = "#3388ff"  # Default blue color
            tooltip = None

        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=5,  # Size of the marker
            color=hex_color,
            fill=True,
            fill_color=hex_color,
            fill_opacity=0.7,
            tooltip=tooltip,
            popup=folium.Popup(_build_popup_html(row, popup_columns, popup_width), max_width=popup_width)
        ).add_to(m)

    # Add the color gradient legend to the map
    if gradient_legend:
        gradient_legend.add_to(m)

    # Add fixed statistics box with black background, 50% transparency, white text, and rounded corners
    total_supports = dataframe['support_id'].nunique() if 'support_id' in dataframe.columns else len(dataframe)
    total_antennas = dataframe['antenna_count'].sum() if 'antenna_count' in dataframe.columns else 'N/A'

    stats_html = f"""
    <div id="stats-box" style="position: fixed;
                bottom: 20px; left: 20px; width: 300px; height: 60px;
                background-color: rgba(0, 0, 0, 0.5); color: white; z-index: 1000;
                padding: 10px; border: 1px solid black; border-radius: 10px;">
        <b>Total Number of Supports:</b> {total_supports}<br>
        <b>Total Number of Antennas:</b> {total_antennas}
    </div>
    """

    # Inject the statistics box into the page that hosts the map
    m.get_root().html.add_child(folium.Element(stats_html))

    # Add layer control
    folium.LayerControl().add_to(m)

    return m

In [31]:
# Keep only supports whose postal code lies in the 54xxx range
filters = {'postal_code': 54}

# None lists every column of the frame in each popup
popup_columns = None

# Build the map, coloring each marker by its antenna count
map_result = display_map(
    dataframe=support_df,
    filters=filters,
    popup_list=popup_columns,
    popup_width=200,
    gradient_column='antenna_count',
    sample_size=0,
    tile_layers=['OpenStreetMap', 'CartoDB Positron', 'CartoDB Dark_Matter'],
)

map_result