In [None]:
"""
In this file:
- all_data_center_locations, containing the coordinates of each firm's data centers, is loaded
- a folium map is created
- a marker is added for each data center
- each firm's logo is used as the custom icon marking its locations on the map
- the map is saved as an html file
"""

In [64]:
import pandas as pd

import numpy as np

!pip install folium
import folium
from folium.plugins import MarkerCluster

!pip install -U scikit-learn
from sklearn.cluster import DBSCAN



In [65]:
# Read the data-center table and keep only the rows that have a latitude,
# a longitude and a facility count.
locations_csv = 'all_data_center_locations_operationalized.csv'
required_columns = ['Latitude', 'Longitude', 'Number of facilities']
all_data_center_locations = pd.read_csv(locations_csv).dropna(subset=required_columns)

# Facility counts are converted with int(); availability zones use the
# nullable 'Int64' dtype because that column is not filtered for missing values.
all_data_center_locations = all_data_center_locations.assign(**{
    'Number of facilities': all_data_center_locations['Number of facilities'].apply(int),
    'Availability zones': all_data_center_locations['Availability zones'].astype('Int64'),
})

all_data_center_locations

Unnamed: 0,Organization short name,Geocode,Geolocated Country,Coordinates,Latitude,Longitude,Availability zones,Number of facilities,Year of commissioning,Investment,Renewable,Renewable strategy
0,Alibaba,SGP,Singapore,"(1.352083, 103.819836)",1.352083,103.819836,3,3,2015,,,
1,Alibaba,AUS,Australia,"(-33.8688197, 151.2092955)",-33.868820,151.209295,2,2,2016,,,
2,Alibaba,THA,Thailand,"(13.7563309, 100.5017651)",13.756331,100.501765,1,1,2022,,,
3,Alibaba,MYS,Malaysia,"(3.1319197, 101.6840589)",3.131920,101.684059,2,2,2017,,,
4,Alibaba,IDN,Indonesia,"(-6.1944491, 106.8229198)",-6.194449,106.822920,3,3,2018,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
792,SAP,"USA, OR",United States,"(45.6017716, -121.1848433)",45.601772,-121.184843,,1,,,,
793,SAP,"USA, VA",United States,"(37.5407246, -77.4360481)",37.540725,-77.436048,,1,,,,
794,SAP,"USA, IA",United States,"(41.2619444, -95.8608333)",41.261944,-95.860833,,1,,,,
795,SAP,BRA,Brazil,"(-23.5557714, -46.6395571)",-23.555771,-46.639557,,1,,,,


In [66]:
# Distinct organization names, in order of first appearance in the table
organization_column = all_data_center_locations['Organization short name']
organizations = organization_column.drop_duplicates().tolist()
print(organizations)

['Alibaba', 'AWS', 'Digital Realty', 'Equinix', 'Alphabet', 'Iron Mountain', 'Lumen', 'Meta', 'Microsoft', 'NTT', 'Oracle', 'QTS', 'SAP']


In [None]:
# Location View
#
# Draws one logo marker per data-center row and adds a cluster layer whose
# bubbles show the summed 'Number of facilities' of the rows they contain.
# The map is saved to 'fast_marker_cluster_map.html'.

from folium.plugins import FastMarkerCluster
from branca.element import Template, MacroElement

# Optional columns appended to a marker's tooltip when the value is present;
# each tooltip line is labelled with the column name itself.
optional_tooltip_columns = ['Year of commissioning', 'Investment', 'Renewable', 'Renewable strategy']

# Create a new map centered at some location
m = folium.Map(location=[0, 0], zoom_start=3)

# One [lat, lon, num_facilities] entry per marker, consumed by the cluster layer
data = []

# Loop over each organization
for org in organizations:
    # Filter the DataFrame for the current organization
    org_data = all_data_center_locations[all_data_center_locations['Organization short name'] == org]

    # Read every row once instead of rebuilding it with .iloc[i] for each field
    for _, row in org_data.iterrows():
        lat = row['Latitude']
        lon = row['Longitude']

        # Skip this row if either the latitude or longitude is NaN
        if pd.isna(lat) or pd.isna(lon):
            continue

        num_facilities = int(row['Number of facilities'])  # Convert to Python int

        # Add a small random offset to the coordinates
        # (scale is the standard deviation of the normal distribution)
        lat = float(lat + np.random.normal(scale=0.001))
        lon = float(lon + np.random.normal(scale=0.001))

        # The organization's logo file is used as the marker icon
        icon = folium.CustomIcon(icon_image=org + '.jpg', icon_size=(15, 15))

        # Tooltip lines: organization, facility count, then any optional field that is set
        tooltip_info = [f"{org}", f"Number of facilities: {num_facilities}"]
        for column in optional_tooltip_columns:
            if pd.notna(row[column]):
                tooltip_info.append(f"{column}: {row[column]}")

        # Join the tooltip information with line breaks to create the tooltip
        tooltip = folium.Tooltip('<br>'.join(tooltip_info))

        # Add the logo marker directly to the map (not to the cluster layer)
        folium.Marker([lat, lon], icon=icon, tooltip=tooltip).add_to(m)

        # Add the data for the marker to the list
        data.append([lat, lon, num_facilities])

# FastMarkerCluster calls `callback` once per row of `data` and expects a
# Leaflet marker back. The previous callback had the signature
# (cluster, markers) and called markers.forEach on an undefined argument, so
# it threw in the browser and no cluster layer was built.
# The per-row marker is invisible and non-interactive so that the logo marker
# added above stays the visible one; it only carries the facility count.
callback = """
function (row) {
    var marker = L.marker(new L.LatLng(row[0], row[1]), {opacity: 0, interactive: false});
    marker.num_facilities = row[2];
    return marker;
}
"""

# The aggregation belongs in the cluster icon function: sum the facility counts
# of all child markers and show the total in the standard cluster bubble.
icon_create_function = """
function (cluster) {
    var total = 0;
    cluster.getAllChildMarkers().forEach(function(marker) {
        total += marker.num_facilities;
    });
    return L.divIcon({
        html: '<div><span>' + total + '</span></div>',
        className: 'marker-cluster marker-cluster-small',
        iconSize: new L.Point(40, 40)
    });
}
"""

# Create a FastMarkerCluster
marker_cluster = FastMarkerCluster(
    data=data,
    callback=callback,
    icon_create_function=icon_create_function,
).add_to(m)

# Add a custom JavaScript function to listen for zoom events
# NOTE(review): `map` is not defined anywhere in this notebook; folium
# presumably names the JS map object after m.get_name(), so this handler likely
# fails with a ReferenceError and never registers - confirm in the browser
# console. It is left unchanged on purpose: as written it would resize every
# marker icon to 1000px once it ran, which looks like a debugging leftover.
template = """
{% macro script(this, kwargs) %}
    map.on('zoomend', function() {
        console.log(map.getZoom());
        if (map.getZoom() > 2) {
            $(".leaflet-marker-icon").css("display", "block");
            $(".leaflet-marker-icon").css("width", "1000px");
            $(".leaflet-marker-icon").css("height", "1000px");
        } else {
            $(".leaflet-marker-icon").css("display", "none");
        }
    });
{% endmacro %}
"""
macro = MacroElement()
macro._template = Template(template)
m.get_root().add_child(macro)

# Save and display the map
m.save('fast_marker_cluster_map.html')
m

In [None]:
# Location View - Alternative with locations centered around the centroid of the cluster
#
# Rows within 50 km of each other are grouped with DBSCAN and every logo marker
# is drawn at (a jittered copy of) its group's mean coordinate.
# The result is written to 'fast_marker_cluster_map.html', the same file the
# plain Location View saves to, so whichever of the two cells ran last wins.

from folium.plugins import FastMarkerCluster
from branca.element import Template, MacroElement

# Optional columns appended to a marker's tooltip when the value is present;
# each tooltip line is labelled with the column name itself.
optional_tooltip_columns = ['Year of commissioning', 'Investment', 'Renewable', 'Renewable strategy']

# Convert latitude and longitude to radians and stack them into a 2D array
coords = np.radians(all_data_center_locations[['Latitude', 'Longitude']].dropna().values)

# Use DBSCAN to cluster locations that are within 50 km of each other
# The earth's radius is approximately 6371 km
db = DBSCAN(eps=50/6371., min_samples=1, algorithm='ball_tree', metric='haversine').fit(coords)

# Add the cluster labels to the DataFrame
# (raises if any row was dropped above, because the lengths would no longer match)
all_data_center_locations['Cluster'] = db.labels_

# Create a new map centered at Zurich
m = folium.Map(location=[47.3769, 8.5417], zoom_start=3)

# One [lat, lon, num_facilities] entry per marker, consumed by the cluster layer
data = []

# Calculate the centroid of each cluster
centroids = all_data_center_locations.groupby('Cluster')[['Latitude', 'Longitude']].mean()

# Loop over each organization
for org in organizations:
    # Filter the DataFrame for the current organization
    org_data = all_data_center_locations[all_data_center_locations['Organization short name'] == org]

    # Read every row once instead of rebuilding it with .iloc[i] for each field
    for _, row in org_data.iterrows():
        # Centroid of the cluster this data center belongs to
        lat, lon = centroids.loc[row['Cluster']]

        # This row's own facility count. The loop previously never assigned
        # num_facilities, so it raised NameError on a fresh kernel or silently
        # reused the last value left over from an earlier cell.
        num_facilities = int(row['Number of facilities'])

        # Add a small random offset to the centroid coordinates
        # (scale is the standard deviation of the normal distribution)
        lat = float(lat + np.random.normal(scale=0.01))
        lon = float(lon + np.random.normal(scale=0.01))

        # The organization's logo file is used as the marker icon
        icon = folium.CustomIcon(icon_image=org + '.jpg', icon_size=(17, 17))

        # Tooltip lines: organization, facility count, then any optional field that is set
        tooltip_info = [f"{org}", f"Number of facilities: {num_facilities}"]
        for column in optional_tooltip_columns:
            if pd.notna(row[column]):
                tooltip_info.append(f"{column}: {row[column]}")

        # Join the tooltip information with line breaks to create the tooltip
        tooltip = folium.Tooltip('<br>'.join(tooltip_info))

        # Add the logo marker directly to the map (not to the cluster layer)
        folium.Marker([lat, lon], icon=icon, tooltip=tooltip).add_to(m)

        # Add the data for the marker to the list
        data.append([lat, lon, num_facilities])

# FastMarkerCluster calls `callback` once per row of `data` and expects a
# Leaflet marker back. The previous callback had the signature
# (cluster, markers) and called markers.forEach on an undefined argument, so
# it threw in the browser and no cluster layer was built.
# The per-row marker is invisible and non-interactive so that the logo marker
# added above stays the visible one; it only carries the facility count.
callback = """
function (row) {
    var marker = L.marker(new L.LatLng(row[0], row[1]), {opacity: 0, interactive: false});
    marker.num_facilities = row[2];
    return marker;
}
"""

# The aggregation belongs in the cluster icon function: sum the facility counts
# of all child markers and show the total in the standard cluster bubble.
icon_create_function = """
function (cluster) {
    var total = 0;
    cluster.getAllChildMarkers().forEach(function(marker) {
        total += marker.num_facilities;
    });
    return L.divIcon({
        html: '<div><span>' + total + '</span></div>',
        className: 'marker-cluster marker-cluster-small',
        iconSize: new L.Point(40, 40)
    });
}
"""

# Create a FastMarkerCluster
marker_cluster = FastMarkerCluster(
    data=data,
    callback=callback,
    icon_create_function=icon_create_function,
).add_to(m)

# Add a custom JavaScript function to listen for zoom events
# NOTE(review): `map` is not defined anywhere in this notebook; folium
# presumably names the JS map object after m.get_name(), so this handler likely
# fails with a ReferenceError and never registers - confirm in the browser
# console. It is left unchanged on purpose: as written it would resize every
# marker icon to 1000px once it ran, which looks like a debugging leftover.
template = """
{% macro script(this, kwargs) %}
    map.on('zoomend', function() {
        console.log(map.getZoom());
        if (map.getZoom() > 2) {
            $(".leaflet-marker-icon").css("display", "block");
            $(".leaflet-marker-icon").css("width", "1000px");
            $(".leaflet-marker-icon").css("height", "1000px");
        } else {
            $(".leaflet-marker-icon").css("display", "none");
        }
    });
{% endmacro %}
"""
macro = MacroElement()
macro._template = Template(template)
m.get_root().add_child(macro)

# Save and display the map
m.save('fast_marker_cluster_map.html')
m

In [78]:
from bs4 import BeautifulSoup

# Post-process the saved Location View page: rebuild <body> so that it holds a
# heading, a link to the Cluster View page, a spacer paragraph and the map div.
# The link text contains a non-ASCII character, so the file is read and written
# explicitly as UTF-8 instead of with the platform's default encoding.

# Open the saved HTML file
with open('fast_marker_cluster_map.html', 'r', encoding='utf-8') as f:
    soup = BeautifulSoup(f, 'html.parser')

# Find the map div
map_div = soup.find('div', {'class': 'folium-map'})

# Fail with a clear message instead of an obscure error further down
if map_div is None or soup.body is None:
    raise ValueError("fast_marker_cluster_map.html has no <body> or no 'folium-map' div")

# Create a new body tag
body = soup.new_tag('body')

# Create and add the h1 tag to the body
h1 = soup.new_tag('h1')
h1.string = '  Data Center Map'
h1['style'] = 'font-family: Helvetica;'  # Set the font style
body.append(h1)

# Create and add the a tag to the body
a = soup.new_tag('a', href='marker_cluster_map.html')
a.string = '  👉 Change to Cluster View'
a['style'] = 'font-family: Helvetica;'  # Set the font style
body.append(a)

# Create and add an empty p tag to the body
p = soup.new_tag('p')
body.append(p)

# Add the map div to the new body
body.append(map_div)

# Replace the old body with the new body
soup.body.replace_with(body)

# Write the modified HTML back to the file
with open('fast_marker_cluster_map.html', 'w', encoding='utf-8') as f:
    f.write(str(soup))

In [None]:
# Cluster View
#
# Every facility becomes its own marker inside a MarkerCluster, so the cluster
# bubbles count facilities rather than table rows. All markers of one DBSCAN
# group are placed at that group's mean coordinate.
# The map is saved to 'marker_cluster_map.html'.

# Optional columns appended to a marker's tooltip when the value is present;
# each tooltip line is labelled with the column name itself.
optional_tooltip_columns = ['Year of commissioning', 'Investment', 'Renewable', 'Renewable strategy']

# Create a new map centered at Zurich
m = folium.Map(location=[47.3769, 8.5417], zoom_start=3)

# Create a MarkerCluster with a larger max_cluster_radius
marker_cluster = MarkerCluster(max_cluster_radius=200).add_to(m)

# Create a copy of the original DataFrame
df_duplicate = all_data_center_locations.copy()

# Repeat each row 'Number of facilities' times in one positional selection.
# This replaces appending rows one at a time with .loc[len(df)] = row, which
# is quadratic in the number of facilities and turns every column into object
# dtype. Counts below zero are treated as zero, matching range() on a negative.
repeat_counts = df_duplicate['Number of facilities'].astype(int).clip(lower=0).to_numpy()
row_positions = np.repeat(np.arange(len(df_duplicate)), repeat_counts)
df_duplicated = df_duplicate.iloc[row_positions].reset_index(drop=True)

# Convert latitude and longitude to numeric type
df_duplicated['Latitude'] = pd.to_numeric(df_duplicated['Latitude'], errors='coerce')
df_duplicated['Longitude'] = pd.to_numeric(df_duplicated['Longitude'], errors='coerce')

# Drop rows with NaN values in 'Latitude' and 'Longitude' columns
df_duplicated = df_duplicated.dropna(subset=['Latitude', 'Longitude'])

# Convert latitude and longitude to radians and stack them into a 2D array
coords = np.radians(df_duplicated[['Latitude', 'Longitude']].values)

# Use DBSCAN to cluster locations that are within 50 km of each other
# The earth's radius is approximately 6371 km
db = DBSCAN(eps=50/6371., min_samples=1, algorithm='ball_tree', metric='haversine').fit(coords)

# Add the cluster labels to the DataFrame
df_duplicated['Cluster'] = db.labels_

# (organization, lat, lon) combinations that already received a logo icon
unique_values = set()

# Visit each cluster once via groupby instead of re-filtering the frame per label
for _, cluster_data in df_duplicated.groupby('Cluster'):
    # Mean position of the cluster; rows were repeated per facility, so the
    # mean is weighted by facility count
    lat = cluster_data['Latitude'].mean()
    lon = cluster_data['Longitude'].mean()

    # Loop over each row in the cluster data
    for _, row in cluster_data.iterrows():
        # Get the organization and number of facilities for this row
        org = row['Organization short name']
        num_facilities = row['Number of facilities']

        # Only the first marker of an organization at this position gets the logo
        if (org, lat, lon) in unique_values:
            # This is a duplicate, use an empty icon
            # NOTE(review): folium.Icon presumably still renders as a default
            # pin when the cluster is expanded - confirm visually.
            icon = folium.Icon(icon=" ", icon_size=(0.1, 0.1))
        else:
            # This is a unique value, use the custom icon and add it to the set of unique values
            icon = folium.CustomIcon(icon_image=org + '.png', icon_size=(35, 35))
            unique_values.add((org, lat, lon))

        # Tooltip lines: organization, facility count, then any optional field that is set
        tooltip_info = [f"{org}", f"Number of facilities: {num_facilities}"]
        for column in optional_tooltip_columns:
            if pd.notna(row[column]):
                tooltip_info.append(f"{column}: {row[column]}")

        # Join the tooltip information with line breaks to create the tooltip
        tooltip = folium.Tooltip('<br>'.join(tooltip_info))

        # Add the marker to the cluster instead of the map
        folium.Marker([lat, lon], icon=icon, tooltip=tooltip).add_to(marker_cluster)

# Save and display the map
m.save('marker_cluster_map.html')
m

In [79]:
from bs4 import BeautifulSoup

# Post-process the saved Cluster View page: rebuild <body> so that it holds a
# heading, a link to the Location View page, a spacer paragraph and the map div.
# The link text contains a non-ASCII character, so the file is read and written
# explicitly as UTF-8 instead of with the platform's default encoding.

# Open the saved HTML file
with open('marker_cluster_map.html', 'r', encoding='utf-8') as f:
    soup = BeautifulSoup(f, 'html.parser')

# Find the map div
map_div = soup.find('div', {'class': 'folium-map'})

# Fail with a clear message instead of an obscure error further down
if map_div is None or soup.body is None:
    raise ValueError("marker_cluster_map.html has no <body> or no 'folium-map' div")

# Remove the default margin and padding of the map div
map_div['style'] = 'margin:0;padding:0'

# Create a new body tag
body = soup.new_tag('body')

# Remove the default margin and padding of the body
body['style'] = 'margin:0;padding:0'

# Create and add the h1 tag to the body
h1 = soup.new_tag('h1')
h1.string = '  Data Center Map'
h1['style'] = 'font-family: Helvetica;'  # Set the font style
body.append(h1)

# Create and add the a tag to the body
a = soup.new_tag('a', href='fast_marker_cluster_map.html')
a.string = '  👉 Change to Location View'
a['style'] = 'font-family: Helvetica;'  # Set the font style
body.append(a)

# Create and add an empty p tag to the body
p = soup.new_tag('p')
body.append(p)

# Add the map div to the body
body.append(map_div)

# Replace the old body with the new body
soup.body.replace_with(body)

# Write the modified HTML back to the file
with open('marker_cluster_map.html', 'w', encoding='utf-8') as f:
    f.write(str(soup))