In [1]:
import pandas as pd

data = pd.read_csv('geo_data_centers_cleaned.csv', sep=',')

In [2]:
data.drop(columns=['Unnamed: 0.1'], inplace=True)
data.drop(columns=['Unnamed: 0'], inplace=True)

In [3]:
print(data.columns)
print(data.info())

Index(['name', 'country', 'city', 'town', 'address', 'total space (sqft)',
       'colocation space (sqft)', 'total power (MW)', 'latitude', 'longitude'],
      dtype='object')
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3278 entries, 0 to 3277
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   name                     3278 non-null   object 
 1   country                  3278 non-null   object 
 2   city                     3276 non-null   object 
 3   town                     3124 non-null   object 
 4   address                  2409 non-null   object 
 5   total space (sqft)       2019 non-null   object 
 6   colocation space (sqft)  1260 non-null   object 
 7   total power (MW)         1467 non-null   object 
 8   latitude                 3260 non-null   float64
 9   longitude                3260 non-null   float64
dtypes: float64(2), object(8)
memory usage: 256.2+ KB
None


In [4]:
location_data = data[data['latitude'].notna() & data['longitude'].notna()]
location_data

Unnamed: 0,name,country,city,town,address,total space (sqft),colocation space (sqft),total power (MW),latitude,longitude
0,EXA Infrastructure,switzerland,Geneva,1217 Meyrin,Chemin de l'Epinglier 2,,,,46.201756,6.146601
1,NexTDC,australia,Perth WA,11 Newcastle Street,,129166.93,,20,-31.955893,115.860585
2,OVHCloud,usa,Virginia 20187,Warrenton,6872 Watson Ct,,,,33.711823,-117.792414
3,LeaseWeb,germany,Kalbach-Riedberg,60437 Frankfurt am Main,Heinrich-Lanz-Allee 47,215278,,10,50.176622,8.632078
4,Amazon AWS,usa,VA,Chantilly,43701 Clubber Ln,150000,,,37.123224,-78.492772
...,...,...,...,...,...,...,...,...,...,...
3273,rne Global,uk,London,36-43 Great Sutton Street,,91000,,9.6,51.507446,-0.127765
3274,Iron Mountain Data Centers,usa,NJ,Edison,3003 Woodbridge Avenue,830000,,25.6,40.075738,-74.404162
3275,Iron Mountain Data Centers,netherlands,Haarlem,J.W. Lucasweg 35,,182986,,22.7,52.383706,4.643560
3276,Telstra,singapore,Tai Seng,,,45000,45000,,1.335383,103.888306


In [5]:
def clean_total_power(value):
    if pd.isna(value):
        return 0.0
    if isinstance(value, str):
        value = value.replace('MW', '').strip()
    try:
        value = float(value)
        if value > 1000000:
            value /= 1000000
        elif value > 100:
            value /= 1000
        return value
    except ValueError:
        return 0.0


# Apply the function to the total power column
location_data['total power (MW)'] = location_data['total power (MW)'].apply(clean_total_power)
location_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  location_data['total power (MW)'] = location_data['total power (MW)'].apply(clean_total_power)


Unnamed: 0,name,country,city,town,address,total space (sqft),colocation space (sqft),total power (MW),latitude,longitude
0,EXA Infrastructure,switzerland,Geneva,1217 Meyrin,Chemin de l'Epinglier 2,,,0.0,46.201756,6.146601
1,NexTDC,australia,Perth WA,11 Newcastle Street,,129166.93,,20.0,-31.955893,115.860585
2,OVHCloud,usa,Virginia 20187,Warrenton,6872 Watson Ct,,,0.0,33.711823,-117.792414
3,LeaseWeb,germany,Kalbach-Riedberg,60437 Frankfurt am Main,Heinrich-Lanz-Allee 47,215278,,10.0,50.176622,8.632078
4,Amazon AWS,usa,VA,Chantilly,43701 Clubber Ln,150000,,0.0,37.123224,-78.492772
...,...,...,...,...,...,...,...,...,...,...
3273,rne Global,uk,London,36-43 Great Sutton Street,,91000,,9.6,51.507446,-0.127765
3274,Iron Mountain Data Centers,usa,NJ,Edison,3003 Woodbridge Avenue,830000,,25.6,40.075738,-74.404162
3275,Iron Mountain Data Centers,netherlands,Haarlem,J.W. Lucasweg 35,,182986,,22.7,52.383706,4.643560
3276,Telstra,singapore,Tai Seng,,,45000,45000,0.0,1.335383,103.888306


In [6]:
max(location_data['total power (MW)'].to_list())

100.0

In [9]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from dash import dcc, html
import plotly.graph_objects as go
import pandas as pd
import numpy as np

# Create a Dash application
app = dash.Dash(__name__)

# Define the layout with a dropdown for filtering
app.layout = html.Div(children=[
    html.H1("Data Centre Map"),
    dcc.Dropdown(
        id='power-filter',
        options=[
            {'label': 'All Points', 'value': 'ALL'},
            {'label': 'With Power Values', 'value': 'WITH_POWER'},
            {'label': 'Without Power Values', 'value': 'WITHOUT_POWER'}
        ],
        value='ALL'  # Default value
    ),
    dcc.Graph(
        id='map-graph'
    )
])

# Define the callback to update the graph based on dropdown selection
@app.callback(
    Output('map-graph', 'figure'),
    [Input('power-filter', 'value')]
)
def update_graph(filter_value):
    # Filter data based on dropdown selection
    if filter_value == 'ALL':
        filtered_data = location_data
    elif filter_value == 'WITH_POWER':
        filtered_data = location_data[location_data['total power (MW)'] > 0]
    elif filter_value == 'WITHOUT_POWER':
        filtered_data = location_data[location_data['total power (MW)'] == 0]

    # Define marker sizes and colors
    marker_sizes = filtered_data['total power (MW)'].fillna(0).apply(lambda x: 7 if x == 0 else x / 3)
    marker_colors = ['blue' if x == 0 else 'red' for x in filtered_data['total power (MW)']]  # Colors based on power values

    # Create the updated figure
    fig = go.Figure()
    fig.add_trace(go.Scattergeo(
        lon=filtered_data['longitude'],
        lat=filtered_data['latitude'],
        text=filtered_data.apply(lambda row: f"Power: {row['total power (MW)']} MW<br>Colocation Space: {row['colocation space (sqft)']} sqft<br>Total Space: {row['total space (sqft)']} sqft", axis=1),
        marker=dict(
            size=marker_sizes,  # Constant size
            color=marker_colors,  # Color based on power values
            colorscale='Viridis',  # Optional, as colors are manually set
            #colorbar=dict(title='Power (MW)'),
            line=dict(width=0)
        )
    ))

    fig.update_layout(
        title='Geographical Distribution of Data Centres',
        geo=dict(
            showland=True,
            landcolor="green",  # Land color
            bgcolor="lightblue",  # Ocean color (background color)
            projection=dict(
                type='orthographic',
                rotation=dict(lon=-100, lat=40)
            ),
            scope='world',
            showcountries=True
        ),
    )

    return fig

app.run_server(debug=True)


: 