Install dependencies:

In [None]:
!pip install zipp
!pip install urllib3==1.26.6
!pip install requests
!pip install pandas
!pip install plotly
!pip install numpy==1.23.4
!pip install datashader
!pip install dash

Import Dependencies:

In [None]:
from zipfile import ZipFile
import requests
import pandas as pd
import plotly as plot
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
from math import radians, sin, cos, sqrt, atan2, floor
import datashader as ds

import dash
from dash import dcc
from dash import html

Download the file:

In [None]:
# Fetch the zipped dataset and store it next to the notebook.
url = 'https://download-data.deutschebahn.com/static/datasets/wifi/20171212_wifionice.zip'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from saving an HTTP error page as the .zip file.
response = requests.get(url, timeout=60)
response.raise_for_status()

with open('20171212_wifionice.zip', 'wb') as f:
    f.write(response.content)

Extract the csv file and read dataset:

In [None]:
# Unpack only the survey CSV from the downloaded archive into the working directory.
with ZipFile('20171212_wifionice.zip', mode='r') as zip_file:
    zip_file.extract(member='surveyor_hackathon_data_20171212.csv')

# The file is semicolon-separated; low_memory=False makes pandas read the whole
# file in one pass instead of inferring dtypes chunk by chunk.
data = pd.read_csv(
    'surveyor_hackathon_data_20171212.csv',
    sep=';',
    low_memory=False,
)

Clean Dataset

In [None]:
# Keep only rows that carry a GPS position (both coordinates present).
data = data.dropna(subset=["gps_breite", "gps_laenge"])

# Discard rows whose gps_v exceeds 85 or is missing.
# NOTE(review): gps_v is presumably in m/s (it is multiplied by 3.6 into km/h below) - confirm.
data = data.drop(data[data["gps_v"] > 85].index)
data = data.dropna(subset=["gps_v"])

# Missing byte counters are treated as 0.
data['tprx'] = data['tprx'].fillna(value=0)
data['tptx'] = data['tptx'].fillna(value=0)

data = data.dropna(subset=["pax_auth"])

# Remove the columns that the rest of the notebook does not need.
data = data.drop(['gps_hoehe', 'sat', 'gps_richtung', 'pax_total', 'link_id', 'link_gw_conn'], axis=1)

# Positional rename of the remaining columns.
# NOTE(review): assumes exactly nine columns remain, in this order - confirm against the CSV header.
data.columns = ['sid', 'created', 'lat', 'lon', 'speed', 'devicecount', 'received_byte', 'sent_byte', 'ping']

# Downsample to one row per sid and minute, taking the first non-null value of each column.
data['created'] = pd.to_datetime(data['created'])
data = data.groupby(['sid', pd.Grouper(key='created', freq='1Min')]).first().reset_index()

data['sid'] = data['sid'].astype(np.int64)
data['devicecount'] = data['devicecount'].astype(int)

data['speed_in_km/h'] = data['speed'] * 3.6

# Per-user traffic. With devicecount == 0 a plain division yields inf (or NaN for 0/0);
# masking the zero denominators makes the undefined ratio uniformly NaN instead.
active_devices = data['devicecount'].where(data['devicecount'] != 0)
data['sent_byte_per_user'] = data['sent_byte'] / active_devices
data['down_byte_per_user'] = data['received_byte'] / active_devices

Save smaller Dataset

In [None]:
# Persist the cleaned frame without the index column.
data.to_csv(path_or_buf='clean_data.csv', index=False)

Load smaller Dataset

In [None]:
# Reload the cleaned frame from disk, replacing whatever `data` currently holds.
data = pd.read_csv(filepath_or_buffer='clean_data.csv')

Split the dataset into smaller ones for the visualizations

In [None]:
# Column subsets of `data`, one per scatter plot.
upload_data = data.loc[:, ['sid', 'devicecount', 'sent_byte', 'sent_byte_per_user']]
download_data = data.loc[:, ['sid', 'devicecount', 'received_byte', 'down_byte_per_user']]
download_speed_data = data.loc[:, ['sid', 'speed_in_km/h', 'received_byte']]
upload_speed_data = data.loc[:, ['sid', 'speed_in_km/h', 'sent_byte']]


# Edge length of one grid cell, in the same unit as the lat/lon values.
grid_cell_size = 0.001


def assign_grid_cell(lat, lon):
    """Return the identifier of the grid cell that contains the given position.

    Each coordinate is divided by ``grid_cell_size`` and rounded down; the two
    resulting integers are joined as ``"<lat_index>_<lon_index>"``.
    """
    cell_indices = (floor(coordinate / grid_cell_size) for coordinate in (lat, lon))
    return "_".join(str(index) for index in cell_indices)

# Label every row with its grid cell. This is the vectorised equivalent of
# calling assign_grid_cell(lat, lon) row by row: floor(coordinate / grid_cell_size)
# for both coordinates, joined as "<lat_index>_<lon_index>". It avoids the slow
# row-wise DataFrame.apply.
lat_cell = np.floor(data['lat'] / grid_cell_size).astype(np.int64)
lon_cell = np.floor(data['lon'] / grid_cell_size).astype(np.int64)
data['grid_cell'] = lat_cell.astype(str) + '_' + lon_cell.astype(str)

# Thin the data out to one row per grid cell (first non-null value of each column);
# the cell label itself is only a grouping key and is not kept.
space_data = data.groupby('grid_cell').first().reset_index(drop=True)

Scatter Plot for Upload and User correlation

In [None]:
# sent_byte_per_user (log-scaled x axis) against devicecount, drawn with small markers.
fig_sent_byte = (
    px.scatter(
        upload_data,
        x='sent_byte_per_user',
        y='devicecount',
        title='Sent Bytes Scatter Plot',
    )
    .update_xaxes(type='log')
    .update_traces(marker={'size': 2})
)
fig_sent_byte.show()

Scatter Plot for Download and Speed correlation

In [None]:
# Fixed window for the speed axis.
y_axis_range = [0,350]

# received_byte (log-scaled x axis) against speed_in_km/h, drawn with small markers.
fig_speed_down = (
    px.scatter(
        download_speed_data,
        x='received_byte',
        y='speed_in_km/h',
        title='Received Bytes per speed',
    )
    .update_xaxes(type='log')
    .update_layout(yaxis={'range': y_axis_range})
    .update_traces(marker={'size': 2})
)
fig_speed_down.show()

Scatter Plot for Download and User correlation

In [None]:
# down_byte_per_user (log-scaled x axis) against devicecount, drawn with small markers.
fig_down_byte = (
    px.scatter(
        download_data,
        x='down_byte_per_user',
        y='devicecount',
        title='Download Bytes Scatter Plot',
    )
    .update_xaxes(type='log')
    .update_traces(marker={'size': 2})
)
fig_down_byte.show()

Scatter Plot for Upload and Speed correlation

In [None]:
# Fixed window for the speed axis.
y_axis_range = [0,350]

# sent_byte (log-scaled x axis) against speed_in_km/h, drawn with small markers.
fig_speed_up = (
    px.scatter(
        upload_speed_data,
        x='sent_byte',
        y='speed_in_km/h',
        title='Send Bytes per speed',
    )
    .update_xaxes(type='log')
    .update_layout(yaxis={'range': y_axis_range})
    .update_traces(marker={'size': 2})
)
fig_speed_up.show()

Map Scatterplot

In [None]:
# One marker per row of space_data, positioned by lat/lon and coloured by received_byte.
fig_map = go.Figure(
    data=[
        go.Scattergeo(
            lat=space_data['lat'],
            lon=space_data['lon'],
            mode='markers',
            marker={
                'size': 5,
                'color': space_data['received_byte'],
                'colorscale': 'Viridis',
                'opacity': 0.7,
                'colorbar': {'title': 'Download rate'},
            },
        )
    ]
)

# Map layout: European scope, natural-earth projection, view fitted to the plotted locations.
fig_map.update_layout(
    geo={
        'projection_type': 'natural earth',
        'center': {'lat': 51.1657, 'lon': 10.4515},
        'scope': 'europe',
        'fitbounds': "locations",
    },
    showlegend=True,
)

# Render the map
fig_map.show()

Heatmap of device count vs. sent bytes

In [None]:
# Aggregate the (devicecount, sent_byte) points of `data` onto a 100x100 canvas.
cvs = ds.Canvas(plot_width=100, plot_height=100)
agg = cvs.points(data, 'devicecount', 'sent_byte')

# Replace the aggregate with its log10. Empty bins (value 0) are skipped by the
# log and then explicitly set to NaN so they show up as gaps in the image.
zero_mask = agg.values == 0
log_values = np.log10(agg.values, where=~zero_mask)
log_values[zero_mask] = np.nan
agg.values = log_values

fig_heat = (
    px.imshow(agg, origin='lower', labels={'color': 'Log10(count)'})
    .update_traces(hoverongaps=False)
    .update_layout(coloraxis_colorbar={'title': 'Count', 'tickprefix': '1.e'})
)
fig_heat.show()

Dash dashboard combining the figures

In [None]:
# Create the Dash application
app = dash.Dash()

# Figures shown on the dashboard, in display order
figure1 = fig_map
figure2 = fig_speed_up
figure3 = fig_heat
figure4 = fig_down_byte

# Every figure sits in an identically sized tile; the flex container wraps the
# tiles into rows. Dash expects camelCased CSS property names in `style` dicts.
tile_style = {'width': '45%', 'height': '500px'}

app.layout = html.Div(
    [
        html.H1("Internet speeds on ICE"),
        html.Div(
            [
                html.Div(dcc.Graph(figure=figure), style=tile_style)
                for figure in (figure1, figure2, figure3, figure4)
            ],
            style={'display': 'flex', 'flexWrap': 'wrap', 'justifyContent': 'space-evenly'},
        ),
    ],
    style={'textAlign': 'center'}
)

# `app.run_server` was removed in Dash 3 in favour of `app.run`; fall back to the
# old name only on Dash versions that predate `app.run`.
# use_reloader=False because the reloader does not work from inside a notebook kernel.
launch_server = app.run if hasattr(app, 'run') else app.run_server
launch_server(debug=True, use_reloader=False)