In [None]:
import pandas as pd  # dataset handling
import geopandas as gpd  # geodataset handling
import numpy as np

# Kepler.gl is a tool created by Uber's Visualization team for visual exploration of large-scale geospatial data
from keplergl import KeplerGl  # geospatial visualization 

import warnings
warnings.filterwarnings("ignore")

# Data

In [None]:

# Load the sensor measurements. The first CSV column becomes the index, and
# rows without a PM10 or PM2p5 reading are discarded straight away.
df_sensors = pd.read_csv(
    '../data/cleaned_sensors_dwd_train.csv',
    index_col=0
).dropna(subset=['PM10', 'PM2p5'])

# Build dummy rows: for every (city, timestamp) pair add two synthetic
# readings, one all-zero and one carrying the dataset-wide PM maxima.
# NOTE(review): presumably this pins the value range seen at each timestamp
# (e.g. for a stable colour scale on the map) -- confirm.
timestamps = pd.Series(df_sensors['timestamp'].unique(), name='timestamp')
cities = pd.Series(df_sensors['city'].unique(), name='city')

dummies = pd.DataFrame(data={
    'location_id': -1,  # sentinel id that marks the synthetic rows
    'lat': [0, 90],
    'lon': [0, 90],
    'PM10': [0, df_sensors['PM10'].max()],
    'PM2p5': [0, df_sensors['PM2p5'].max()],
})

# The cross joins replicate the two dummy rows for each city and timestamp.
dummies = dummies.merge(cities, how='cross').merge(timestamps, how='cross')

# ignore_index=True gives the combined frame a fresh, unique index. Without it
# the dummies' 0..n labels can repeat labels read from the CSV, and any later
# label-based operation (such as drop(index=...)) would hit unrelated rows.
df_sensors = pd.concat([df_sensors, dummies], ignore_index=True)

# Only the Frankfurt rows are used in the rest of the notebook.
df_sensors = df_sensors.query('city=="Frankfurt"')

# Show a few of the synthetic rows as a sanity check.
df_sensors[df_sensors['location_id']==-1].head()

In [None]:

# # Import as a pandas DataFrame
# df_sensors = pd.read_csv(
#     '../data/cleaned_sensors_dwd_train.csv',
#     index_col=0
# ).dropna(subset=['PM10', 'PM2p5'])

def _make_bins(upper_edge, stop=50, step=5):
    """Build the bin edges and labels used to bucket a PM concentration.

    Parameters
    ----------
    upper_edge : int
        Right edge of the last, open-ended bucket.
    stop, step : int
        The labels are ``np.arange(0, stop, step)``.

    Returns
    -------
    (edges, labels) : tuple of np.ndarray
        ``labels`` holds one value per bucket. ``edges`` is ``labels`` with
        ``upper_edge`` appended and its first entry replaced by -1, so it has
        exactly one more element than ``labels``.
    """
    labels = np.arange(0, stop, step)
    # np.append returns a new array, so editing the edges leaves labels intact.
    edges = np.append(labels, upper_edge)
    # First edge is -1 rather than 0 so that, with pd.cut's default
    # right-closed intervals, a reading of exactly 0 falls in the first bucket.
    edges[0] = -1
    return edges, labels


pm2p5_bins, pm2p5_labels = _make_bins(1000)
pm10_bins, pm10_labels = _make_bins(2000)

print(pm2p5_bins)

# Bucket both pollutant readings with the edges/labels prepared above and
# store each bucket label as an integer column.
df_sensors = df_sensors.assign(
    PM2p5_bins=pd.cut(
        df_sensors['PM2p5'], bins=pm2p5_bins, labels=pm2p5_labels
    ).astype(int),
    PM10_bins=pd.cut(
        df_sensors['PM10'], bins=pm10_bins, labels=pm10_labels
    ).astype(int),
)

# Restrict to the Frankfurt rows.
df_sensors = df_sensors.query('city=="Frankfurt"')

# Preview the result.
df_sensors.head()

In [None]:
# Turn each row's lon/lat pair into a point geometry, then wrap the sensor
# table together with those points in a GeoDataFrame.
sensor_points = gpd.points_from_xy(x=df_sensors['lon'], y=df_sensors['lat'])
gdf_sensors = gpd.GeoDataFrame(df_sensors, geometry=sensor_points)

# Preview the geo-enabled table.
gdf_sensors.head()

# kepler.gl

In [None]:
# Display the GeoDataFrame as this cell's output.
gdf_sensors

In [None]:
# Print the frame summary produced by info() before any further reshaping.
gdf_sensors.info()

In [None]:
# Kepler is picky about datetimes, so parse the timestamps and write them
# back as plain 'YYYY-MM-DD HH:MM:SS' strings.
gdf_sensors['timestamp'] = (
    pd.to_datetime(gdf_sensors['timestamp']).dt.strftime('%Y-%m-%d %H:%M:%S')
)

# Keep only the columns the map needs. The explicit .copy() makes the result
# an independent frame, so the in-place edits in the following cells (row
# drops, dtype casts) do not act on a slice of the wider frame.
gdf_sensors = gdf_sensors[[
    'PM10', 'PM2p5', 'lon', 'lat', 'geometry', 'timestamp',  'PM2p5_bins'
]].copy()

gdf_sensors

In [None]:
# Drop the rows (not columns) that lack a PM2p5 or PM10 reading.
# dropna removes exactly the offending rows. Dropping by index label instead
# would also remove every other row that shares a label with a NaN row
# whenever the index contains repeated labels.
gdf_sensors = gdf_sensors.dropna(subset=['PM2p5', 'PM10'])

In [None]:
# Cast both pollutant columns to integers.
for pm_column in ('PM10', 'PM2p5'):
    gdf_sensors[pm_column] = gdf_sensors[pm_column].astype(int)

In [None]:
# Print the frame summary again to check the columns and dtypes that remain
# after the selection, NaN removal and integer cast in the preceding cells.
gdf_sensors.info()

## Create map and save it

In [None]:
# Execute config.py in the notebook namespace. The script is expected to
# define a `config` object, which is kept under the name `map_config` and
# passed to KeplerGl when the map is created.
%run config.py
map_config = config

In [None]:
# Datasets handed to Kepler, keyed by the dataset name.
map_datasets = {'Sensors': gdf_sensors}

# Create the 700-pixel-high map with the loaded configuration.
kepler_map = KeplerGl(height=700, data=map_datasets, config=map_config)

#kepler_map

In [None]:
# Display the Kepler map as this cell's output.
kepler_map

In [None]:
# # Save kepler_map config to a file
# with open('config.py', 'w') as f:
#    f.write('config = {}'.format(kepler_map.config))


In [None]:

# kepler_map.save_to_html(data={
#         'Sensors': gdf_sensors,
#     } , config=kepler_map.config,file_name='Sensors.html')