In [22]:
import pandas as pd
import plotly.express as px
import dash_bootstrap_components as dbc
import plotly.graph_objects as go
import geojson

# Load GeoJSON file from Github
# Author: Francesco Schwarz
# source: https://github.com/isellsoap/deutschlandGeoJSON
with open('2_hoch.geo.json') as b:
    bundeslaender = geojson.load(b)

In [23]:
bundeslaender

{"features": [{"geometry": {"coordinates": [[[[9.65046, 49.77634], [9.650968, 49.765152], [9.656839, 49.761452], [9.6404, 49.750141], [9.652028, 49.74276], [9.652208, 49.739029], [9.64654, 49.738991], [9.64672, 49.73526], [9.652379, 49.735302], [9.65255, 49.731571], [9.646889, 49.731529], [9.641079, 49.735222], [9.64141, 49.727749], [9.63018, 49.727669], [9.635949, 49.723991], [9.641581, 49.72403], [9.647408, 49.720341], [9.647579, 49.71661], [9.64192, 49.716572], [9.6421, 49.712841], [9.636459, 49.712799], [9.6315, 49.697842], [9.637129, 49.69788], [9.637469, 49.690422], [9.64878, 49.690502], [9.660488, 49.683109], [9.671889, 49.683182], [9.682979, 49.690701], [9.681969, 49.713058], [9.704829, 49.7132], [9.704669, 49.716919], [9.715789, 49.724449], [9.715949, 49.720722], [9.721829, 49.71703], [9.72246, 49.702122], [9.734221, 49.694759], [9.734711, 49.683601], [9.740411, 49.683632], [9.757231, 49.6912], [9.756611, 49.7061], [9.77344, 49.713661], [9.784719, 49.717472], [9.790429, 49.717

In [24]:
# Read mapbox Token
# mapbox_access_token = 'pk.eyJ1Ijoid2kyMm0wMjAiLCJhIjoiY2xpdDVmMW01MGxmdzN0bzdxNW42Yzk0cCJ9.yaPiBJNIJKWkB-181IeUtg'
# px.set_mapbox_access_token(mapbox_access_token)

In [25]:
##### Incorporate data
# Convert 'Date' from object to Date
df = pd.read_csv('group_rossmann_dataprep.csv', sep=';')
df.head()


Unnamed: 0,Store,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,State,StateName,DayOfWeek,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday
0,103,d,c,5210.0,5.0,2015.0,0,,,,BE,Berlin,1,10.06.2013,0,0,0,0,0.0,0
1,894,a,a,190.0,11.0,2012.0,0,,,,NW,NordrheinWestfalen,1,10.06.2013,0,0,0,0,0.0,0
2,972,a,a,14960.0,,,0,,,,ST,SachsenAnhalt,1,10.06.2013,0,0,0,0,0.0,0
3,1081,b,a,400.0,3.0,2006.0,0,,,,BE,Berlin,1,10.06.2013,0,0,0,0,0.0,0
4,665,a,a,90.0,12.0,2012.0,1,14.0,2011.0,"Jan,Apr,Jul,Oct",HE,Hessen,1,24.06.2013,0,0,0,0,0.0,0


In [26]:
df.dtypes

Store                          int64
StoreType                     object
Assortment                    object
CompetitionDistance          float64
CompetitionOpenSinceMonth    float64
CompetitionOpenSinceYear     float64
Promo2                         int64
Promo2SinceWeek              float64
Promo2SinceYear              float64
PromoInterval                 object
State                         object
StateName                     object
DayOfWeek                      int64
Date                          object
Sales                          int64
Customers                      int64
Open                           int64
Promo                          int64
StateHoliday                 float64
SchoolHoliday                  int64
dtype: object

In [27]:
# Convert Column "Date" to DateTime
df["Date"] = pd.to_datetime(df["Date"], format="%d.%m.%Y")
df.dtypes

Store                                 int64
StoreType                            object
Assortment                           object
CompetitionDistance                 float64
CompetitionOpenSinceMonth           float64
CompetitionOpenSinceYear            float64
Promo2                                int64
Promo2SinceWeek                     float64
Promo2SinceYear                     float64
PromoInterval                        object
State                                object
StateName                            object
DayOfWeek                             int64
Date                         datetime64[ns]
Sales                                 int64
Customers                             int64
Open                                  int64
Promo                                 int64
StateHoliday                        float64
SchoolHoliday                         int64
dtype: object

In [28]:
df.head()

Unnamed: 0,Store,StoreType,Assortment,CompetitionDistance,CompetitionOpenSinceMonth,CompetitionOpenSinceYear,Promo2,Promo2SinceWeek,Promo2SinceYear,PromoInterval,State,StateName,DayOfWeek,Date,Sales,Customers,Open,Promo,StateHoliday,SchoolHoliday
0,103,d,c,5210.0,5.0,2015.0,0,,,,BE,Berlin,1,2013-06-10,0,0,0,0,0.0,0
1,894,a,a,190.0,11.0,2012.0,0,,,,NW,NordrheinWestfalen,1,2013-06-10,0,0,0,0,0.0,0
2,972,a,a,14960.0,,,0,,,,ST,SachsenAnhalt,1,2013-06-10,0,0,0,0,0.0,0
3,1081,b,a,400.0,3.0,2006.0,0,,,,BE,Berlin,1,2013-06-10,0,0,0,0,0.0,0
4,665,a,a,90.0,12.0,2012.0,1,14.0,2011.0,"Jan,Apr,Jul,Oct",HE,Hessen,1,2013-06-24,0,0,0,0,0.0,0


In [29]:
# Group By Date and StateName, aggregate by sum of Sales -> nyc.groupby (....).agg(....)
# check results
df_sales=df.groupby(["State", "StateName"], as_index=False).agg({"Sales": "sum"})
df_sales

Unnamed: 0,State,StateName,Sales
0,BE,Berlin,604709903
1,BW,BadenWuerttemberg,355745661
2,BY,Bayern,742361827
3,HE,Hessen,603605998
4,HH,Hamburg,169053366
5,NW,NordrheinWestfalen,1598936483
6,RP,RheinlandPfalz,181243703
7,SH,SchleswigHolstein,618426981
8,SN,Sachsen,402442109
9,ST,SachsenAnhalt,276915114


In [41]:
# Add map of Germany
fig = px.choropleth_mapbox(data_frame = df_sales, 
                           geojson = bundeslaender, 
                           featureidkey = 'properties.id', 
                           locations = 'State', 
                           color = 'Sales',
                           hover_name = 'StateName',
                           color_continuous_scale = "Teal",
                           range_color = (200000000, 1800000000),
                           mapbox_style = "carto-positron",
                           zoom = 4.5, 
                           center = {"lat": 51.165691, "lon": 10.451526},
                           opacity = 0.9,
                           labels = {'Sales':'Sales rate'}
                          )
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
#fig.update_traces(marker_line_width = 2, marker_line_color = 'white')
fig.show()