# A look into creating interactive maps with folium

We'll use the individual datasets first, to build a map for each country. Once the individual countries work, we'll try to build a European map.

We start by importing the required packages

In [None]:
import countriesAndCities
import dataGathering
import folium
import geopandas as gpd
import pandas as pd
import contextily as ctx
from shapely import wkt

# 1. Austria

In [None]:
def changeKeys(country, valueToChange, newValue):
    '''Rename station keys of one country in the global largestStations dictionary.

    Every key of largestStations[country] that contains valueToChange is
    replaced by a key in which valueToChange is substituted, in place, by
    newValue (e.g. 'Wien Hbf' -> 'Wien Hauptbahnhof'). The value stored under
    the old key is moved to the new key unchanged.

     @param country: the country with a value to change, of type string
     @param valueToChange: the part of the key to change, of type string
     @param newValue: the text that replaces valueToChange, of type string
     @return largestStations: the updated module-level dictionary, of type dict'''
    stations = largestStations[country]
    # Iterate over a snapshot of the keys: the dictionary is modified in the loop.
    for oldKey in list(stations):
        if valueToChange not in oldKey:
            continue
        newKey = oldKey.replace(valueToChange, newValue)
        # pop-then-assign moves the entry and keeps it when newKey == oldKey
        # (assigning first and popping afterwards would delete it).
        stations[newKey] = stations.pop(oldKey)

    return largestStations

We build the dataset the same way we built it for the unified database

In [None]:
# URLs handed to dataGathering.gather in the cells below.
urlAustria = 'https://www.omio.at/bahnhoefe'
urlGermany = 'https://bahnauskunft.info/bahnhoefe-deutschland/'

# Filled per country ('Austria', 'Germany') by the cells below.
largestStations = {}

In [None]:
largestStations['Austria']= dataGathering.gather(urlAustria, start=1)

In [None]:
largestStations = changeKeys('Austria', 'Hbf', 'Hauptbahnhof')

In [None]:
largestStations['Austria']

In [None]:
largestStations['Austria']['Wien Westbahnhof'][0]

# 1.1 Stations

In [None]:
stationsAustria = 'Austria/GIP_Betriebsstellen_DelEUV_JSON.json'
stationsAustriaFrame = gpd.read_file(stationsAustria)

In [None]:
columnsToRemove = ['BSTS_ID', 'DB640_CODE', 'OBJECTID', 'GIP_OBID', 'EXTERNALID', 'REGIONALCO', 'VALIDFROM', 'VALIDTO', 'OWNER_NAME', 'PV_EVA_NR', 'ANZ_AUFZUG', 'ANZ_FAHRTR', 'ANZ_UHREN',
                  'ANZ_AKUSTI','ANZ_OPTISC', 'INFOPOINT', 'MUEZ', 'MUEZ_KURZ', 'HILFE_MOBI', 'ANZ_ROLLST', 'ANZ_E_LADE', 'RUD_PARKPL', 'VERIFIZIER',
                  'PUBL_WLAN', 'MUEZ_LANG', 'BEMERKUNG']

In [None]:
# Drop all unneeded columns in one call instead of copying the frame once per column.
stationsAustriaFrame = stationsAustriaFrame.drop(columns=columnsToRemove)

In [None]:
# One sub-frame per station name in largestStations['Austria'], holding the
# rows whose NAME_FPL equals that name (possibly none).
dfStationsAustria = [
    stationsAustriaFrame.loc[stationsAustriaFrame['NAME_FPL'] == stationName]
    for stationName in largestStations['Austria']
]

In [None]:
workFrameAustria = gpd.GeoDataFrame(pd.concat(dfStationsAustria), crs = 31287)

In [None]:
# NOTE(review): this cell repeats the previous cell verbatim; running it only rebuilds the same frame.
workFrameAustria = gpd.GeoDataFrame(pd.concat(dfStationsAustria), crs = 31287)

In [None]:
workFrameAustria

In [None]:
workFrameAustria['NAME_FPL'].values[0]

# 1.2 Lines

In [None]:
linesAustria = 'Austria/GIP_Strecken_MLA.json'
linesAustriaFrame = gpd.read_file(linesAustria)

In [None]:
uselessColumns = ['GIP_OBID', 'BST_ID', 'FOW_NAME', 'FRC_NAME', 'REGION', 'VALIDFROM', 'VALIDTO', 'CROSSSECT', 'CROSS_NAME', 
                  'ELEKTRI', 'EXPDATE']

# Drop all unneeded columns in one call instead of copying the frame once per column.
linesAustriaFrame = linesAustriaFrame.drop(columns=uselessColumns)

In the case of the lines, we can't remove any rows of the dataframe. Every line has a bit of information, that cannot be replicated based on other rows

# 1.3 Visualisation

We now have two sets of working data. We can visualise this data on interactive maps using folium

In [None]:
trainMap = folium.Map(location = [47.5, 14.6], tiles = 'OpenStreetMap', zoom_start=12)



We can now add markers, based on the different train stations, onto the map

In [None]:
# Marker locations are stored as [y, x] pairs, the [latitude, longitude] order folium uses.
# NOTE(review): the frame is declared with crs = 31287 above; confirm the coordinates really are lon/lat degrees.
austriaStations = [[stationPoint.y, stationPoint.x] for stationPoint in workFrameAustria.geometry]

In [None]:
# Add one marker per station; the popup shows the name and the first value
# stored for that station in largestStations['Austria'].
stationNamesAustria = workFrameAustria['NAME_FPL'].values
for coordinates, station in zip(austriaStations, stationNamesAustria):
    dailyVisitors = largestStations['Austria'][station][0]
    popupText = station + "<br>" + dailyVisitors + ' daily visitors'
    trainMap.add_child(folium.Marker(location = coordinates, popup = popupText))

In [None]:
trainMap

In [None]:
# Draw every Austrian line geometry on the map.
for line in linesAustriaFrame.geometry:
    if line is None or line.is_empty:
        continue
    # Multi-part geometries expose their members through .geoms; indexing them
    # (line[0]) drew only the first part and is not supported by Shapely 2.
    for part in getattr(line, 'geoms', [line]):
        # Swap each (x, y) coordinate to the (y, x) order used for folium locations.
        lineTuple = [[coord[1], coord[0]] for coord in part.coords]
        folium.PolyLine(lineTuple).add_to(trainMap)

In [None]:
trainMap

# 2. Germany

We can do the exact same for the German train lines

In [None]:
largestStations['Germany'] = dataGathering.gather(urlGermany)

In [None]:
largestStations = changeKeys('Germany', 'Hauptbahnhof', 'Hbf')

# 2.1 German Stations

In [None]:
stations = 'Germany/railwayStationNodes.geojson'

deutscheBahnStations = gpd.read_file(stations)

In [None]:
# Remove the two columns that are not needed further on.
deutscheBahnStations = deutscheBahnStations.drop(columns=['formOfNode', 'id'])

In [None]:
# One sub-frame per station name in largestStations['Germany'], holding the
# rows whose geographicalName equals that name (possibly none).
dfListStations = [
    deutscheBahnStations.loc[deutscheBahnStations['geographicalName'] == stationName]
    for stationName in largestStations['Germany']
]

In [None]:
workFrameStationsGermany = gpd.GeoDataFrame(pd.concat(dfListStations, ignore_index=True), crs=4258)

In [None]:
workFrameStationsGermany

In [None]:
workFrameStationsGermany = workFrameStationsGermany.drop_duplicates(subset='railwayStationCode')

# 2.2 German train lines

In [None]:
deutscheBahnLines = gpd.read_file('Germany/railwayLines.geojson')

In [None]:
# NOTE(review): dfListLines is not defined in any visible cell, so this cell raises
# NameError as it stands. Presumably a cell selecting rows of deutscheBahnLines is missing; confirm.
workFrameLines = gpd.GeoDataFrame(pd.concat(dfListLines, ignore_index = True), crs = 4258)

In [None]:
workFrameLines = workFrameLines.drop_duplicates(subset='railwayLineCode')

# 2.3 Visualising German data

In [None]:
# Marker locations are stored as [y, x] pairs, the [latitude, longitude] order folium uses.
germanStationsList = [[stationPoint.y, stationPoint.x] for stationPoint in workFrameStationsGermany.geometry]

In [None]:
# Add one marker per German station. The names must come from the German
# frame: workFrameAustria has no 'geographicalName' column.
germanStationNames = workFrameStationsGermany['geographicalName'].values
for coordinates, station in zip(germanStationsList, germanStationNames):
    dailyVisitors = largestStations['Germany'][station][0]
    trainMap.add_child(folium.Marker(location = coordinates, popup=station + "<br>" + dailyVisitors + ' daily visitors'))

In [None]:
trainMap

In [None]:
# Draw every German line geometry on the map.
for line in deutscheBahnLines.geometry:
    if line is None or line.is_empty:
        continue
    # Multi-part geometries expose their members through .geoms; indexing them
    # (line[0]) drew only the first part and is not supported by Shapely 2.
    for part in getattr(line, 'geoms', [line]):
        # Swap each (x, y) coordinate to the (y, x) order used for folium locations.
        lineTuple = [[coord[1], coord[0]] for coord in part.coords]
        folium.PolyLine(lineTuple).add_to(trainMap)

In [None]:
trainMap

The German data is awfully incomplete. We can add further information on the German rail network by using a global database

In [None]:
globalData = 'wld_trs_railways_wfp.csv'
worldData = pd.read_csv(globalData)

In [None]:
Germany = worldData[worldData['country'] == 'Germany']

In [None]:
# Parse the WKT text into shapely geometries. assign() returns a new frame, so
# the filtered slice of worldData is not modified in place (no SettingWithCopyWarning).
Germany = Germany.assign(shape=Germany['shape'].apply(wkt.loads))

In [None]:
geodata = gpd.GeoSeries(Germany['shape'])

In [None]:
# Germany comes from pd.read_csv, i.e. it is a plain DataFrame without a
# set_geometry method. Build the GeoDataFrame explicitly and keep the result.
Germany = gpd.GeoDataFrame(Germany, geometry='shape')
Germany

In [None]:
# Draw every German line of the world dataset on the map.
for line in geodata.geometry:
    if line is None or line.is_empty:
        continue
    # Multi-part geometries expose their members through .geoms; indexing them
    # (line[0]) drew only the first part and is not supported by Shapely 2.
    for part in getattr(line, 'geoms', [line]):
        # Swap each (x, y) coordinate to the (y, x) order used for folium locations.
        lineTuple = [[coord[1], coord[0]] for coord in part.coords]
        folium.PolyLine(lineTuple).add_to(trainMap)

In [None]:
trainMap

# 3 A look into the French railway network



---
A look into the French railway network 

We are using databases coming from French government open-data portal and French national railway company (SNCF) : https://www.data.gouv.fr/fr/ and https://ressources.data.sncf.com/pages/accueil/ \
We focus on two databases providing information either on the railway stations or on the rail connections. \
The databases are : referentiel-gares-voyageurs.geojson and formes-des-lignes-du-rfn.geojson

# 3.1 French Stations

We first select the relevant cities. We focus on a dozen of them, the biggest ones. We chose : Paris, Lyon, Bordeaux, Toulouse, Brest, Marseille, Nice, Strasbourg, Rennes, Montpellier and Grenoble. \
Some of these cities have several stations. For more simplicity, there is a focus on the relevant stations connecting those cities.

In [None]:
f_station = gpd.read_file('referentiel-gares-voyageurs.geojson')

In [None]:
values = ['Paris', 'Lyon', 'Strasbourg', 'Lille', 'Bordeaux', 'Massy', 'Marseille', 'Chessy', 'Brest', 'Rennes', 'Toulouse', 'Nice', 'Montpellier', 'Grenoble']
# Keep the stations located in one of the selected cities ...
isSelectedCity = f_station.commune_libellemin.isin(values)
# ... whose segmentdrg_libelle is 'a'.
isSegmentA = f_station['segmentdrg_libelle'] == 'a'

# .copy() makes the selection an independent frame: later cells add and drop
# columns in place, which would otherwise trigger SettingWithCopyWarning.
f_station = f_station[isSelectedCity & isSegmentA].copy()

There is a selection on two criteria : the name of the city and the importance of the line between cities (segmentdrg_libelle). \
There is a focus on national and international railways. \


Note :  \
commune_libellemin = name of the city \
segmentdrg_libelle = a,b or c indicates the size of the railroad

In [None]:
f_station.loc[:,'Country'] = 'France'

We add a column with the name of the country, then drop the columns we do not need.

In [None]:
# Name the columns through the columns= keyword: passing the axis positionally
# (drop(labels, 1)) raises TypeError since pandas 2.0.
f_station.drop(columns=['tvs', 'tvss',
       'gare_alias_libelle_fronton', 'commune_code', 'uic_code', 'rg_libelle',
       'gare', 'adresse_cp', 'code_gare', 'latitude_entreeprincipale_wgs84',
       'gare_agencegc_libelle', 'departement_libellemin', 'code',
       'gare_drg_on', 'gare_etrangere_on', 'segmentdrg_libelle',
       'gare_regionsncf_libelle', 'longitude_entreeprincipale_wgs84',
       'gare_ut_libelle', 'gare_nbpltf', 'alias_libelle_noncontraint',
       'departement_numero', 'niveauservice_libelle', 'gare_ug_libelle'], inplace=True)

In [None]:
f_station

Unnamed: 0,gare_alias_libelle_noncontraint,commune_libellemin,geometry,Country
107,Lyon Part Dieu,Lyon,POINT (4.85935 45.76056),France
378,Paris Montparnasse,Paris,POINT (2.32051 48.84117),France
386,Paris Saint-Lazare,Paris,POINT (2.32533 48.87624),France
496,Paris Gare du Nord,Paris,POINT (2.35515 48.88018),France
497,Lille Europe,Lille,POINT (3.07580 50.63922),France
617,Nice,Nice,POINT (7.26190 43.70456),France
632,Marseille Saint-Charles,Marseille,POINT (5.38041 43.30267),France
776,Toulouse Matabiau,Toulouse,POINT (1.45362 43.61121),France
917,Rennes,Rennes,POINT (-1.67274 48.10352),France
1200,Grenoble,Grenoble,POINT (5.71458 45.19149),France


# 3.2 French railway lines

Now, we select the relevant lines between those cities

In [None]:
lines_france = gpd.read_file('formes-des-lignes-du-rfn.geojson')

We only focus on the exploited lines. We drop the lines that are unexploited or private.

In [None]:
# Remove, in place, every row whose 'libelle' is not 'Exploitée'.
notExploited = lines_france['libelle'] != 'Exploitée'
lines_france.drop(index=lines_france.index[notExploited], inplace = True)

Selection of the ids of the lines of interest :

In [None]:
values = [ '930000', '752000', '810000', '640000', '566000','956000', '957000', '991300', '752330', '893000', '431000', '420000','834000', '226000', '005000', '905000', '752340', '226310', '005390', '752100' ]

In [None]:
lines_france = lines_france[lines_france.code_ligne.isin(values)] 

# 3.3 Visualisation
And a plot of the beautiful map :

In [None]:
# plt is never imported in this notebook, so let geopandas create the figure:
# the first plot() call builds it with the requested size and returns its axes.
ax = f_station.plot(figsize=(60, 30), alpha=0.9, edgecolor='black')
fig = ax.get_figure()
lines_france.plot(ax = ax, alpha=0.9, edgecolor='red')
# The basemap is drawn using the CRS of the line geometries.
ctx.add_basemap(ax, crs = lines_france.crs.to_string())
ax.set_axis_off()
ax


# 4 A look into the Belgian railway network

We are using databases coming from Belgian government open-data portal : https://data.gov.be/fr \
We focus on one database providing information on the railway stations and rail connections. \
The base is : tc-trajet-train-statique-sncb.geojson

# 4.1 Belgian railway stations

Because of the small size of the country, there is a focus on the cities of Brussels, Antwerp, and Gent. We also add the connections with neighbouring countries : the line between Brussels and Lille in France, and the line between Antwerp and Rotterdam in the Netherlands. \
Because of the poor data, we chose to create our own dataset. The very small amount of data studied is a key element of such a choice. 
So, we create a dataframe with the relevant coordinates.

In [None]:
# Hand-built table of the selected stations: city, station name, country and
# longitude / latitude values.
df = pd.DataFrame(
    {'City' : ['Brussel', 'Brussel','Gent', 'Antwerpen', 'Rotterdam'],  # was misspelled 'Atwerpen'
     'Name': ['Brussel-Zuid','Brussel-Noord','Gent-Sint-Pieters','Antwerpen-Centraal','Rotterdam-Centraal'],
     'Country': ['Belgium', 'Belgium', 'Belgium', 'Belgium', 'Netherland'],
     'Longitude': [4.33545, 4.36015, 3.71083, 4.42115, 4.46883],
     'Latitude': [50.83605, 50.86045, 51.03609, 51.21722, 51.92528]})

Then, transformation of the dataframe to a geopandas one : the coordinates are transformed into a geometry recognized when plotted on a map.

In [None]:
# Build point geometries from the Longitude / Latitude columns and declare their
# CRS, so the frame can be reprojected or combined with other layers.
# NOTE(review): EPSG:4326 assumes the values are WGS84 degrees; confirm the source.
b_station = gpd.GeoDataFrame(
    df, geometry=gpd.points_from_xy(df.Longitude, df.Latitude), crs='EPSG:4326')

In [None]:
# The coordinates now live in the geometry column. columns= is used because a
# positional axis argument (drop(labels, 1)) raises TypeError since pandas 2.0.
b_station.drop(columns=['Longitude', 'Latitude'], inplace = True)
b_station

Unnamed: 0,City,Name,Country,geometry
0,Brussel,Brussel-Zuid,Belgium,POINT (4.33545 50.83605)
1,Brussel,Brussel-Noord,Belgium,POINT (4.36015 50.86045)
2,Gent,Gent-Sint-Pieters,Belgium,POINT (3.71083 51.03609)
3,Atwerpen,Antwerpen-Centraal,Belgium,POINT (4.42115 51.21722)
4,Rotterdam,Rotterdam-Centraal,Netherland,POINT (4.46883 51.92528)


# 4.2 Belgian lines

After selection of the stations, we matched them with their id. Then we have to clean the geodataframe and add the last missing information.

In [None]:
lines_belgium = gpd.read_file('tc-trajet-train-statique-sncb.geojson')

In [None]:
values = [603,140,166,126,259,257,54,274,172]
# Keep the selected line ids and renumber the rows 0..n-1. reset_index works for
# any number of matches, whereas a hard-coded range(9) fails unless exactly 9 rows remain.
lines_belgium = lines_belgium[lines_belgium['id'].isin(values)].reset_index(drop=True)

In [None]:
# Split each long_name at its first '--' into a departure and an arrival part.
departure = []
arrival = []
for longName in lines_belgium['long_name']:
    index_ = longName.index('--')
    departure.append(longName[:index_])
    # +3 skips the two dashes and the character after them
    # (presumably a space; confirm against the data).
    arrival.append(longName[index_+3:])

lines_belgium.loc[:,"Departure"] = departure
lines_belgium.loc[:,"Arrival"] = arrival
# Departure countries are listed by hand, in the row order of lines_belgium.
lines_belgium.loc[:,"Country_dep"] = ['France', 'Belgium', 'Belgium', 'Belgium', 'Belgium', 'France', 'Netherland','Belgium', 'Belgium']
lines_belgium.loc[:,"Country_arr"] = 'Belgium'
# columns= is used because a positional axis argument raises TypeError since pandas 2.0.
lines_belgium.drop(columns=['short_name', 'long_name', 'id', 'route_type', 'color'], inplace=True)

# 4.3 Visualisation
And a visualisation of the achieved map :

In [None]:
# plt is never imported in this notebook, so let geopandas create the figure:
# the first plot() call builds it with the requested size and returns its axes.
ax = b_station.plot(figsize=(50, 30), alpha=0.9, edgecolor='black')
fig = ax.get_figure()
lines_belgium.plot(ax = ax, alpha=0.9, edgecolor='red')
# The basemap is drawn using the CRS of the line geometries.
ctx.add_basemap(ax, crs = lines_belgium.crs.to_string())
ax.set_axis_off()
ax

# 5 The railway network of Switzerland

The base is : 'linie-mit-polygon.geojson'

# 5.1 The display of the lines

In [None]:
import folium
print(folium.__version__)

lines_suisse=gpd.read_file('linie-mit-polygon.geojson')

kanton_map = folium.Map(location=[46.8, 8.33],
                   tiles='OpenStreetMap', zoom_start=7)
# Map.choropleth is gone from current folium releases; the Choropleth class
# takes the same geo_data argument and is added to the map explicitly.
folium.Choropleth(geo_data=lines_suisse).add_to(kanton_map)
kanton_map

# 5.2 We add the stations 

With the stations added, we get a good map of the Swiss railways. I only chose 95 stations (the longest ones).

In [5]:
#The station coordinates are given in EPSG:2056; folium needs EPSG:4326 latitude/longitude, hence this conversion
from pyproj import Transformer

# Built once and reused for every station. Proj(init=...) and pyproj.transform
# are deprecated in pyproj 2.x. always_xy=True keeps the (x, y) in ->
# (longitude, latitude) out axis order that the old init= syntax used.
_EPSG2056_TO_WGS84 = Transformer.from_crs('EPSG:2056', 'EPSG:4326', always_xy=True)

def convert(x,y):
    '''Convert one EPSG:2056 coordinate pair to a folium location.
     @param x: the x (east) coordinate in EPSG:2056
     @param y: the y (north) coordinate in EPSG:2056
     @return: the list [latitude, longitude] in EPSG:4326'''
    longitude, latitude = _EPSG2056_TO_WGS84.transform(x, y)
    return [latitude, longitude]

In [None]:
import folium 
import geopandas as gpd
import pandas as pd 

final_df=pd.read_csv('stations.csv')

map_suisse = folium.Map(location=[46.8, 8.33],
                   tiles='OpenStreetMap', zoom_start=7)
# One marker per station: convert() turns the 'Coord. E' / 'Coord. N' pair into
# a [latitude, longitude] location, and the popup shows the station name.
stationRows = zip(final_df['Coord. E'], final_df['Coord. N'], final_df['Nom (ordre alphab.)'])
for coordE, coordN, stationName in stationRows:
    map_suisse.add_child(folium.Marker(location=convert(coordE, coordN), popup=stationName))

# Map.choropleth is gone from current folium releases; use the Choropleth class.
folium.Choropleth(geo_data=lines_suisse).add_to(map_suisse)
map_suisse

We can see that the stations do not always lie on the lines. This is due to gaps in our databases...

# 6. Europe Visualisation

In this part, we show a visualisation of the European railways using wld_trs_railways_wfp.csv

In [None]:
data=pd.read_csv('wld_trs_railways_wfp.csv')

In [None]:
France=data[data['country']=='France']
Belgium=data[data['country']=='Belgium']
Austria=data[data['country']=='Austria']
Germany=data[data['country']=='Germany']
Suisse=data[data['country']=='Switzerland']


In [None]:

from shapely import wkt

# Stack the rows of the five countries into one frame.
countryFrames = [France, Belgium, Austria, Germany, Suisse]
Europe = pd.concat(countryFrames)

# Parse the WKT text of the 'shape' column into shapely geometries.
Europe['shape'] = Europe['shape'].map(wkt.loads)

# Plot all geometries on one static figure.
geodata = gpd.GeoSeries(Europe['shape'])
geodata.plot(figsize=(20, 20))


In [None]:
#We create a list with the  main cities of our countries

Europetown=pd.read_csv('european_cities_us_standard.csv',sep=",")

# NOTE(review): these names are compared for exact equality with the 'European City'
# column; 'Mnchen' and 'Zrich' look like they lost a character. Confirm against the CSV.
Cities=['Paris','Marseille 01','Lyon 01','Berlin','Hamburg','Mnchen','Antwerpen','Gent Gent','Charleroi','Zrich','Bern','Wien','Gratz','Linz']

Europetown.head()

# DataFrame.append was removed in pandas 2.0: collect the per-city selections
# ('Lille' first, then Cities in order) and concatenate them once.
citySelections = [Europetown[Europetown['European City']==city] for city in ['Lille'] + Cities]
Europetown2 = pd.concat(citySelections)
Europetown2


In [None]:
#We plot the map of Europe
import folium
print(folium.__version__)


europe= folium.Map(location=[45, 5],
                   tiles='OpenStreetMap', zoom_start=5)

# One marker per selected city, placed at its Latitude / Longitude and labelled with its name.
cityRows = zip(Europetown2['Latitude'], Europetown2['Longitude'], Europetown2['European City'])
for latitude, longitude, cityName in cityRows:
    europe.add_child(folium.Marker(location=(latitude, longitude), popup=cityName))

# Map.choropleth is gone from current folium releases; use the Choropleth class.
folium.Choropleth(geo_data=geodata).add_to(europe)
europe
