In [21]:
import numpy as np
import pandas as pd
import geopandas as gpd

from lets_plot import *
from lets_plot.mapping import as_discrete
from lets_plot import tilesets            # Tilesets for the map.
from lets_plot.geo_data import *          # Geo-coding

LetsPlot.setup_html()

In [22]:
# Read the volcano catalogue (UTF-8 encoded CSV) and preview its first rows.
df_volc = pd.read_csv(
    "kaggle/input/volcanoes-in-indonesia/volcano_indo.csv",
    encoding='utf-8',
)
df_volc.head(3)

Unnamed: 0.1,Unnamed: 0,Name,Shape,Elevation,Last_Eruption,Geo_Location,Island
0,0,Weh,stratovolcano,"617 metres (2,024 ft)",Pleistocene,﻿5.82°N 95.28°E﻿,Sumatra
1,1,Seulawah Agam,stratovolcano,"1,810 metres (5,940 ft)",1839 (2),﻿5.448°N 95.658°E﻿,Sumatra
2,2,Peuet Sague,complex volcano,"2,801 metres (9,190 ft)",25 December 2000 (2),﻿4.914°N 96.329°E﻿,Sumatra


In [23]:
def dms_to_decimal(dms_string):
    """
    Convert a decimal-degree string with a hemisphere suffix to a signed float.

    Despite the function name, the expected input is not a
    degrees/minutes/seconds triple: it is a single decimal number, the degree
    sign and an optional hemisphere letter, e.g. ``"99.539°E"``.

    Byte-order marks (``\\ufeff``) and surrounding whitespace are ignored, and
    the hemisphere letter is matched case-insensitively, so a stray BOM after
    the letter can no longer make a southern/western value come back positive.

    Parameters
    ----------
    dms_string : str
        Coordinate text such as ``"5.82°N"`` or ``"99.539°W"``.

    Returns
    -------
    float
        The coordinate in decimal degrees; negative for ``S`` and ``W``,
        positive for anything else (including a missing hemisphere letter).

    Raises
    ------
    ValueError
        If the text does not contain exactly one degree sign, or if the part
        before the degree sign is not a valid number.

    >>> dms_to_decimal("99.539°E")
    99.539

    >>> dms_to_decimal("99.539°W")
    -99.539

    """
    # Drop BOM characters first: they are not whitespace, so neither strip()
    # nor float() would tolerate them.
    cleaned = dms_string.replace('\ufeff', '').strip()

    # Unpacking into two names keeps the original contract: anything other
    # than exactly one degree sign raises ValueError.
    degrees, direction = cleaned.split('°')
    degrees = float(degrees)

    # Adjusting the sign based on direction
    if direction.strip().upper() in ('S', 'W'):
        degrees *= -1

    return degrees

In [24]:
# Rows without a 'Geo_Location' value cannot be placed on the map.
df_volc = df_volc.dropna(subset=['Geo_Location'])

# Break 'Geo_Location' on single spaces and keep tokens 1 and 2, which are
# used below as latitude and longitude respectively.
# NOTE(review): token 0 is skipped - presumably a leading BOM/space token in
# the raw strings; confirm against the CSV.
lat_lon_dms = df_volc.Geo_Location.str.split(' ').map(lambda tokens: tokens[1:3])

# Strip BOM symbols (\ufeff) from every token, then convert it to signed
# decimal degrees.
lat_lon_dd = lat_lon_dms.map(
    lambda tokens: [dms_to_decimal(token.replace('\ufeff', '')) for token in tokens]
)

# First element of each pair is the latitude, second is the longitude.
latitudes = lat_lon_dd.map(lambda pair: pair[0])
longitudes = lat_lon_dd.map(lambda pair: pair[1])

# Build the GeoDataFrame; points_from_xy takes x (longitude) before y (latitude).
gdf_volc = gpd.GeoDataFrame(
    df_volc,
    geometry=gpd.points_from_xy(longitudes, latitudes),
)
gdf_volc.head(3)

Unnamed: 0.1,Unnamed: 0,Name,Shape,Elevation,Last_Eruption,Geo_Location,Island,geometry
0,0,Weh,stratovolcano,"617 metres (2,024 ft)",Pleistocene,﻿5.82°N 95.28°E﻿,Sumatra,POINT (95.28000 5.82000)
1,1,Seulawah Agam,stratovolcano,"1,810 metres (5,940 ft)",1839 (2),﻿5.448°N 95.658°E﻿,Sumatra,POINT (95.65800 5.44800)
2,2,Peuet Sague,complex volcano,"2,801 metres (9,190 ft)",25 December 2000 (2),﻿4.914°N 96.329°E﻿,Sumatra,POINT (96.32900 4.91400)


In [25]:
# Keep only the rows whose 'Shape' is exactly "supervolcano".
is_supervolcano = gdf_volc["Shape"] == "supervolcano"
gdf_superv = gdf_volc[is_supervolcano]
gdf_superv

Unnamed: 0.1,Unnamed: 0,Name,Shape,Elevation,Last_Eruption,Geo_Location,Island,geometry
7,7,Toba,supervolcano,"2,157 metres (7,077 ft)",75000 BC (8),﻿2.58°N 98.83°E﻿,Sumatra,POINT (98.83000 2.58000)


In [27]:
# Shared title styling for the plots below: bold, centred, dark grey.
# `face` selects the font weight/style; `family` names a font family, so
# family='bold' asked for a (non-existent) font called "bold" instead of
# making the title bold.
common_theme = theme(plot_title=element_text(face='bold', size=20, hjust=0.5, color='#1a1a1a'))

# Toba is a super-volcano
gdf_toba = gdf_volc[gdf_volc.Name == "Toba"]

# Fail with a readable message instead of an IndexError from .iloc[0] below.
assert not gdf_toba.empty, "No volcano named 'Toba' found in gdf_volc"

toba = gdf_toba.geometry.iloc[0]          # point geometry; .x / .y are read by the map cell
toba_le = gdf_toba.Last_Eruption.iloc[0]  # last-eruption text, concatenated into the map label

In [42]:
# Interactive base map using the NASA shaded-relief tileset.
basemap = geom_livemap(tiles=tilesets.NASA_COLOR_SHADED_RELIEF_30M)

# Equator: dotted white line at latitude 0 with a one-line tooltip.
equator = geom_hline(
    yintercept=0,
    linetype="dotted",
    color='white',
    tooltips=layer_tooltips().line('Equator'),
)

# The Toba annotation (arrow start and text) sits 4 degrees north-east of the volcano.
label_x = toba.x + 4
label_y = toba.y + 4

# Highlight the supervolcano with a large red marker (shape 0).
toba_marker = geom_point(data=gdf_superv, shape=0, color="red", size=20)

# Curved arrow from the annotation to Toba itself.
toba_arrow = geom_curve(
    x=label_x,
    y=label_y,
    xend=toba.x,
    yend=toba.y,
    size_start=8,
    size_end=23,
    curvature=-0.4,
    arrow=arrow(angle=20, length=8, type='open', ends='last'),
    color="white",
)

# Caption placed at the tail of the arrow.
toba_label = geom_text(
    x=label_x,
    y=label_y,
    label="Supervolcano Toba\nlast eruption " + toba_le,
    vjust='bottom',
    color="white",
)

# All volcanoes as red dots, with name/shape, location, elevation and
# last eruption shown in the tooltip.
volcano_tooltips = (
    layer_tooltips()
    .title("@Name\n(@Shape)")
    .line("@Geo_Location")
    .line("@Elevation")
    .line("Last Eruption | @Last_Eruption")
)
all_volcanoes = geom_point(
    data=gdf_volc,
    shape=21,
    fill="red",
    color="red",
    tooltips=volcano_tooltips,
)

# Layers are added in drawing order: map, equator, Toba highlight, then every volcano.
(
    ggplot()
    + basemap
    + equator
    + toba_marker
    + toba_arrow
    + toba_label
    + all_volcanoes
    + ggtitle("Volcanoes of Indonesia")
    + common_theme
    + ggsize(1000, 750)
)


In [32]:
# Read the major-eruptions table (UTF-8 encoded CSV) and preview its first rows.
df_erupt = pd.read_csv(
    "kaggle/input/volcanoes-in-indonesia/Major_eruption.csv",
    encoding='utf-8',
)
df_erupt.head()

Unnamed: 0.1,Unnamed: 0,Eruption date,Volcano,Cessation date,VEI,Characteristics,Tsunami,Tephra volume,Fatality
0,1,21 December 2018,Anak Krakatoa,10 January 2019,4,"cv,se,pf,fa,lm,cc",1–2 m,,437
1,2,13 February 2014,Kelut,15 February 2014,4,"cv,cl,pf,ph,ld,lm",no,0.16 km3,4
2,3,3 November 2010,Merapi,8 November 2010,4,"cv,pf,ld,lm",no,,353
3,4,10 February 1990,Kelut,March 1990,4,"cv,cl,pf,ph,ld,lm",no,0.13 km3,35
4,5,18 July 1983,Colo,December 1983,4,"cv,pf,ph",no,,0


In [31]:
# One bar per volcano; bar length is the number of rows (eruptions) for that
# volcano, bars are ordered by that count, and each bar is labelled with it.
eruption_bars = geom_bar(
    aes(x=as_discrete('Volcano', order_by='..count..', order=1)),
    fill="#ef8a62",
    labels=layer_labels(['..count..']).format('..count..', 'd'),
)

# coord_flip() turns the chart into horizontal bars.
(
    ggplot(df_erupt)
    + eruption_bars
    + coord_flip()
    + labs(y="Number of Major Eruptions on Record")
)

In [34]:
# Cleanup "Fatality"
import re

# Remove Toba.
df_erupt = df_erupt[df_erupt.Volcano != 'Toba']

# Drop n/a values.
df_erupt = df_erupt.dropna(subset=['Fatality']).copy()

#
# Convert string values to numbers.
#

# First number in a string: either a thousands-grouped figure ("36,000",
# "10 000") or a plain run of digits.
_FIRST_NUMBER = re.compile(r'\d{1,3}(?:[,\s]\d{3})+(?!\d)|\d+')


def clean_numeric(x):
    """
    Reduce a raw 'Fatality' cell to a digit-only string that pd.to_numeric accepts.

    Only the first number found in the text is used. Deleting every non-digit
    character from the whole string (the previous approach) glued unrelated
    numbers together: a range such as "1–2" became 12 and a footnote such as
    "4,011[5]" became 40115. For a range, the lower bound is kept.

    Parameters
    ----------
    x : object
        A cell value; strings are parsed, anything else is passed through
        str() unchanged (e.g. values that are already numeric).

    Returns
    -------
    str or float
        The digits of the first number, or NaN when a string holds no digits.
    """
    if not isinstance(x, str):
        return str(x)

    match = _FIRST_NUMBER.search(x)
    if match is None:
        # No digits at all (e.g. "unknown"): becomes NaN and is dropped by
        # the "> 0" filter below.
        return float('nan')

    # Remove the thousands separators inside the matched number.
    return re.sub(r'\D', '', match.group())


# Cleanup strings.
df_erupt['Fatality'] = df_erupt['Fatality'].apply(clean_numeric)

# str -> number
df_erupt['Fatality'] = pd.to_numeric(df_erupt['Fatality'])

# Remove uninformative records (zero or unparseable fatality counts).
df_erupt = df_erupt[df_erupt.Fatality > 0].copy()

In [35]:
# One bar per volcano, with 'Fatality' as the weight of each row, so the bar
# length and its label are the weighted count per volcano; bars are ordered
# by that value.
fatality_bars = geom_bar(
    aes(
        x=as_discrete('Volcano', order_by='..count..', order=1),
        weight='Fatality',
    ),
    fill="#ef8a62",
    labels=layer_labels(['..count..']),
)

# Horizontal bars in a wide, short figure.
(
    ggplot(df_erupt)
    + fatality_bars
    + labs(y='Fatalities Caused by Volcanic Activity')
    + coord_flip()
    + ggsize(1000, 300)
)

In [None]:
# Scratch cell (test only).