In [2]:
import csv
import matplotlib.pyplot as plt
import matplotlib as mlp
import pandas as pd
import numpy as np
import seaborn as sns
from PIL import Image
import plotly
import plotlywidget
sns.set_theme(style="darkgrid")


In [3]:
!pip install pycountry
!pip install pycountry_convert
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2, convert_continent_code_to_continent_name

Collecting pycountry
  Downloading pycountry-20.7.3.tar.gz (10.1 MB)
[K     |████████████████████████████████| 10.1 MB 5.4 MB/s 
[?25hBuilding wheels for collected packages: pycountry
  Building wheel for pycountry (setup.py) ... [?25l[?25hdone
  Created wheel for pycountry: filename=pycountry-20.7.3-py2.py3-none-any.whl size=10746883 sha256=d0423049065f1fb10a66cf31582754ec0675d6150f55ba1bad2e3c55b92b5b17
  Stored in directory: /root/.cache/pip/wheels/57/e8/3f/120ccc1ff7541c108bc5d656e2a14c39da0d824653b62284c6
Successfully built pycountry
Installing collected packages: pycountry
Successfully installed pycountry-20.7.3
Collecting pycountry_convert
  Downloading pycountry_convert-0.7.2-py3-none-any.whl (13 kB)
Collecting repoze.lru>=0.7
  Downloading repoze.lru-0.7-py3-none-any.whl (10 kB)
Collecting pprintpp>=0.3.0
  Downloading pprintpp-0.4.0-py2.py3-none-any.whl (16 kB)
Collecting pytest-cov>=2.5.1
  Downloading pytest_cov-3.0.0-py3-none-any.whl (20 kB)
Collecting pytest-mock>=1.6

In [4]:
# Raw inputs: the volcano and eruption tables of the TidyTuesday 2020-05-12 dataset.
VOLCANO_CSV_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/volcano.csv'
ERUPTIONS_CSV_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-05-12/eruptions.csv'

volcanodf = pd.read_csv(VOLCANO_CSV_URL)
eruptionsdf = pd.read_csv(ERUPTIONS_CSV_URL)

# Quick look at the column types of the volcano table.
print(volcanodf.dtypes)

volcano_number                int64
volcano_name                 object
primary_volcano_type         object
last_eruption_year           object
country                      object
region                       object
subregion                    object
latitude                    float64
longitude                   float64
elevation                     int64
tectonic_settings            object
evidence_category            object
major_rock_1                 object
major_rock_2                 object
major_rock_3                 object
major_rock_4                 object
major_rock_5                 object
minor_rock_1                 object
minor_rock_2                 object
minor_rock_3                 object
minor_rock_4                 object
minor_rock_5                 object
population_within_5_km        int64
population_within_10_km       int64
population_within_30_km       int64
population_within_100_km      int64
dtype: object


In [5]:
print(volcanodf.shape)

# Keep only volcanoes whose last eruption year is recorded, then make the year an integer.
has_known_year = volcanodf['last_eruption_year'] != 'Unknown'
volcanodf = volcanodf[has_known_year].astype({"last_eruption_year": int})

volcano_columns = [
    "volcano_number", "volcano_name", "country", "last_eruption_year",
    "population_within_5_km", "population_within_10_km",
    "population_within_30_km", "population_within_100_km",
]
sorted_volcano = volcanodf.sort_values(by="last_eruption_year", ascending=True)[volcano_columns]

# Era buckets for eruption start years: label i names the interval
# (cut_bins[i], cut_bins[i + 1]] produced by pd.cut below.
cut_labels = ['<-8000', '<-6000', '<-4000', '<-2000', '<0', '>0', '>2000']
cut_bins = [-12000, -8000, -6000, -4000, -2000, 0, 2000, 3000]

print(sorted_volcano.shape)

print(eruptionsdf.shape)
# Eruptions without a start year cannot be placed on the timeline, so drop them.
eruption_columns = [
    "volcano_number", "eruption_number", "vei",
    "start_year", "start_month", "end_year", "end_month",
    "latitude", "longitude",
]
has_start_year = eruptionsdf['start_year'].notnull()
eruptionsdf = eruptionsdf[has_start_year][eruption_columns]
print(eruptionsdf.shape)
eruptionsdf = eruptionsdf.astype({"start_year": int})

# One row per eruption, enriched with its volcano's attributes and an era
# bucket, ordered chronologically by eruption start year.
merged_volcano_eruptions = (
    pd.merge(sorted_volcano, eruptionsdf, on='volcano_number', how='inner')
    .assign(cut_year=lambda frame: pd.cut(frame['start_year'], bins=cut_bins, labels=cut_labels))
    .sort_values(by="start_year", ascending=True)
)
print(merged_volcano_eruptions.shape)

(958, 26)
(657, 8)
(11178, 15)
(11177, 9)
(9510, 17)


In [6]:
def get_continent(col):
    """Translate a country name into the name of its continent.

    Parameters
    ----------
    col : str
        Country name, e.g. a value of the ``country`` column.

    Returns
    -------
    str
        The continent name reported by ``pycountry_convert``, or ``'Unknown'``
        when the value is not a string or any step of the
        name -> alpha-2 -> continent code -> continent name lookup fails.
    """
    # Missing values (None / NaN) can never match a country name.
    if not isinstance(col, str):
        return 'Unknown'
    try:
        alpha2_code = country_name_to_country_alpha2(col)
        continent_code = country_alpha2_to_continent_code(alpha2_code)
        return convert_continent_code_to_continent_name(continent_code)
    except (KeyError, ValueError):
        # A failed lookup at any stage means the continent cannot be resolved;
        # unrelated errors (and KeyboardInterrupt) are no longer swallowed.
        return 'Unknown'

# Tag every eruption row with the continent of the country it occurred in.
merged_volcano_eruptions["continent"] = merged_volcano_eruptions["country"].map(get_continent)
merged_volcano_eruptions.head()

Unnamed: 0,volcano_number,volcano_name,country,last_eruption_year,population_within_5_km,population_within_10_km,population_within_30_km,population_within_100_km,eruption_number,vei,start_year,start_month,end_year,end_month,latitude,longitude,cut_year,continent
344,357121,Quetrupillan,Chile,255,129,518,11191,269087,22352,3.0,-11345,,,,-39.496,-71.722,<-8000,South America
343,357121,Quetrupillan,Chile,255,129,518,11191,269087,22351,3.0,-10658,,,,-39.496,-71.722,<-8000,South America
0,222161,Igwisi Hills,Tanzania,-10450,37034,37034,105827,774440,22141,1.0,-10450,,,,-4.889,31.933,<-8000,Africa
277,324020,Craters of the Moon,United States,-130,15,15,1459,93300,21101,0.0,-10060,,,,43.42,-113.5,<-8000,North America
919,213020,Nemrut Dagi,Turkey,1650,608,7399,230531,1618263,13908,,-9950,0.0,,,38.654,42.229,<-8000,Asia


In [7]:
# Overview of volcanic eruptions around the world, animated era by era.
import os

import plotly.express as px

# Credentials must not live in the notebook: read the Mapbox token from the
# MAPBOX_ACCESS_TOKEN environment variable instead of hard-coding it.
# NOTE(review): the token that used to be committed here should be revoked.
mapbox_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if mapbox_token:
    px.set_mapbox_access_token(mapbox_token)

fig = px.scatter_mapbox(merged_volcano_eruptions,
                        lat="latitude",
                        lon="longitude",
                        hover_data=["volcano_name", "start_year"],
                        animation_frame="cut_year",
                        animation_group="eruption_number",
                        color="continent",
                        color_discrete_sequence=px.colors.qualitative.Set1,
                        title="Worldwide Volcano Eruptions",
                        zoom=1,
                        # Without a token fall back to tiles that need none, so the
                        # map still renders; with a token keep the default style.
                        mapbox_style=None if mapbox_token else "open-street-map")
# Slow the "play" button down to 1500 ms per animation frame.
fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 1500
fig.show()


In [8]:
# Number of eruptions per continent and era bucket.
# `cut_year` is categorical; observed=False is what pandas has historically
# done by default, so stating it keeps the result unchanged while avoiding the
# FutureWarning newer pandas versions emit when it is left implicit.
grouped_df = (
    merged_volcano_eruptions
    .groupby(['continent', 'cut_year'], observed=False)
    .size()
    .reset_index(name='count')
)

# Fixed y-range so bar heights are comparable across animation frames;
# both bounds are numeric (the upper bound used to be the string "2600").
fig = px.bar(grouped_df, x="continent", y="count", color="continent",
             animation_frame="cut_year", range_y=[0, 2600],
             title="Total Volcano Eruptions - Continent-based")
fig.show()

In [9]:
import pycountry
def get_countryCode(col):
    """Return the ISO 3166-1 alpha-3 code for a country name.

    Parameters
    ----------
    col : str
        Country name, matched against the ``name`` field of pycountry's
        country database.

    Returns
    -------
    str
        The ``alpha_3`` attribute of the matching pycountry record, or the
        sentinel ``'NA'`` when the value is not a string or no country with
        that exact name is found.
    """
    # Missing values (None / NaN) can never match a country name.
    if not isinstance(col, str):
        return 'NA'
    try:
        country = pycountry.countries.get(name=col)
    except LookupError:
        # Some pycountry versions raise (KeyError is a LookupError) instead of
        # returning None for an unknown name.
        country = None
    return 'NA' if country is None else country.alpha_3

# Eruption count per country, keyed by ISO alpha-3 code for the choropleth.
grouped_eruptions = (
    merged_volcano_eruptions
    .groupby(["country"])
    .size()
    .reset_index(name='count')
)
grouped_eruptions["iso_country_code"] = grouped_eruptions["country"].apply(get_countryCode)

# Drop countries whose name could not be resolved to an ISO code ('NA' sentinel).
has_iso_code = grouped_eruptions['iso_country_code'] != 'NA'
grouped_eruptions = grouped_eruptions[has_iso_code]
#grouped_eruptions

In [10]:
import plotly.graph_objects as go
from plotly.graph_objs import *
# World choropleth of eruption counts per country.
# The colour encodes log2(count) (presumably to keep a few very active
# countries from washing out the rest); the raw count stays in the hover box.
# Using a named column instead of an anonymous Series lets the colour bar
# carry a meaningful label rather than the generic "color".
choropleth_df = grouped_eruptions.assign(log2_count=np.log2(grouped_eruptions['count']))

fig = px.choropleth(choropleth_df, locations="iso_country_code",
                    color="log2_count",
                    hover_data=["country", "count"],
                    labels={'count': 'Number of eruptions',
                            'log2_count': 'log2(number of eruptions)'},
                    color_continuous_scale="Hot_r",
                   )
fig.update_layout(title_text='Total Volcano Eruptions - Country-based',
    geo=dict(
        showocean=True,  # paint the oceans with the colour below
        oceancolor='rgb(127,205,255)'),
)

fig.show()

In [11]:
# How many eruptions have no recorded VEI (Volcanic Explosivity Index)?
count = merged_volcano_eruptions["vei"].isna().sum()
print(count)
print(merged_volcano_eruptions.shape)

# Keep eruptions with a known VEI ...
filtered_merged = merged_volcano_eruptions[merged_volcano_eruptions['vei'].notnull()]
print(filtered_merged.shape)

# ... that started after year 0. Take an explicit copy so the column update
# below writes to an independent frame instead of a slice of the merged table
# (avoids SettingWithCopyWarning and a possibly lost assignment).
after_0 = filtered_merged['start_year'] > 0
filtered_merged = filtered_merged[after_0].copy()

# Shift VEI by one.
# NOTE(review): presumably so VEI-0 eruptions still carry weight when `vei`
# is used as the density value in the maps below - confirm.
filtered_merged["vei"] = filtered_merged["vei"] + 1
print(filtered_merged.shape)
filtered_merged.head()

2364
(9510, 18)
(7146, 18)
(6301, 18)


Unnamed: 0,volcano_number,volcano_name,country,last_eruption_year,population_within_5_km,population_within_10_km,population_within_30_km,population_within_100_km,eruption_number,vei,start_year,start_month,end_year,end_month,latitude,longitude,cut_year,continent
2189,360120,Pelee,France,1932,251,5025,382633,609576,12411,5.0,10,0.0,,,14.809,-61.165,>0,Europe
9009,263250,Merapi,Indonesia,2020,49205,185849,4348473,24728414,15824,5.0,20,0.0,,,-7.54,110.446,>0,Asia
472,282070,Ata,Japan,885,93134,93134,171590,1891380,17017,4.0,30,0.0,,,31.22,130.57,>0,Asia
342,357121,Quetrupillan,Chile,255,129,518,11191,269087,22350,5.0,35,,,,-39.496,-71.722,>0,South America
630,211030,Ischia,Italy,1302,18669,20210,383661,5729354,13381,4.0,40,0.0,,,40.73,13.897,>0,Europe


In [18]:
# Density map of eruptions weighted by the `vei` column.
# NOTE(review): newer plotly releases may need a Stadia Maps token for the
# Stamen tile styles - confirm that "stamen-terrain" still renders.
fig = px.density_mapbox(
    filtered_merged,
    lat="latitude",
    lon="longitude",
    z='vei',
    radius=10,
    center={"lat": 0, "lon": 180},
    zoom=1,
    hover_data=["volcano_name", "start_year"],
    mapbox_style="stamen-terrain",
    title="Volcano density in terms of VEI numbers",
)
fig.show()


In [19]:
import plotly.figure_factory as ff

# Boolean row selectors over the VEI-filtered eruptions.
japan = filtered_merged['country'].eq("Japan")
asia = filtered_merged['continent'].eq("Asia")  # not used by the figure in this cell

# Hexagonal binning of Japanese eruptions, with the individual eruptions
# overlaid as small blue markers.
# NOTE(review): the saved output of this cell is an AttributeError whose cause
# is not visible here; possibly the installed plotly predates
# `create_hexbin_mapbox` - confirm against the kernel's plotly version.
japan_eruptions = filtered_merged[japan]
fig = ff.create_hexbin_mapbox(
    data_frame=japan_eruptions,
    lat="latitude",
    lon="longitude",
    nx_hexagon=10,
    opacity=0.7,
    labels={"count": "Point Count"},
    color_continuous_scale="Inferno_r",
    min_count=1,
    title="Volcanos in Japan and Eruption times",
    show_original_data=True,
    original_data_marker={"size": 4, "opacity": 0.6, "color": "Blue"},
)
fig.layout.coloraxis.colorbar.title = '#Eruptions in Japan'
fig.update_layout(margin={"b": 0, "t": 0, "l": 0, "r": 0})
fig.show()

AttributeError: ignored