<a href="https://colab.research.google.com/github/gabrielborja/python_data_analysis/blob/main/ecuador_analytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Ecuador Volcanoes Analysis

Ecuador is located at the center of the world and is also known as the land of volcanoes. Data about Ecuador's volcanoes can be found [here](https://en.wikipedia.org/wiki/List_of_volcanoes_in_Ecuador).

## Loading packages and data

In [None]:
#Uncomment to list the packages (and versions) installed in the current environment
#!pip freeze

In [1]:
#Importing data manipulation packages
import numpy as np
import pandas as pd

## Retrieving data from URL

In [8]:
#Reading the tables whose class attribute is 'wikitable sortable' from the
#Wikipedia page; the result is indexed by position in the cells below
wiki_url = 'https://en.wikipedia.org/wiki/List_of_volcanoes_in_Ecuador'
table_attrs = {'class': 'wikitable sortable'}
ecu_tables = pd.read_html(wiki_url, attrs=table_attrs)

In [15]:
#The first parsed table (position 0) holds the mainland volcanoes;
#preview its last five rows
ecu_main = ecu_tables[0]
ecu_main.tail(n=5)

Unnamed: 0,Name,Meters,Feet,Coordinates,Last Eruption
28,Soche,3955,12972,0°33′07″N 77°34′48″W﻿ / ﻿0.552°N 77.580°W,-
29,Sumaco,3990,13087,0°32′S 77°37′W﻿ / ﻿0.53°S 77.62°W,1933
30,Tulabug,3336,10942,1°46′48″S 78°36′47″W﻿ / ﻿1.78°S 78.613°W,Holocene
31,Tungurahua,5023,16475,1°28′01″S 78°26′31″W﻿ / ﻿1.467°S 78.442°W,2016
32,Yanaurcu,4535,14879,0°29′55″N 78°20′02″W﻿ / ﻿0.49849°N 78.33389°W,"60,600 ± 20,000 years ago"


In [14]:
#The second parsed table (position 1) holds the Galapagos volcanoes;
#preview its last five rows
ecu_galap = ecu_tables[1]
ecu_galap.tail(n=5)

Unnamed: 0,Name,Meters,Feet,Coordinates,Last Eruption
10,San Cristóbal,759,2490,0°53′S 89°30′W﻿ / ﻿0.88°S 89.50°W,-
11,Santa Cruz,964,2834,0°37′S 90°20′W﻿ / ﻿0.62°S 90.33°W,-
12,Santiago,920,3018,0°13′S 90°46′W﻿ / ﻿0.22°S 90.77°W,1906
13,Sierra Negra,1124,3687,0°50′S 91°10′W﻿ / ﻿0.83°S 91.17°W,2018
14,Wolf,1710,5609,0°01′N 91°21′W﻿ / ﻿0.02°N 91.35°W,2015


In [16]:
#Tag every row with its region so the two tables stay distinguishable
#once they are combined into a single dataframe
for frame, label in ((ecu_main, 'Mainland'), (ecu_galap, 'Galapagos')):
    frame['Category'] = label

In [18]:
#Stack the mainland and Galapagos frames into one dataframe, renumbering
#the rows so the combined index has no duplicates, then show (rows, columns)
volcano_frames = [ecu_main, ecu_galap]
ecu_volc = pd.concat(volcano_frames, ignore_index=True)
ecu_volc.shape

(48, 6)

## Data Cleaning

In [22]:
#Previewing the first five rows of the concatenated dataframe
ecu_volc.head(n=5)

Unnamed: 0,Name,Meters,Feet,Coordinates,Last Eruption,Category
0,El Altar / Kapak Urku,5405,17730,".mw-parser-output .geo-default,.mw-parser-outp...",Unknown,Mainland
1,Antisana,5753,18870,0°28′52″S 78°08′24″W﻿ / ﻿0.481°S 78.14°W,1802,Mainland
2,Atacazo,4463,14639,0°21′11″S 78°37′01″W﻿ / ﻿0.353°S 78.617°W,-,Mainland
3,Carihuairazo,5018,16463,01°24′25″S 78°45′00″W﻿ / ﻿1.40694°S 78.75000°W,Unknown,Mainland
4,Cayambe,5790,18991,0°01′44″N 77°59′10″W﻿ / ﻿0.029°N 77.986°W,1786,Mainland


In [23]:
#Row 0 of the Coordinates column holds leaked Wikipedia CSS text
#('.mw-parser-output .geo-default,...') instead of a coordinate, so overwrite
#it with the volcano's decimal-degree coordinate.
#A single .loc[row, column] call assigns directly on ecu_volc; chained indexing
#(df[col][row] = value) may write to a temporary copy, raises
#SettingWithCopyWarning, and has no effect under pandas copy-on-write.
ecu_volc.loc[0, 'Coordinates'] = '1.68°S 78.42°W'

In [24]:
#Listing the distinct coordinate strings to inspect the formats that need parsing
ecu_volc.loc[:, 'Coordinates'].unique()

array(['1.68°S 78.42°W',
       '0°28′52″S 78°08′24″W\ufeff / \ufeff0.481°S 78.14°W',
       '0°21′11″S 78°37′01″W\ufeff / \ufeff0.353°S 78.617°W',
       '01°24′25″S 78°45′00″W\ufeff / \ufeff1.40694°S 78.75000°W',
       '0°01′44″N 77°59′10″W\ufeff / \ufeff0.029°N 77.986°W',
       '0°22′S 78°15′W\ufeff / \ufeff0.37°S 78.25°W',
       '0°47′52″N 77°57′3″W\ufeff / \ufeff0.79778°N 77.95083°W',
       '01°28′09″S 78°49′03″W\ufeff / \ufeff1.46917°S 78.81750°W',
       '0°26′S 77°43′W\ufeff / \ufeff0.43°S 77.72°W',
       '0°21′39″N 78°20′57″W\ufeff / \ufeff0.36083°N 78.34917°W',
       '0°40′37″S 78°26′10″W\ufeff / \ufeff0.677°S 78.436°W',
       '0°18′30″N 78°21′50″W\ufeff / \ufeff0.30833°N 78.36389°W',
       '0°09′N 78°08′W\ufeff / \ufeff0.15°N 78.14°W',
       '0°39′32″S 78°42′50″W\ufeff / \ufeff0.659°S 78.714°W',
       '0°16′N 78°11′W\ufeff / \ufeff0.26°N 78.18°W',
       '1°46′48″S 78°36′47″W\ufeff / \ufeff1.78°S 78.613°W',
       '0°08′N 78°16′W\ufeff / \ufeff0.13°N 78.27°W', nan,

In [None]:
#Converting a degrees-minutes-seconds coordinate to signed decimal degrees
def dms_to_decimal(dms):
    """Convert one degrees-minutes-seconds coordinate to signed decimal degrees.

    Accepts both ASCII marks (51°36'9.18"N) and the typographic prime marks
    found in the scraped Coordinates column (0°28′52″S). Minutes and seconds
    are optional, so values such as 0°22′S are handled as well.

    Parameters
    ----------
    dms : str
        A single coordinate component ending in a hemisphere letter
        (N, S, E or W).

    Returns
    -------
    float
        Decimal degrees; negative for the southern and western hemispheres.

    Raises
    ------
    ValueError
        If ``dms`` does not look like a DMS coordinate.
    """
    import re  #local import keeps this cell runnable on its own

    parts = re.fullmatch(
        r"""\s*(\d+(?:\.\d+)?)°          # degrees
            (?:\s*(\d+(?:\.\d+)?)['′])?  # optional minutes
            (?:\s*(\d+(?:\.\d+)?)["″])?  # optional seconds
            \s*([NSEW])\s*               # hemisphere letter
        """,
        dms,
        flags=re.VERBOSE,
    )
    if parts is None:
        raise ValueError(f'Not a DMS coordinate: {dms!r}')

    deg, minutes, seconds, direction = parts.groups()
    #Missing minutes/seconds groups come back as None and count as zero
    decimal = float(deg) + float(minutes or 0) / 60 + float(seconds or 0) / (60 * 60)
    return decimal * (-1 if direction in ['W', 'S'] else 1)

#Example taken from the original scratch code
dms_to_decimal('''51°36'9.18"N''')