# Visualizing wildfires in Mexico during 2017

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astroML.plotting import setup_text_plots
# Configure matplotlib text rendering through astroML's helper.
# NOTE(review): usetex=True presumably needs a working LaTeX installation
# on the machine running the notebook -- confirm before sharing.
setup_text_plots(usetex=True)

# Hex colour palette shared by the figures in this notebook.
colours = [
    '#264653',
    '#2A9D8F',
    '#E9C46A',
    '#F4A261',
    '#E76F51',
]

# Location of the 2017 wildfire workbook (raw download from GitHub).
# A local copy can be used instead:
#   'data/Annual_Fire_History_Series_MX_(2017).xlsx'
load_data = (
    'https://github.com/isaacarroyov/data_visualization_practice/blob/master/'
    'Folium/Wildfires/data/Annual_Fire_History_Series_MX_(2017).xlsx?raw=true'
)

## The data: Where does it come from? 

In [2]:
# Spanish -> English names for the columns used later in the notebook.
# The unsuffixed Degrees/Minutes/Seconds columns feed the latitude and the
# '.1' variants feed the longitude further down.
changes = {
    'Grados': 'Degrees',
    'Minutos': 'Minutes',
    'Segundos': 'Seconds',
    'Grados.1': 'Degrees.1',
    'Minutos.1': 'Minutes.1',
    'Segundos.1': 'Seconds.1',
    'Duración días': 'Duration in days',
    'Total': 'Total area affected',
    'Estado': 'STATE',
}

# Read the workbook into a DataFrame, then apply the renaming.
df = pd.read_excel(load_data)
df = df.rename(columns=changes)
df.head()

Unnamed: 0,Número de Incendio,Clave del incendio,Degrees,Minutes,Seconds,Degrees.1,Minutes.1,Seconds.1,Clave municipio,STATE,...,Gobierno del Estado,Municipios,Propietarios de Terrenos Forestales,Voluntarios,Federales,Total.1,Categoria,Tipo,Region,Unnamed: 52
0,1,17-01-0001,21,53,53.7,102,49,18.8,3,Aguascalientes,...,0,9,0,6,0,15,Área de Protección de los Recursos Naturales Z...,Federal,Occidente,
1,2,17-01-0002,21,49,44.0,102,7,26.4,10,Aguascalientes,...,0,8,0,5,0,13,,,Occidente,
2,3,17-01-0003,21,52,3.25,102,23,30.0,1,Aguascalientes,...,10,10,3,13,0,39,,,Occidente,
3,4,17-01-0004,21,43,17.12,102,21,52.45,1,Aguascalientes,...,4,4,4,5,0,20,,,Occidente,
4,5,17-01-0005,21,59,14.8,102,12,34.54,11,Aguascalientes,...,3,10,0,6,0,26,,,Occidente,


## Exploratory Data Analysis (EDA)

### General observations

**DataFrame structure**

In [None]:
# Report the size of the frame: rows first, then columns.
n_rows, n_cols = df.shape
print('Number of rows: {}'.format(n_rows))
print('Number of columns: {}'.format(n_cols))

**Name of columns**

In [None]:
# Show every column label as a plain NumPy array (cell output).
print('Name of the columns:\n')
df.columns.to_numpy()

**Data types**

In [None]:
# Display the dtype pandas assigned to each column.
df.dtypes

**Statistical summary**

In [None]:
# Summary statistics, transposed so each described column becomes a row.
df.describe().transpose()

### Missing Values

In [None]:
# Number of missing cells in each column.
missing_values = df.isnull().sum()

# Share of individual cells that are missing across the whole frame.
# df.size replaces np.product(df.shape): np.product was removed in NumPy 2.0.
total_cells = df.size
total_missing = missing_values.sum()
perc_nan_cells = (total_missing / total_cells) * 100

# Share of rows containing at least one missing value. This is the quantity
# the first message reports; the cell-level ratio above is a different number
# and is printed separately.
perc_nan_rows = df.isnull().any(axis=1).mean() * 100

print('We have {} % rows with at least one missing value\n\n'.format(perc_nan_rows) )
print('{} % of all cells are missing\n\n'.format(perc_nan_cells))

# Per-column breakdown, reusing the counts computed above instead of
# recomputing isnull().sum() for every column.
n_rows = len(df)
for column, n_miss in missing_values.items():
    perc = n_miss / n_rows * 100
    print(column, 'has', n_miss , 'missing values. This means ', perc, ' % from the total\n')

## Unit conversion

### Suspicious data

In [None]:
# Frequency of each distinct value in the latitude-degrees column, to spot odd entries.
df['Degrees'].value_counts()

In [None]:
# Swap the string '19°' for the integer 19; every other entry is kept untouched.
degree_values = df['Degrees'].values
correction = np.where(degree_values == '19°', 19, degree_values)
df['Degrees'] = correction

# Re-check the distribution after the fix.
df['Degrees'].value_counts()

### Conversion function

In [None]:
# Inspect the dtypes of the six degree/minute/second columns before converting them.
coordinate_columns = [
    'Degrees', 'Minutes', 'Seconds',
    'Degrees.1', 'Minutes.1', 'Seconds.1',
]
df.loc[:, coordinate_columns].dtypes

In [None]:
# Display the column cast to int; the result is only shown, not assigned back to df.
df['Degrees'].astype(int)

In [None]:
def degrees2decimal(a_degree, a_minute, a_second):
    """Convert degree / minute / second components to decimal degrees.

    Parameters
    ----------
    a_degree, a_minute, a_second : pandas.Series or numpy.ndarray
        Element-wise matching components. Any object exposing ``.astype``
        and supporting arithmetic works.

    Returns
    -------
    Same container type as the inputs, with float values equal to
    ``degree + minute / 60 + second / 3600``.

    Notes
    -----
    The components are summed as given; no sign handling is applied.
    """
    # Cast to float, not int: seconds in the data carry a fractional part
    # (e.g. 53.7), and an int cast would silently truncate it and shift the
    # resulting coordinate.
    degree = a_degree.astype(float)
    minute = a_minute.astype(float)
    second = a_second.astype(float)
    return degree + minute / 60 + second / 3600

In [None]:
# Columns holding the degree/minute/second components; dropped once converted.
dms_columns = ['Degrees', 'Minutes', 'Seconds', 'Degrees.1', 'Minutes.1', 'Seconds.1']

latitude = degrees2decimal(df['Degrees'], df['Minutes'], df['Seconds'])
longitude = degrees2decimal(df['Degrees.1'], df['Minutes.1'], df['Seconds.1'])

df['Latitude'] = latitude
# The longitude is stored negated.
# NOTE(review): presumably the source lists western longitudes as positive
# magnitudes -- confirm against the dataset documentation.
df['Longitude'] = longitude.values * -1

df.drop(columns=dms_columns, inplace=True)

In [None]:
# Preview the frame after the Latitude/Longitude columns were added and the component columns dropped.
df.head()

## Visualizing wildfire information with `folium`