<a href="https://colab.research.google.com/github/francianerod/Alerta-Raios/blob/main/4_Alerta_Raios_Distancia.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Código feito por:** Franciane Rodrigues

**Empresa:** EPL Informática Ltda

**Área/Setor:** Operacional/Pesquisa e Desenvolvimento

**Motivo do código:** Caracterização do Campo Elétrico no Vale do Paraíba - SP

**Técnica:** Análise Bivariada - Gráficos Interativos

## **1) ABERTURA DE PACOTES E ORGANIZAÇÃO DA BASE DE DADOS**

In [1]:
# Mount Google Drive at /content/drive (Colab only)

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install plotly through the shell; it is imported as plotly.express in the import cell
!pip install plotly



In [3]:
#Importação de Pacotes
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
from datetime import datetime
from matplotlib.ticker import PercentFormatter
from math import radians, cos, sin, asin, sqrt, atan2, degrees

import warnings
warnings.filterwarnings("ignore")

In [4]:
# Build the working dataset: read sheet 'tudo' of the workbook and discard
# every row that has a missing value
df_completo = pd.read_excel(
    '/content/drive/MyDrive/Alerta_Raios_Análise/planilhas_dados/starnet_2.xlsx',
    sheet_name='tudo',
).dropna()
# Tip: df_completo.info() prints information about the dataset

In [5]:
display(df_completo)  # show the dataset

Unnamed: 0,Dia,hora,latitude,longitude,distancia_km,campo_eletrico,sensor,ce_abs,lat_sensor,lon_sensor
0,2011-11-27,00:45:38,-23.31151,-45.90855,11.788000,90.10,aero,90.10,-23.224739,-45.862521
1,2011-11-28,17:45:25,-23.27701,-45.66623,21.972546,1.09,aero,1.09,-23.224739,-45.862521
2,2011-11-28,17:47:32,-23.07911,-45.75104,19.061771,-2.74,aero,2.74,-23.224739,-45.862521
3,2011-11-28,17:56:17,-23.17532,-45.64277,23.495614,-20.11,aero,20.11,-23.224739,-45.862521
4,2011-11-28,17:59:49,-23.23731,-45.63146,24.460011,-20.11,aero,20.11,-23.224739,-45.862521
...,...,...,...,...,...,...,...,...,...,...
1191,2012-03-13,16:30:57,-23.08602,-45.89943,18.899708,-74.20,ieav,74.20,-23.251728,-45.857773
1192,2012-03-13,16:39:29,-23.08426,-45.87632,18.706115,-331.20,ieav,331.20,-23.251728,-45.857773
1193,2012-03-27,20:16:18,-23.15558,-45.81763,11.444128,1136.30,ieav,1136.30,-23.251728,-45.857773
1194,2012-03-27,20:18:27,-23.19040,-45.82104,7.779295,-839.50,ieav,839.50,-23.251728,-45.857773


## **2) MEDIDAS RESUMO DA BASE DE DADOS**

In [6]:
# MEASURES OF POSITION / LOCATION
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df_completo.describe()

Unnamed: 0,latitude,longitude,distancia_km,campo_eletrico,ce_abs,lat_sensor,lon_sensor
count,1196.0,1196.0,1196.0,1196.0,1196.0,1196.0,1196.0
mean,-23.238131,-45.888644,17.157195,111.138186,311.439239,-23.232137,-45.865629
std,0.111592,0.126233,5.686971,517.715338,428.153754,0.018101,0.009746
min,-23.47106,-46.10971,1.22583,-1341.0,1.09,-23.251728,-45.880862
25%,-23.333977,-46.004325,13.442161,-93.2,46.24,-23.251728,-45.880862
50%,-23.21775,-45.894115,18.247814,-11.88,115.6,-23.224739,-45.862521
75%,-23.150103,-45.786505,21.829118,186.65,384.66,-23.209431,-45.857773
max,-22.99951,-45.6157,24.995029,2146.67,2146.67,-23.209431,-45.857773


In [7]:
# Box plot of the distance column (distancia_km); points="all" also draws every observation
fig = px.box(df_completo, y="distancia_km", points="all")
fig.update_layout(width=500, height=500) 
fig.show()

In [8]:
# Box plot of the absolute electric field (ce_abs); points="all" also draws every observation
fig = px.box(df_completo, y="ce_abs", points="all")
fig.update_layout(width=500, height=700) 
fig.show()

## **3) ANÁLISES GERAIS: TABELAS E GRÁFICOS**

In [9]:
# TABLE: FREQUENCY DISTRIBUTION
# VARIABLE: DISTANCE
# 8 equal-width classes; over the observed range each class is about 3 km wide

# Name the count column explicitly: value_counts labels its result 'distancia_km'
# on pandas < 2.0 but 'count' on pandas >= 2.0, which would break the lookups below.
distancia = (
    df_completo["distancia_km"]
    .value_counts(bins=8, ascending=False)
    .to_frame(name="distancia_km")
)
# Relative frequency (%) over the actual number of rows instead of a hard-coded 1196
distancia["Freq_rel"] = round((distancia["distancia_km"] / len(df_completo)) * 100, 1)
# Cumulative relative frequency (%), accumulated in the displayed order (most frequent class first)
distancia["Freq_rel_acum"] = round(distancia["distancia_km"].cumsum() / distancia["distancia_km"].sum() * 100, 1)
display(distancia)

Unnamed: 0,distancia_km,Freq_rel,Freq_rel_acum
"(22.024, 24.995]",271,22.7,22.7
"(19.053, 22.024]",257,21.5,44.1
"(16.082, 19.053]",236,19.7,63.9
"(13.11, 16.082]",144,12.0,75.9
"(10.139, 13.11]",121,10.1,86.0
"(7.168, 10.139]",87,7.3,93.3
"(4.197, 7.168]",45,3.8,97.1
"(1.201, 4.197]",35,2.9,100.0


In [10]:
# TABLE: FREQUENCY DISTRIBUTION
# VARIABLE: absolute electric field (ce_abs), 18 equal-width classes

# Name the count column explicitly: value_counts labels its result 'ce_abs'
# on pandas < 2.0 but 'count' on pandas >= 2.0, which would break the lookups below.
campo = (
    df_completo["ce_abs"]
    .value_counts(bins=18, ascending=False)
    .to_frame(name="ce_abs")
)
# Relative frequency (%) over the actual number of rows instead of a hard-coded 1196
campo["Freq_rel"] = round((campo["ce_abs"] / len(df_completo)) * 100, 1)
# Cumulative relative frequency (%), accumulated in the displayed order (most frequent class first)
campo["Freq_rel_acum"] = round(campo["ce_abs"].cumsum() / campo["ce_abs"].sum() * 100, 1)
display(campo)

Unnamed: 0,ce_abs,Freq_rel,Freq_rel_acum
"(-1.057, 120.289]",605,50.6,50.6
"(120.289, 239.488]",155,13.0,63.5
"(239.488, 358.687]",113,9.4,73.0
"(358.687, 477.886]",92,7.7,80.7
"(477.886, 597.084]",37,3.1,83.8
"(597.084, 716.283]",32,2.7,86.5
"(835.482, 954.681]",29,2.4,88.9
"(954.681, 1073.88]",22,1.8,90.7
"(1193.079, 1312.278]",18,1.5,92.2
"(1073.88, 1193.079]",17,1.4,93.6


In [11]:
# Scatter plot of distance vs. absolute electric field, one color per sensor
fig = px.scatter(df_completo, x="distancia_km", y='ce_abs', color="sensor")
fig.update_layout(width=1200, height=500) 
fig.show()

In [12]:
# Filter 1: keep only the records whose absolute electric field is above 350 V/m,
# i.e. discard the noise, low-risk and attention ranges
# (numerically, only the range that contains lightning)
df_completo_filtro = df_completo.loc[df_completo['ce_abs'].gt(350.)]
df_completo_filtro

Unnamed: 0,Dia,hora,latitude,longitude,distancia_km,campo_eletrico,sensor,ce_abs,lat_sensor,lon_sensor
49,2011-11-29,18:57:38,-23.38560,-45.89684,19.524016,430.96,aero,430.96,-23.224739,-45.862521
54,2011-11-29,18:58:26,-23.36201,-45.90402,17.070705,417.17,aero,417.17,-23.224739,-45.862521
55,2011-11-29,18:58:26,-23.29844,-45.83661,10.199424,417.17,aero,417.17,-23.224739,-45.862521
57,2011-11-29,19:00:51,-23.41173,-45.84504,22.363325,391.24,aero,391.24,-23.224739,-45.862521
58,2011-11-29,19:00:51,-23.41374,-45.84405,22.596643,391.24,aero,391.24,-23.224739,-45.862521
...,...,...,...,...,...,...,...,...,...,...
1164,2012-03-11,17:41:53,-23.35742,-46.04972,22.840976,-470.50,ieav,470.50,-23.251728,-45.857773
1165,2012-03-11,17:43:29,-23.32640,-46.06813,23.018970,-482.10,ieav,482.10,-23.251728,-45.857773
1193,2012-03-27,20:16:18,-23.15558,-45.81763,11.444128,1136.30,ieav,1136.30,-23.251728,-45.857773
1194,2012-03-27,20:18:27,-23.19040,-45.82104,7.779295,-839.50,ieav,839.50,-23.251728,-45.857773


In [13]:
# Scatter plot of distance vs. absolute electric field, restricted to the filtered
# high/extreme-risk records (df_completo_filtro)
fig = px.scatter(df_completo_filtro, x="distancia_km", y='ce_abs', color="sensor")
fig.update_layout(width=1200, height=500) 
fig.show()

In [14]:
# Filter 2: keep only the records whose absolute electric field is BELOW 126 V/m
# (the noise and low-risk range); everything at or above 126 V/m is left out
df_completo_filtro2 = df_completo.loc[df_completo['ce_abs'].lt(126.)]
df_completo_filtro2

Unnamed: 0,Dia,hora,latitude,longitude,distancia_km,campo_eletrico,sensor,ce_abs,lat_sensor,lon_sensor
0,2011-11-27,00:45:38,-23.31151,-45.90855,11.788000,90.10,aero,90.10,-23.224739,-45.862521
1,2011-11-28,17:45:25,-23.27701,-45.66623,21.972546,1.09,aero,1.09,-23.224739,-45.862521
2,2011-11-28,17:47:32,-23.07911,-45.75104,19.061771,-2.74,aero,2.74,-23.224739,-45.862521
3,2011-11-28,17:56:17,-23.17532,-45.64277,23.495614,-20.11,aero,20.11,-23.224739,-45.862521
4,2011-11-28,17:59:49,-23.23731,-45.63146,24.460011,-20.11,aero,20.11,-23.224739,-45.862521
...,...,...,...,...,...,...,...,...,...,...
1184,2012-03-13,15:56:07,-23.03373,-45.88604,24.396639,-93.20,ieav,93.20,-23.251728,-45.857773
1185,2012-03-13,16:01:37,-23.08221,-45.81617,19.311255,-67.30,ieav,67.30,-23.251728,-45.857773
1189,2012-03-13,16:23:33,-23.15668,-45.95584,14.556227,-125.10,ieav,125.10,-23.251728,-45.857773
1190,2012-03-13,16:26:43,-23.08889,-45.94307,20.084282,-111.30,ieav,111.30,-23.251728,-45.857773


In [15]:
# Summary statistics of the numeric columns of the filtered subset df_completo_filtro2
df_completo_filtro2.describe()

Unnamed: 0,latitude,longitude,distancia_km,campo_eletrico,ce_abs,lat_sensor,lon_sensor
count,619.0,619.0,619.0,619.0,619.0,619.0,619.0
mean,-23.224079,-45.869057,18.159427,-19.789645,49.862504,-23.234315,-45.864552
std,0.115126,0.135026,4.936395,57.892202,35.404497,0.017917,0.009324
min,-23.47106,-46.10971,2.113871,-125.1,1.09,-23.251728,-45.880862
25%,-23.32274,-45.997595,15.279679,-68.8,17.67,-23.251728,-45.862521
50%,-23.20332,-45.87283,19.073929,-20.1,49.0,-23.224739,-45.862521
75%,-23.12866,-45.7441,21.968402,11.88,76.8,-23.224739,-45.857773
max,-22.99951,-45.62212,24.955852,124.0,125.1,-23.209431,-45.857773


In [16]:
# TABLE: FREQUENCY DISTRIBUTION
# VARIABLE: absolute electric field for fields below 126 V/m, 5 equal-width classes

# Name the count column explicitly: value_counts labels its result 'ce_abs'
# on pandas < 2.0 but 'count' on pandas >= 2.0, which would break the lookups below.
campo_sr = (
    df_completo_filtro2["ce_abs"]
    .value_counts(bins=5, ascending=False)
    .to_frame(name="ce_abs")
)
# Relative frequency (%) over the subset's actual row count instead of a hard-coded 619
campo_sr["Freq_rel"] = round((campo_sr["ce_abs"] / len(df_completo_filtro2)) * 100, 1)
# Cumulative relative frequency (%), accumulated in the displayed order (most frequent class first)
campo_sr["Freq_rel_acum"] = round(campo_sr["ce_abs"].cumsum() / campo_sr["ce_abs"].sum() * 100, 1)
display(campo_sr)

Unnamed: 0,ce_abs,Freq_rel,Freq_rel_acum
"(0.965, 25.892]",220,35.5,35.5
"(50.694, 75.496]",118,19.1,54.6
"(75.496, 100.298]",117,18.9,73.5
"(25.892, 50.694]",108,17.4,91.0
"(100.298, 125.1]",56,9.0,100.0


In [17]:
# TABLE: FREQUENCY DISTRIBUTION
# VARIABLE: distance for fields below 126 V/m, 8 equal-width classes

# Name the count column explicitly: value_counts labels its result 'distancia_km'
# on pandas < 2.0 but 'count' on pandas >= 2.0, which would break the lookups below.
distancia_sr = (
    df_completo_filtro2["distancia_km"]
    .value_counts(bins=8, ascending=False)
    .to_frame(name="distancia_km")
)
# Relative frequency (%) over the subset's actual row count instead of a hard-coded 619
distancia_sr["Freq_rel"] = round((distancia_sr["distancia_km"] / len(df_completo_filtro2)) * 100, 1)
# Cumulative relative frequency (%), accumulated in the displayed order (most frequent class first)
distancia_sr["Freq_rel_acum"] = round(distancia_sr["distancia_km"].cumsum() / distancia_sr["distancia_km"].sum() * 100, 1)
display(distancia_sr)

Unnamed: 0,distancia_km,Freq_rel,Freq_rel_acum
"(19.245, 22.101]",150,24.2,24.2
"(22.101, 24.956]",148,23.9,48.1
"(16.39, 19.245]",126,20.4,68.5
"(13.535, 16.39]",90,14.5,83.0
"(10.68, 13.535]",45,7.3,90.3
"(7.824, 10.68]",37,6.0,96.3
"(2.0900000000000003, 4.969]",14,2.3,98.5
"(4.969, 7.824]",9,1.5,100.0


In [18]:
# Scatter plot of distance vs. absolute electric field for the electric-field-filtered
# subset (df_completo_filtro2), one color per sensor
fig = px.scatter(df_completo_filtro2, x="distancia_km", y='ce_abs', color="sensor")
fig.update_layout(width=1200, height=500) 
fig.show()

## **4) Distribuição Espacial via Mapa**

In [19]:
# Show the dataset whose latitude/longitude columns are plotted on the maps in this section
display(df_completo)

Unnamed: 0,Dia,hora,latitude,longitude,distancia_km,campo_eletrico,sensor,ce_abs,lat_sensor,lon_sensor
0,2011-11-27,00:45:38,-23.31151,-45.90855,11.788000,90.10,aero,90.10,-23.224739,-45.862521
1,2011-11-28,17:45:25,-23.27701,-45.66623,21.972546,1.09,aero,1.09,-23.224739,-45.862521
2,2011-11-28,17:47:32,-23.07911,-45.75104,19.061771,-2.74,aero,2.74,-23.224739,-45.862521
3,2011-11-28,17:56:17,-23.17532,-45.64277,23.495614,-20.11,aero,20.11,-23.224739,-45.862521
4,2011-11-28,17:59:49,-23.23731,-45.63146,24.460011,-20.11,aero,20.11,-23.224739,-45.862521
...,...,...,...,...,...,...,...,...,...,...
1191,2012-03-13,16:30:57,-23.08602,-45.89943,18.899708,-74.20,ieav,74.20,-23.251728,-45.857773
1192,2012-03-13,16:39:29,-23.08426,-45.87632,18.706115,-331.20,ieav,331.20,-23.251728,-45.857773
1193,2012-03-27,20:16:18,-23.15558,-45.81763,11.444128,1136.30,ieav,1136.30,-23.251728,-45.857773
1194,2012-03-27,20:18:27,-23.19040,-45.82104,7.779295,-839.50,ieav,839.50,-23.251728,-45.857773


In [20]:
# Spatial distribution of the lightning strikes, colored by sensor
fig = px.scatter_mapbox(df_completo, lat="latitude", lon="longitude", hover_data=["sensor"], color="sensor")
fig.update_layout(width=1000, height=500, mapbox_style="open-street-map")
fig.show()

In [21]:
# Density heat map using the absolute electric field (ce_abs) as the metric
# Basemap: "open-street-map", the same token-free style used by the sensor scatter map.
# The former "stamen-terrain" style depends on Stamen tiles that are no longer served
# without a Stadia Maps API key, which leaves the map without a background.
fig = px.density_mapbox(df_completo, lat='latitude', lon='longitude', z='ce_abs', mapbox_style="open-street-map")
fig.update_layout(width=1000, height=500)
fig.show()

## **5) Direção**

**Cálculo Angular a partir de dois pontos geoespaciais**

Formula for the bearing between two points given their latitude and longitude.
The bearing from point A to point B is:

$$\beta = \operatorname{atan2}(X, Y)$$

where

$$X = \cos\theta_b \, \sin\Delta L$$

$$Y = \cos\theta_a \, \sin\theta_b - \sin\theta_a \, \cos\theta_b \, \cos\Delta L$$

with $\theta_a$ and $\theta_b$ the latitudes of A and B, and $\Delta L$ the longitude of B minus the longitude of A.

Consulta em: https://www.igismap.com/formula-to-find-bearing-or-heading-angle-between-two-points-latitude-longitude/

In [22]:
# Adapted from: https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points

def initial_bearing(row):
    """Return the compass bearing, in degrees, from the sensor to the strike.

    Parameters
    ----------
    row : mapping
        Must provide 'lat_sensor' and 'lon_sensor' (origin point) and
        'latitude' and 'longitude' (destination point), all in decimal degrees.

    Returns
    -------
    float
        Initial bearing normalised to the interval [0, 360).
    """
    origin_lat = radians(row['lat_sensor'])
    target_lat = radians(row['latitude'])
    delta_lon = radians(row['longitude'] - row['lon_sensor'])

    x = sin(delta_lon) * cos(target_lat)
    y = cos(origin_lat) * sin(target_lat) - (sin(origin_lat) * cos(target_lat) * cos(delta_lon))

    # atan2 yields an angle in [-180°, +180°]; shifting by 360 and taking the
    # remainder maps it onto the compass range.
    signed_degrees = degrees(atan2(x, y))
    return (signed_degrees + 360) % 360


In [23]:
# Store the bearing computed by initial_bearing for every row in a new 'angulo' column
df_completo['angulo'] = df_completo.apply(initial_bearing, axis=1)
display(df_completo)

Unnamed: 0,Dia,hora,latitude,longitude,distancia_km,campo_eletrico,sensor,ce_abs,lat_sensor,lon_sensor,angulo
0,2011-11-27,00:45:38,-23.31151,-45.90855,11.788000,90.10,aero,90.10,-23.224739,-45.862521,205.971837
1,2011-11-28,17:45:25,-23.27701,-45.66623,21.972546,1.09,aero,1.09,-23.224739,-45.862521,106.202012
2,2011-11-28,17:47:32,-23.07911,-45.75104,19.061771,-2.74,aero,2.74,-23.224739,-45.862521,35.162248
3,2011-11-28,17:56:17,-23.17532,-45.64277,23.495614,-20.11,aero,20.11,-23.224739,-45.862521,76.294747
4,2011-11-28,17:59:49,-23.23731,-45.63146,24.460011,-20.11,aero,20.11,-23.224739,-45.862521,93.433855
...,...,...,...,...,...,...,...,...,...,...,...
1191,2012-03-13,16:30:57,-23.08602,-45.89943,18.899708,-74.20,ieav,74.20,-23.251728,-45.857773,346.978465
1192,2012-03-13,16:39:29,-23.08426,-45.87632,18.706115,-331.20,ieav,331.20,-23.251728,-45.857773,354.182634
1193,2012-03-27,20:16:18,-23.15558,-45.81763,11.444128,1136.30,ieav,1136.30,-23.251728,-45.857773,21.001721
1194,2012-03-27,20:18:27,-23.19040,-45.82104,7.779295,-839.50,ieav,839.50,-23.251728,-45.857773,28.837311


In [24]:
# Categorize each bearing into a compass direction
# The sector limits can be seen in this figure: https://www.monolitonimbus.com.br/rosa-dos-ventos-e-os-deuses/
# np.select returns the choice of the FIRST condition that holds, so each entry only
# needs its upper bound; 'N' appears twice because it covers both ends of the circle.
condicao = [(df_completo['angulo'] <= 22.5), 
            (df_completo['angulo'] <= 67.5),
            (df_completo['angulo'] <= 112.5),
            (df_completo['angulo'] <= 157.5),
            (df_completo['angulo'] <= 202.5),
            (df_completo['angulo'] <= 247.5),
            (df_completo['angulo'] <= 292.5),
            (df_completo['angulo'] <= 337.5),
            (df_completo['angulo'] <= 360.0)]

categoria = ['N', 'NE', 'L', 'SE', 'S','SO','O', 'NO', 'N']

# Add the category to the dataset.
# An explicit string default is passed because recent NumPy releases raise TypeError
# when the implicit integer default (0) has to be combined with string choices.
# The default is only used for rows where no condition holds (e.g. a NaN angle).
df_completo['direcao'] = np.select(condicao, categoria, default='')
display(df_completo)

Unnamed: 0,Dia,hora,latitude,longitude,distancia_km,campo_eletrico,sensor,ce_abs,lat_sensor,lon_sensor,angulo,direcao
0,2011-11-27,00:45:38,-23.31151,-45.90855,11.788000,90.10,aero,90.10,-23.224739,-45.862521,205.971837,SO
1,2011-11-28,17:45:25,-23.27701,-45.66623,21.972546,1.09,aero,1.09,-23.224739,-45.862521,106.202012,L
2,2011-11-28,17:47:32,-23.07911,-45.75104,19.061771,-2.74,aero,2.74,-23.224739,-45.862521,35.162248,NE
3,2011-11-28,17:56:17,-23.17532,-45.64277,23.495614,-20.11,aero,20.11,-23.224739,-45.862521,76.294747,L
4,2011-11-28,17:59:49,-23.23731,-45.63146,24.460011,-20.11,aero,20.11,-23.224739,-45.862521,93.433855,L
...,...,...,...,...,...,...,...,...,...,...,...,...
1191,2012-03-13,16:30:57,-23.08602,-45.89943,18.899708,-74.20,ieav,74.20,-23.251728,-45.857773,346.978465,N
1192,2012-03-13,16:39:29,-23.08426,-45.87632,18.706115,-331.20,ieav,331.20,-23.251728,-45.857773,354.182634,N
1193,2012-03-27,20:16:18,-23.15558,-45.81763,11.444128,1136.30,ieav,1136.30,-23.251728,-45.857773,21.001721,N
1194,2012-03-27,20:18:27,-23.19040,-45.82104,7.779295,-839.50,ieav,839.50,-23.251728,-45.857773,28.837311,NE


In [25]:
# TABLE: FREQUENCY DISTRIBUTION
# VARIABLE: direction

# Name the count column explicitly: value_counts labels its result 'direcao'
# on pandas < 2.0 but 'count' on pandas >= 2.0, which would break the lookups below.
direcao = df_completo["direcao"].value_counts().to_frame(name="direcao")
# Relative frequency (%) over the actual number of rows instead of a hard-coded 1196
direcao["Freq_rel"] = round((direcao["direcao"] / len(df_completo)) * 100, 1)
# Cumulative relative frequency (%), accumulated in the displayed order (most frequent direction first)
direcao["Freq_rel_acum"] = round(direcao["direcao"].cumsum() / direcao["direcao"].sum() * 100, 1)
display(direcao)

Unnamed: 0,direcao,Freq_rel,Freq_rel_acum
O,227,19.0,19.0
NO,188,15.7,34.7
S,150,12.5,47.2
SO,148,12.4,59.6
NE,129,10.8,70.4
L,124,10.4,80.8
SE,119,9.9,90.7
N,111,9.3,100.0


In [26]:
# Polar scatter plot: radius = distancia_km, angle = angulo (degrees), one color per sensor
fig = px.scatter_polar(df_completo, r="distancia_km", theta="angulo", color='sensor')
fig.show()

**Legenda dos pontos cardeais e colaterais:**

*   N: Norte - 0°
*   NE: Nordeste - 45°
*   L: Leste - 90°
*   SE: Sudeste - 135°
*   S: Sul - 180°
*   SO: Sudoeste - 225°
*   O: Oeste - 270°
*   NO: Noroeste - 315°