# Análise de Tráfego e Acidentes em Minas Gerais (MG)

Este notebook analisa os dados de tráfego e de acidentes em Minas Gerais no ano de 2020, de acordo com os seguintes passos:

1. Pré-processamento dos dados
2. Análise Exploratória
3. Análise de Correlação
4. Análise de Hotspots



In [None]:
import pandas as pd
import unicodedata

def remove_accents(input_str):
    """Return *input_str* with its combining marks stripped.

    The text is first decomposed with NFKD normalization, which separates
    base characters from their diacritics; every character that
    ``unicodedata.combining`` reports as a combining mark is then dropped.
    """
    decomposed = unicodedata.normalize('NFKD', input_str)
    base_chars = filter(lambda ch: unicodedata.combining(ch) == 0, decomposed)
    return "".join(base_chars)

# Paths to raw data
traffic_data_path = 'data/volume-trafego-praca-pedagio-2020.csv'
accident_data_path = 'data/datatran2020.csv'

# Paths to cleaned data
cleaned_traffic_data_path = 'data/processed/volume-trafego-praca-pedagio-2020-cleaned.csv'
cleaned_accident_data_path = 'data/processed/datatran2020-cleaned.csv'

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folders exist before anything is written.
import os

for _cleaned_path in (cleaned_traffic_data_path, cleaned_accident_data_path):
    os.makedirs(os.path.dirname(_cleaned_path), exist_ok=True)

# Load and clean traffic data: the raw file is read as ';'-separated latin1,
# accents are stripped from the column names and from every string cell, and
# the result is re-saved as comma-separated UTF-8.
traffic_data = pd.read_csv(traffic_data_path, delimiter=';', encoding='latin1', quotechar='"')
traffic_data.columns = [remove_accents(col) for col in traffic_data.columns]
for col in traffic_data.select_dtypes(include=['object']).columns:
    # Object columns may hold non-string values (e.g. NaN); leave those as-is.
    traffic_data[col] = traffic_data[col].apply(lambda x: remove_accents(x) if isinstance(x, str) else x)
traffic_data.to_csv(cleaned_traffic_data_path, index=False, encoding='utf-8')

# Load and clean accident data with the same steps as the traffic data.
accident_data = pd.read_csv(accident_data_path, delimiter=';', encoding='latin1', quotechar='"')
accident_data.columns = [remove_accents(col) for col in accident_data.columns]
for col in accident_data.select_dtypes(include=['object']).columns:
    # Object columns may hold non-string values (e.g. NaN); leave those as-is.
    accident_data[col] = accident_data[col].apply(lambda x: remove_accents(x) if isinstance(x, str) else x)
accident_data.to_csv(cleaned_accident_data_path, index=False, encoding='utf-8')

print("Dados pré-processados e salvos com sucesso.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load cleaned data
traffic_data = pd.read_csv('data/processed/volume-trafego-praca-pedagio-2020-cleaned.csv')
accident_data = pd.read_csv('data/processed/datatran2020-cleaned.csv')

# Exploratory analysis: total traffic volume per calendar month.
# NOTE(review): the plot titles say "MG", but no state filter is applied in
# this cell — confirm the input files are already restricted to MG.
traffic_data['mes_ano'] = pd.to_datetime(traffic_data['mes_ano'], format='%d/%m/%Y')
# Select 'volume_total' BEFORE aggregating. Summing the whole frame also tries
# to sum the datetime 'mes_ano' column (and any text columns), which raises
# TypeError on pandas >= 2.0 and is wasted work on older versions.
monthly_traffic_volume = (
    traffic_data
    .groupby(traffic_data['mes_ano'].dt.to_period('M'))['volume_total']
    .sum()
)

plt.figure(figsize=(12, 6))
monthly_traffic_volume.plot(kind='line', marker='o')
plt.title('Volume de Tráfego nas Praças de Pedágio em MG (2020)')
plt.xlabel('Mês')
plt.ylabel('Volume Total de Veículos')
plt.grid(True)
plt.show()

# Parse the accident dates, then count the rows that fall in each month.
# NOTE(review): the title says "MG", but no state filter is applied here —
# confirm the input is already restricted to MG.
accident_data['data_inversa'] = pd.to_datetime(accident_data['data_inversa'])
accident_month = accident_data['data_inversa'].dt.to_period('M')
monthly_accidents = accident_data.groupby(accident_month).size()

# Bar chart of the monthly accident counts.
fig, ax = plt.subplots(figsize=(12, 6))
monthly_accidents.plot(kind='bar', ax=ax)
ax.set_title('Número de Acidentes em MG (2020)')
ax.set_xlabel('Mês')
ax.set_ylabel('Número de Acidentes')
ax.grid(True)
plt.show()

# Count accidents per weekday label. sort_index() orders the labels by their
# index value (alphabetically for text labels), not by day of the week.
weekday_accidents = (
    accident_data['dia_semana']
    .value_counts()
    .sort_index()
)

# Bar chart of the per-weekday counts, with slanted tick labels.
fig, ax = plt.subplots(figsize=(12, 6))
weekday_accidents.plot(kind='bar', ax=ax)
ax.set_title('Número de Acidentes por Dia da Semana em MG (2020)')
ax.set_xlabel('Dia da Semana')
ax.set_ylabel('Número de Acidentes')
plt.xticks(rotation=45)
ax.grid(True)
plt.show()

# value_counts() sorts by descending frequency, so the first ten entries are
# the ten most frequent accident causes.
cause_frequencies = accident_data['causa_acidente'].value_counts()
causes_accidents = cause_frequencies.iloc[:10]

# Bar chart of the top causes, with slanted tick labels.
fig, ax = plt.subplots(figsize=(12, 6))
causes_accidents.plot(kind='bar', ax=ax)
ax.set_title('Principais Causas de Acidentes em MG (2020)')
ax.set_xlabel('Causa do Acidente')
ax.set_ylabel('Número de Acidentes')
plt.xticks(rotation=45)
ax.grid(True)
plt.show()

# Column-wise totals of the four per-accident people counts.
severity_columns = ['feridos_leves', 'feridos_graves', 'ilesos', 'mortos']
severity_accidents = accident_data[severity_columns].sum()

# Bar chart of the totals, with horizontal tick labels.
fig, ax = plt.subplots(figsize=(12, 6))
severity_accidents.plot(kind='bar', ax=ax)
ax.set_title('Severidade dos Acidentes em MG (2020)')
ax.set_xlabel('Tipo de Severidade')
ax.set_ylabel('Número de Pessoas')
plt.xticks(rotation=0)
ax.grid(True)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load cleaned data
traffic_data = pd.read_csv('data/processed/volume-trafego-praca-pedagio-2020-cleaned.csv')
accident_data = pd.read_csv('data/processed/datatran2020-cleaned.csv')

# Correlation analysis between monthly traffic volume and monthly accidents.
# NOTE(review): the title says "MG", but no state filter is applied in this
# cell — confirm the input files are already restricted to MG.
traffic_data['mes_ano'] = pd.to_datetime(traffic_data['mes_ano'], format='%d/%m/%Y')
# Select 'volume_total' BEFORE aggregating. Summing the whole frame also tries
# to sum the datetime 'mes_ano' column, which raises TypeError on pandas >= 2.0.
monthly_traffic_volume = (
    traffic_data
    .groupby(traffic_data['mes_ano'].dt.to_period('M'))['volume_total']
    .sum()
)

accident_data['data_inversa'] = pd.to_datetime(accident_data['data_inversa'])
monthly_accidents = accident_data.groupby(accident_data['data_inversa'].dt.to_period('M')).size()

# Both series are indexed by monthly Period, so the frame aligns them by month.
traffic_accidents_df = pd.DataFrame({
    'Volume_Trafego': monthly_traffic_volume,
    'Numero_Acidentes': monthly_accidents
})

correlation = traffic_accidents_df.corr()

# Left y-axis: traffic volume.
fig, ax_traffic = plt.subplots(figsize=(12, 6))
traffic_accidents_df['Volume_Trafego'].plot(
    ax=ax_traffic, kind='line', marker='o', color='blue', label='Volume de Tráfego'
)
ax_traffic.set_title('Volume de Tráfego e Número de Acidentes em MG (2020)')
ax_traffic.set_ylabel('Volume de Tráfego')
ax_traffic.grid(True)

# Right y-axis (shared x-axis): number of accidents.
ax_accidents = ax_traffic.twinx()
traffic_accidents_df['Numero_Acidentes'].plot(
    ax=ax_accidents, kind='line', marker='o', color='red', label='Número de Acidentes'
)
ax_accidents.set_ylabel('Número de Acidentes')
ax_accidents.grid(True)

# Each axes only knows its own line, so calling legend() once per axes draws
# two separate boxes that can overlap. Merge the entries into a single legend,
# attached to the twin axes so it is drawn on top of both lines.
traffic_handles, traffic_labels = ax_traffic.get_legend_handles_labels()
accident_handles, accident_labels = ax_accidents.get_legend_handles_labels()
ax_accidents.legend(traffic_handles + accident_handles, traffic_labels + accident_labels)

plt.show()

print('Correlação:', correlation)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np

# Load cleaned data
accident_data = pd.read_csv('data/processed/datatran2020-cleaned.csv')

# Hotspot analysis: 2-D kernel density estimate of the accident coordinates.
# NOTE(review): the title says "Minas Gerais", but no state filter is applied
# in this cell — confirm the input is already restricted to MG.
# The coordinate columns are text with a decimal comma; convert them to floats.
accident_data['latitude'] = accident_data['latitude'].str.replace(',', '.').astype(float)
accident_data['longitude'] = accident_data['longitude'].str.replace(',', '.').astype(float)

# A NaN coordinate would make min()/max() NaN and break the grid below, so
# only rows with both coordinates present are used.
coordinates = accident_data[['longitude', 'latitude']].dropna()
longitudes = coordinates['longitude'].values
latitudes = coordinates['latitude'].values

# Build the evaluation grid with x = longitude and y = latitude so that the
# density image matches the scatter plot and the axis labels. (With latitude
# as the first grid axis, np.rot90 would draw latitude along the horizontal
# axis while the extent and labels describe longitude there.)
xmin, xmax = longitudes.min(), longitudes.max()
ymin, ymax = latitudes.min(), latitudes.max()
X, Y = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
positions = np.vstack([X.ravel(), Y.ravel()])
values = np.vstack([longitudes, latitudes])

kernel = stats.gaussian_kde(values)
Z = np.reshape(kernel(positions).T, X.shape)

plt.figure(figsize=(12, 6))
# rot90 puts the first grid axis (longitude) on the horizontal axis with
# latitude increasing upwards, matching extent=[xmin, xmax, ymin, ymax].
plt.imshow(np.rot90(Z), cmap='hot', extent=[xmin, xmax, ymin, ymax])
plt.scatter(longitudes, latitudes, c='blue', s=1, label='Acidentes')
plt.colorbar(label='Densidade de Acidentes')
plt.title('Densidade de Acidentes em Minas Gerais')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.legend()
plt.grid(True)
plt.show()
