# Mapping Potholes
The idea of this experiment is to build a solution that helps the institutions responsible for street maintenance decide where to allocate their resources.

For the solution to reach its potential, a large number of cars must collect vibration-sensor data, either through devices installed in the cars or through an application installed on the driver's cell phone. The captured data, such as the .csv files in data/, would be sent to a processing center that runs the algorithm in this repository, which uses the crowdsensing concept to generate a map of the potholes most harmful to drivers.

## 1 Libraries and Constants

In [None]:
import pandas as pd
import numpy as np
import folium as fol
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib notebook

from os import walk
from geopy.distance import geodesic

FLOAT_COLS = ['gFx', 'gFy', 'gFz', 'Latitude', 'Longitude', 'Speed (m/s)']

## 2 Function Definition

In [None]:
def read_file(file_path):
    """Load one sensor log CSV and return its cleaned readings.

    Every column is read as text so that decimal commas (e.g. ``"0,98"``)
    can be normalised to decimal points before the numeric conversion.
    Reading as text also keeps values that already use a decimal point
    intact; a row-wise ``.str`` replacement would turn any value pandas had
    already parsed as a number into NaN.

    Parameters
    ----------
    file_path : str
        Path of the comma-separated file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``gFx``, ``gFy``, ``gFz``, ``Latitude``,
        ``Longitude`` and ``Speed(km/h)``. The index labels are strings.
    """
    # Read dataset; dtype=str prevents pandas from parsing numbers early
    df = pd.read_csv(file_path, sep=',', dtype=str)

    # Preparing dataset: decimal comma -> decimal point, one column at a time
    df = df.apply(lambda col: col.str.replace(',', '.', regex=False))
    df[FLOAT_COLS] = df[FLOAT_COLS].astype(np.float32)

    # Discard rows whose coordinates are exactly zero (presumably readings
    # taken without a GPS fix). Copy so the assignment below works on an
    # independent frame instead of a view of the unfiltered data.
    df = df[(df['Latitude'] != 0) & (df['Longitude'] != 0)].copy()

    # m/s -> km/h
    df['Speed (m/s)'] = df['Speed (m/s)'] * 3.6

    # Rename the speed column so its name matches the new unit; index=str
    # is kept so the returned index labels stay strings as before.
    df = df.rename(index=str, columns={'Speed (m/s)': 'Speed(km/h)'})

    return df[['time', 'gFx', 'gFy', 'gFz', 'Latitude', 'Longitude', 'Speed(km/h)']]

def define_col_to_use(df):
    """Return the name of the accelerometer axis with the largest median.

    The candidates are ``gFx``, ``gFy`` and ``gFz``. When medians tie, the
    axis listed first wins.
    """
    axis_names = ('gFx', 'gFy', 'gFz')
    medians = {name: df[name].median() for name in axis_names}

    return max(axis_names, key=lambda name: medians[name])

def select_bad_point_nearby(row, df_bad_points):
    """Return the index label of the catalogued point closest to ``row``.

    Only points lying strictly inside a +/-0.0001 degree box around the
    coordinates of ``row`` are considered. Among those, the one with the
    smallest geodesic distance is chosen.

    Returns -1 when ``df_bad_points`` is empty or no point lies in the box.
    """
    if len(df_bad_points) == 0:
        return -1

    lat = row['latitude']
    lon = row['longitude']
    lat_max = lat + 0.0001
    lon_max = lon + 0.0001
    lat_min = lat - 0.0001
    lon_min = lon - 0.0001

    known_lats = df_bad_points['latitude']
    known_lons = df_bad_points['longitude']
    above_lower_corner = (known_lats > lat_min) & (known_lons > lon_min)
    below_upper_corner = (known_lats < lat_max) & (known_lons < lon_max)
    candidates = df_bad_points[above_lower_corner & below_upper_corner]

    nearest_label = -1
    shortest = 9999999  # initial value, replaced by the first candidate's distance
    for label, candidate in candidates.iterrows():
        separation = geodesic((candidate['latitude'], candidate['longitude']), (lat, lon))
        if separation < shortest:
            shortest = separation
            nearest_label = label

    return nearest_label
    
    
def holes_voting(pontos_ruins):
    """Merge nearby bad points into clusters and count votes per cluster.

    Each row of ``pontos_ruins`` is matched against the clusters found so
    far with ``select_bad_point_nearby``. A match adds one vote to that
    cluster; otherwise the row opens a new cluster with a single vote.

    Parameters
    ----------
    pontos_ruins : pandas.DataFrame
        Bad points with ``latitude`` and ``longitude`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns ``latitude``, ``longitude`` and ``pontos`` (the vote count),
        indexed 0..n-1 in order of cluster creation.
    """
    # Typed empty columns keep the dtypes numeric once rows are added.
    df = pd.DataFrame({
        'latitude': pd.Series(dtype='float64'),
        'longitude': pd.Series(dtype='float64'),
        'pontos': pd.Series(dtype='float64'),
    })

    for _, row in pontos_ruins.iterrows():
        ponto_encontrado = select_bad_point_nearby(row, df)
        if ponto_encontrado < 0:
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported way to add a row and works on older versions too.
            new_cluster = pd.DataFrame([{
                'latitude': float(row['latitude']),
                'longitude': float(row['longitude']),
                'pontos': 1.0,
            }])
            df = pd.concat([df, new_cluster], ignore_index=True)
        else:
            df.loc[ponto_encontrado, 'pontos'] += 1

    return df

def detect_bad_points(df):
    """Keep only the rows whose ``var`` is strictly above the 95th percentile.

    Rows with a missing ``var`` never pass the comparison and are dropped.
    """
    threshold = df['var'].quantile(0.95)
    above_threshold = df['var'] > threshold

    return df.loc[above_threshold]

def preprocess_file(path):
    """Turn one sensor log into its list of suspicious coordinates.

    The file is loaded with ``read_file``, the axis chosen by
    ``define_col_to_use`` is multiplied by 9.8 (presumably g-force to
    m/s^2), and the variance of that signal is computed for every distinct
    (Latitude, Longitude) pair. The pairs are ordered by their earliest
    ``time`` and filtered by ``detect_bad_points``.

    Returns a DataFrame with columns ``latitude``, ``longitude`` and ``var``.
    """
    readings = read_file(path)
    axis_name = define_col_to_use(readings)
    readings = readings.assign(accy=readings[axis_name] * 9.8)

    # Group the readings by coordinate: variance of the signal, earliest time
    aggregations = {'accy': 'var', 'time': 'min'}
    per_coordinate = readings.groupby(['Latitude', 'Longitude'], as_index=False).agg(aggregations)
    per_coordinate = per_coordinate[['Latitude', 'Longitude', 'accy', 'time']]

    # Chronological order with a fresh 0..n-1 index; time is no longer needed
    pontos = per_coordinate.sort_values('time', ascending=True).reset_index(drop=True)
    pontos = pontos.drop('time', axis=1)
    pontos = pontos.rename(columns={'Latitude': 'latitude', 'Longitude': 'longitude', 'accy': 'var'})

    return detect_bad_points(pontos)

def is_it_near_bumps(row, bumps):
    """Tell whether any catalogued bump lies in a small box around ``row``.

    The box spans 0.0001 degrees on each side of the row's ``latitude`` and
    ``longitude``, and its edges are excluded. ``bumps`` must provide
    ``lat`` and ``lon`` columns.
    """
    lat = row['latitude']
    lon = row['longitude']
    lat_max = lat + 0.0001
    lon_max = lon + 0.0001
    lat_min = lat - 0.0001
    lon_min = lon - 0.0001

    lat_in_range = (bumps['lat'] > lat_min) & (bumps['lat'] < lat_max)
    lon_in_range = (bumps['lon'] > lon_min) & (bumps['lon'] < lon_max)

    return (lat_in_range & lon_in_range).any()

def remove_speed_bumps(bad_points, bumps_path='../input/lombadas-sp/lombadasCatalogadasFinal.csv.txt'):
    """Drop the bad points that sit next to a catalogued speed bump.

    Parameters
    ----------
    bad_points : pandas.DataFrame
        Candidate points with ``latitude`` and ``longitude`` columns.
    bumps_path : str, optional
        Semicolon-separated, header-less catalogue whose columns are read as
        ``lat``, ``lon`` and ``color``. Defaults to the path that used to be
        hard-coded, so existing callers are unaffected.

    Returns
    -------
    pandas.DataFrame
        The rows of ``bad_points`` for which ``is_it_near_bumps`` is False.
    """
    # Nothing to filter: a row-wise apply on an empty frame does not yield
    # a boolean mask, so return early with an independent copy.
    if len(bad_points) == 0:
        return bad_points.copy()

    bumps = pd.read_csv(bumps_path, delimiter=';', names=['lat', 'lon', 'color'])
    # Only the coordinates are needed for the proximity check
    bumps = bumps.drop('color', axis=1)

    near_a_bump = bad_points.apply(lambda point: is_it_near_bumps(point, bumps), axis=1)

    return bad_points[~near_a_bump]

# 3 Executing Main Pipeline

In [None]:
def read_files_in_path(path):
    """Preprocess every file found under ``path`` and stack the results.

    The directory tree is walked recursively and each file is passed to
    ``preprocess_file``. Files located directly in ``'../data/lombadas-sp'``
    are skipped (presumably the speed-bump catalogue rather than sensor
    logs).

    Returns
    -------
    pandas.DataFrame
        The concatenated bad points of all files, keeping each file's own
        index labels. An empty DataFrame is returned when no file is found.
    """
    frames = []
    for dirpath, _, filenames in walk(path):
        # The check depends only on the directory, so do it once per folder
        if dirpath == '../data/lombadas-sp':
            continue
        for file_name in filenames:
            frames.append(preprocess_file(str(dirpath) + '/' + str(file_name)))

    # pd.concat rejects an empty list, and DataFrame.append (used here
    # previously) was removed in pandas 2.0.
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames)

# Main pipeline: detect bad points per file, discard those next to known
# speed bumps, then cluster the remaining ones and count votes per cluster.
df = read_files_in_path('../data') # returns the bad points from the CSV files found under ../data
df_normais = remove_speed_bumps(df)

# From here on df holds one row per cluster with its vote count in 'pontos'
df = holes_voting(df_normais)

## 3.1 Cataloging the identified points

In [None]:
# Vote-count thresholds: median, third quartile and maximum
color_limits = df['pontos'].quantile([0.5, 0.75, 1])

# yellow: up to the median; orange: (median, Q3]; red: (Q3, max]
votes = df['pontos']
is_orange = (votes > color_limits[0.5]) & (votes <= color_limits[0.75])
is_red = (votes > color_limits[0.75]) & (votes <= color_limits[1])
df['color'] = np.select([is_orange, is_red], ['orange', 'red'], default='yellow')

# The raw vote count is not part of the catalogued output
df.drop(columns='pontos', inplace=True)

## 3.2 Saving results

In [None]:
# Write the catalogued points to disk; the row index is left out of the file.
df.to_csv('identified_potholes.csv', index=False)

# 4 Plotting
## 4.1 Bumps Map

In [None]:
# Load the semicolon-separated speed-bump catalogue.
# NOTE(review): the plotting cell that follows reads the 'lat', 'lon' and
# 'color' columns of this frame - presumably named by the file's header row;
# confirm against the file.
bumps = pd.read_csv('../data/cataloged_street_bumps/lombadasCatalogadasFinal.csv', sep=';')

In [None]:

# Centre the map on the first catalogued bump. Two-argument .iloc is purely
# positional; the former bumps.iloc[0][0] did an integer lookup on a
# label-indexed row, which pandas 2.1 deprecates.
map_plot = fol.Map(location=[bumps.iloc[0, 0], bumps.iloc[0, 1]], zoom_start=13)

# One circle per bump, drawn in the colour stored in the catalogue
for _, ponto in bumps.iterrows():
    pop = (
        'ID: ' + str(ponto.name) + '; '
        + 'Latitude: ' + str(ponto['lat']) + '; '
        + 'Longitude: ' + str(ponto['lon']) + '; '
    )

    fol.Circle(popup=pop, location=[ponto['lat'], ponto['lon']], radius=5,
               fill=True, fill_color=ponto['color'], color=ponto['color']).add_to(map_plot)
map_plot


## 4.2 Potholes Map

In [None]:
pontos = df

# Centre the map on the first identified point. Two-argument .iloc is purely
# positional; the former pontos.iloc[0][0] did an integer lookup on a
# label-indexed row, which pandas 2.1 deprecates.
map_plot = fol.Map(location=[pontos.iloc[0, 0], pontos.iloc[0, 1]], zoom_start=13)

# NOTE(review): every circle is drawn in red even though a 'color' column is
# computed in section 3.1 - confirm whether the map should use it.
for _, ponto in pontos.iterrows():
    pop = (
        'ID: ' + str(ponto.name) + '; '
        + 'Latitude: ' + str(ponto['latitude']) + '; '
        + 'Longitude: ' + str(ponto['longitude']) + '; '
    )

    fol.Circle(popup=pop, location=[ponto['latitude'], ponto['longitude']], radius=5,
               fill=True, fill_color='red', color='red').add_to(map_plot)
map_plot
