# API para obtener datos sobre contaminación sonora

In [108]:
#!pip install requests bs4
#Esta es la URL del proyecto
#https://noiseproject.org/data-download/
#!pip install geopy

In [109]:

import os
import requests
from bs4 import BeautifulSoup
import pandas as pd
import io
from pandas.errors import EmptyDataError
import warnings

In [110]:
# Placeholder for the noise dataset. An empty DataFrame (instead of a string)
# keeps `df` the same type before and after the download cell runs.
df = pd.DataFrame()

In [111]:
# Download the noise-pollution dataset from the online source
csv_url = 'https://citsci-noise-server.ornith.cornell.edu/noise/download/csv?lang=en'
# A timeout keeps the cell from hanging indefinitely if the server never answers
response_csv = requests.get(csv_url, timeout=60)

# Only a 200 response is parsed as CSV. Stop here otherwise, so that later
# cells never run against the placeholder `df`.
if response_csv.status_code != 200:
    raise RuntimeError(
        f"Error al descargar los datos (HTTP {response_csv.status_code})"
    )

# Wrap the response text in a file-like object so pandas can parse it
csv_content = io.StringIO(response_csv.text)
df = pd.read_csv(csv_content)

In [112]:
# Parse the UTC start timestamp once, then derive a date-only column
# (normalized to midnight) and a time-of-day column from it
start_utc = pd.to_datetime(df['Start date/time in UTC'])
df['Start date/time in UTC'] = start_utc
df['Start date UTC'] = start_utc.dt.normalize()
df['Start time UTC'] = start_utc.dt.time

# Relabel the 'America/New_York' time zone as plain 'New York'
timezone_column = df['Timezone']
df['Timezone'] = timezone_column.str.replace('America/New_York', 'New York')


In [113]:
# Keep only the rows whose time zone label is 'New York'.
# NOTE(review): this selects by time zone, not by location. The displayed
# result includes coordinates such as (33.88973, -84.27919), so confirm
# whether a geographic filter is needed to restrict the data to New York.
is_new_york = df['Timezone'].eq('New York')
df = df.loc[is_new_york]

In [114]:
# Display the filtered DataFrame
df

Unnamed: 0,Start date/time in UTC,Timezone,Duration (s),Observer ID,Latitude,Longitude,Mean volume (dBA),Mood,Could control noise exposure,Indoors,Emoji,Emoji descriptions,Nominated as noise refuge,Start date UTC,Start time UTC
0,2022-02-08 14:07:20,New York,29,56,42.71516,-76.40451,42,2,False,True,🗣🤣,"speaking head in silhouette,rolling on the flo...",False,2022-02-08,14:07:20
1,2022-02-08 14:56:00,New York,29,54,42.71489,-76.40509,39,1,True,True,🎶,multiple musical notes,False,2022-02-08,14:56:00
2,2022-02-08 15:46:02,New York,29,55,42.38951,-71.14181,70,3,False,False,🚗🛣,"automobile,motorway",False,2022-02-08,15:46:02
3,2022-02-09 10:17:05,New York,29,61,41.82063,-71.42895,72,1,True,True,🎶🎻🎺,"multiple musical notes,violin,trumpet",False,2022-02-09,10:17:05
4,2022-02-09 11:07:23,New York,29,64,42.47998,-76.45123,39,2,False,True,,,False,2022-02-09,11:07:23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2079,2023-12-28 13:07:43,New York,29,1537,43.73419,-70.29528,63,3,False,True,,,False,2023-12-28,13:07:43
2087,2024-01-02 01:39:31,New York,29,242,33.88973,-84.27919,33,2,False,True,🚗,automobile,False,2024-01-02,01:39:31
2088,2024-01-03 09:43:26,New York,29,281,42.42252,-71.19009,55,2,True,True,🚇🛠🕍,"metro,hammer and wrench,synagogue",False,2024-01-03,09:43:26
2089,2024-01-03 09:44:36,New York,29,62,42.48003,-76.45111,51,2,False,True,🏢🖥,"office building,desktop computer",False,2024-01-03,09:44:36


In [115]:
# The emoji descriptions are used as the indicator of which kind of source
# produces the noise, so the column is renamed accordingly
column_renames = {'Emoji descriptions': 'Noise_Source'}
df = df.rename(columns=column_renames)

In [116]:
# Map the numeric mood codes to easier-to-read names
def best_name(Mood):
    """Return the readable label for a numeric mood code.

    Codes 1 through 5 map to "Extremely Happy", "Happy", "Neutral",
    "Somewhat Sad" and "Very Sad" respectively; any other value yields
    "Unknown".
    """
    labels_by_code = (
        (1, "Extremely Happy"),
        (2, "Happy"),
        (3, "Neutral"),
        (4, "Somewhat Sad"),
        (5, "Very Sad"),
    )
    # Compare with == in code order (rather than a dict lookup) so that
    # unhashable or float-valued inputs are handled by plain equality.
    for code, label in labels_by_code:
        if Mood == code:
            return label
    return "Unknown"  # Fallback for unexpected values

# Translate every 'Mood' code into its label and store it as 'Mood_scale'
mood_codes = df['Mood']
df['Mood_scale'] = mood_codes.apply(best_name)


In [117]:
# List the column labels of the current DataFrame
df.columns

Index(['Start date/time in UTC', 'Timezone', 'Duration (s)', 'Observer ID',
       'Latitude', 'Longitude', 'Mean volume (dBA)', 'Mood',
       'Could control noise exposure', 'Indoors', 'Emoji', 'Noise_Source',
       'Nominated as noise refuge', 'Start date UTC', 'Start time UTC',
       'Mood_scale'],
      dtype='object')

In [118]:
# Collect the distinct values found in the 'Noise_Source' column
noise_source_column = df['Noise_Source']
unique_noise_sources = noise_source_column.unique()

# Show the distinct values
print(unique_noise_sources)


['speaking head in silhouette,rolling on the floor laughing'
 'multiple musical notes' 'automobile,motorway'
 'multiple musical notes,violin,trumpet' nan 'television' 'school'
 'desktop computer' 'bell,rolling on the floor laughing'
 'automobile,bird,taxi,bus,man+personal computer,office building' 'woman'
 'automobile,multiple musical notes,fountain,bird,bus,trumpet,honeybee,deciduous tree,speaking head in silhouette,rolling on the floor laughing'
 'automobile,bird,bus,wind blowing face,deciduous tree,speaking head in silhouette,rolling on the floor laughing'
 'automobile,vertical traffic light,bird,bus,wind blowing face,deciduous tree,speaking head in silhouette,rolling on the floor laughing'
 'speaking head in silhouette'
 'speaker with three sound waves,ice skate,office building,speaking head in silhouette,soccer ball'
 'pistol' 'national park' 'house with garden' 'automobile'
 'bird,airplane,honeybee'
 'hammer,automobile,bus,delivery truck,racing car'
 'television,speaking head in 

In [119]:
# Keep only the records whose noise source mentions automobiles or taxis.
# Automobiles are included because there is no Uber category as such and
# that is the closest one.
# 'Noise_Source' has missing values, so na=False makes those rows explicit
# non-matches instead of relying on how `|` coerces NaN in the mask.
# regex=False matches the words as literal substrings.
noise_source = df['Noise_Source']
mentions_taxi = noise_source.str.contains('taxi', na=False, regex=False)
mentions_automobile = noise_source.str.contains('automobile', na=False, regex=False)
df = df[mentions_taxi | mentions_automobile]


In [120]:

# Sort newest-first by start timestamp and renumber the rows from zero
df = df.sort_values('Start date/time in UTC', ascending=False, ignore_index=True)


In [121]:
# Show the resulting DataFrame
df

Unnamed: 0,Start date/time in UTC,Timezone,Duration (s),Observer ID,Latitude,Longitude,Mean volume (dBA),Mood,Could control noise exposure,Indoors,Emoji,Noise_Source,Nominated as noise refuge,Start date UTC,Start time UTC,Mood_scale
0,2024-01-02 01:39:31,New York,29,242,33.88973,-84.27919,33,2,False,True,🚗,automobile,False,2024-01-02,01:39:31,Happy
1,2023-12-14 23:08:02,New York,29,1520,43.68096,-70.45101,57,3,False,False,🚗🔊🌳,"automobile,speaker with three sound waves,deci...",False,2023-12-14,23:08:02,Neutral
2,2023-12-08 19:28:58,New York,29,1554,43.68148,-70.44591,74,5,False,False,🚕🚗🚁🚜🚨🚑🗣🏓⛈,"taxi,automobile,helicopter,tractor,police cars...",False,2023-12-08,19:28:58,Very Sad
3,2023-11-30 10:48:57,New York,29,1511,43.68446,-70.44591,70,3,False,False,🚗🚚🚁,"automobile,delivery truck,helicopter",False,2023-11-30,10:48:57,Neutral
4,2023-11-30 10:48:48,New York,29,1521,43.68160,-70.45195,69,4,False,False,🚁🚗🐦,"helicopter,automobile,bird",False,2023-11-30,10:48:48,Somewhat Sad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,2022-02-11 15:43:31,New York,29,62,42.47696,-76.46715,78,3,False,False,🚗🚦🐦🚌🌬🌳🗣🤣,"automobile,vertical traffic light,bird,bus,win...",False,2022-02-11,15:43:31,Neutral
77,2022-02-11 15:39:19,New York,29,62,42.47642,-76.46707,81,3,False,False,🚗🐦🚌🌬🌳🗣🤣,"automobile,bird,bus,wind blowing face,deciduou...",False,2022-02-11,15:39:19,Neutral
78,2022-02-11 13:43:37,New York,29,87,39.94971,-75.17148,68,2,False,False,🚗🎶⛲🐦🚌🎺🐝🌳🗣🤣,"automobile,multiple musical notes,fountain,bir...",True,2022-02-11,13:43:37,Happy
79,2022-02-10 15:18:48,New York,29,87,39.94969,-75.16720,53,2,False,True,🚗🐦🚕🚌👨‍💻🏢,"automobile,bird,taxi,bus,man+personal computer...",False,2022-02-10,15:18:48,Happy
