# notebook for the capstone

### load/install librarys

In [6]:
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import requests
import json

from sklearn.cluster import KMeans
from pyproj import Transformer

import geopandas as gpd  # need to build a json with WGS format, folium use that format in geojson
# !pip install folium
import folium

In [3]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### data collection

In [4]:
!wget -q -O 'valencia_data.json' http://mapas.valencia.es/lanzadera/opendata/Barrios/JSON
print('Data downloaded!')

Data downloaded!


In [7]:
with open('valencia_data.json') as json_data:
    valencia_data = json.load(json_data)

In [87]:
neighborhoods_data = valencia_data['features']
# define the dataframe columns
column_names = ['Borough', 'Neighborhood', 'Latitude', 'Longitude'] 

# instantiate the dataframe
neighborhoods = pd.DataFrame(columns=column_names)
transformer = Transformer.from_crs("epsg:25830", "epsg:4326")  # convert utm30 to wgs84

for data in neighborhoods_data:
    borough = neighborhood_name = data['properties']['coddistrit'] 
    neighborhood_name = data['properties']['nombre']
        
    neighborhood_latlon = np.array(data['geometry']['coordinates'][0])
    
    #neighborhood_lat = neighborhood_latlon.mean(axis=0)[1]
    #neighborhood_lon = neighborhood_latlon.mean(axis=0)[0]
    neighborhood_lon, neighborhood_lat = transformer.transform(neighborhood_latlon.mean(axis=0)[0], neighborhood_latlon.mean(axis=0)[1])
    neighborhoods = neighborhoods.append({'Borough': borough,
                                          'Neighborhood': neighborhood_name,
                                          'x': neighborhood_latlon.mean(axis=0)[0],
                                          'y': neighborhood_latlon.mean(axis=0)[1],
                                          'Latitude': neighborhood_lat,
                                          'Longitude': neighborhood_lon}, ignore_index=True)

In [88]:
neighborhoods.style.format("{:.4f}")
pd.options.display.float_format = '{:.5f}'.format
neighborhoods.head()


Unnamed: 0,Borough,Neighborhood,Latitude,Longitude,x,y
0,17,BENIFARAIG,-0.38583,39.52843,724687.45042,4378683.63279
1,16,BENICALAP,-0.39022,39.4926,724425.06294,4374694.98757
2,15,TORREFIEL,-0.37592,39.49518,725646.59555,4375017.44694
3,5,TORMOS,-0.37955,39.48895,725354.34755,4374316.40432
4,5,SANT ANTONI,-0.37355,39.48838,725872.71353,4374268.68979


In [32]:
df_airbnb = pd.read_csv('https://code.montera34.com/airbnb/valencia/-/raw/master/data/original/airbnb/190227/listings_summary_valencia_insideairbnb.csv', index_col=False)

In [33]:
df_airbnb.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,4620,VALENCIA HISTORIC HOUSE 50M BEACH,7093832,Francisca,POBLATS MARITIMS,LA MALVA-ROSA,39.47553,-0.32461,Entire home/apt,150,2,76,2019-01-21,0.91,1,305
1,45528,Valencia: El Saler Beach and Albufera Lake,202104,Lidia,POBLATS DEL SUD,EL SALER,39.35621,-0.32292,Entire home/apt,60,3,46,2018-12-08,0.65,1,50
2,48154,beautiful apartment wifi,219476,Toni,LA SAIDIA,MORVEDRE,39.48575,-0.37428,Entire home/apt,45,3,71,2018-12-06,0.7,3,345
3,55692,Delight in city center,120649,Pilar,CIUTAT VELLA,LA SEU,39.47681,-0.37523,Entire home/apt,100,1,1,2014-11-11,0.02,1,146
4,95393,Modern and bright Duplex 3 Bedrooms Center Wi-Fi,508042,Carmen,CIUTAT VELLA,EL CARME,39.47787,-0.38142,Entire home/apt,134,1,88,2019-02-22,0.94,6,167


In [109]:
np.sort(neighborhoods['Neighborhood'].unique())

array(['AIORA', 'ALBORS', 'ARRANCAPINS', 'BENICALAP', 'BENIFARAIG',
       'BENIFERRI', 'BENIMACLET', 'BENIMAMET', 'BETERO', 'BORBOTO',
       'CABANYAL-CANYAMELAR', 'CAMI DE VERA', 'CAMI FONDO', 'CAMI REAL',
       'CAMPANAR', 'CARPESA', "CASTELLAR-L'OLIVERAL",
       'CIUTAT DE LES ARTS I DE LES CIENCIES', 'CIUTAT FALLERA',
       'CIUTAT JARDI', 'CIUTAT UNIVERSITARIA', 'EL BOTANIC', 'EL CALVARI',
       'EL CARME', "EL FORN D'ALCEDO", 'EL GRAU', 'EL MERCAT',
       'EL PALMAR', 'EL PERELLONET', 'EL PILAR', 'EL PLA DEL REMEI',
       'EL SALER', 'ELS ORRIOLS', 'EN CORTS', 'EXPOSICIO', 'FAITANAR',
       'FAVARA', 'JAUME ROIG', "L'AMISTAT", "L'HORT DE SENABRE",
       "L'ILLA PERDUDA", 'LA CARRASCA', 'LA CREU COBERTA',
       'LA CREU DEL GRAU', 'LA FONTETA S.LLUIS', 'LA FONTSANTA',
       'LA GRAN VIA', 'LA LLUM', 'LA MALVA-ROSA', 'LA PETXINA',
       'LA PUNTA', 'LA RAIOSA', 'LA ROQUETA', 'LA SEU', 'LA TORRE',
       'LA VEGA BAIXA', 'LA XEREA', 'LES CASES DE BARCENA',
       'LES T

In [126]:
df = df_airbnb.groupby('neighbourhood')['price'].mean().to_frame().reset_index().rename(columns={'neighbourhood': 'Neighborhood'})


In [127]:
df.head()

Unnamed: 0,Neighborhood,price
0,AIORA,47.1123
1,ALBORS,53.35
2,ARRANCAPINS,59.62745
3,BENICALAP,45.8125
4,BENIFERRI,52.66667


In [128]:
df1 = df.merge(neighborhoods, on='Neighborhood')
df1.head()

Unnamed: 0,Neighborhood,price,Borough,Latitude,Longitude,x,y
0,AIORA,47.1123,12,-0.34449,39.46619,728444.62707,4371878.35507
1,ALBORS,53.35,12,-0.35297,39.46869,727707.01365,4372134.44271
2,ARRANCAPINS,59.62745,3,-0.38459,39.46453,724999.51236,4371592.98303
3,BENICALAP,45.8125,16,-0.39022,39.4926,724425.06294,4374694.98757
4,BENIFERRI,52.66667,18,-0.40749,39.49821,722921.89181,4375274.8001


In [129]:
df1.shape

(87, 7)

In [130]:
neighborhoods.shape

(88, 6)

In [166]:
data = gpd.read_file("valencia_data.json")
data.to_crs(epsg=4326).to_file("valencia_data_wgs.json", driver="GeoJSON")
m = folium.Map(location=[39.46, -.3], zoom_start=11)
folium.GeoJson(
    "valencia_data_wgs.json",
    name='geojson'
).add_to(m)


m

In [145]:
folium.GeoJson(data=(open("valencia_data.json", "r", encoding="utf-8-sig")).read())

<folium.features.GeoJson at 0x7fd7ce544358>