In [None]:
import os
import typing as T
from itertools import zip_longest

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
from geopandas import GeoDataFrame
from shapely.geometry import LineString

# Data Collection

In [None]:
# Enrollment table. A later cell aggregates its 'Postal Code Course' and
# 'Total Course' columns into per-zip-code weights for the heat map.
dist = pd.read_csv('../data/Enrollment.csv')

# Plot Barcelona streets

In [None]:
def convert_line_to_wkt(line: str) -> LineString:
    """Build a shapely ``LineString`` from a flat comma-separated coordinate string.

    The string is split on commas and the resulting values are grouped two at
    a time by ``reshape_line``; each pair becomes one vertex of the line.
    Despite the function name, the return value is a geometry object, not a
    WKT string.

    NOTE(review): with an odd number of values the last pair is padded with
    ``None`` (``fillvalue=None``) -- presumably the input always holds an even
    number of values; verify against the data.
    """
    coordinate_pairs = reshape_line(line.split(","), chunksize=2, fillvalue=None)
    return LineString(coordinate_pairs)
    
def reshape_line(
    line: T.Iterable[str],
    chunksize: int,
    fillvalue: T.Optional[float],
) -> T.Iterator[T.Tuple[T.Optional[float], ...]]:
    """Group a flat sequence of numeric strings into tuples of ``chunksize`` floats.

    Every element of ``line`` is converted with ``float`` and the values are
    emitted, in order, as consecutive tuples of length ``chunksize``::

        reshape_line(["1", "2", "3", "4"], 2, None) --> (1.0, 2.0) (3.0, 4.0)
        reshape_line(["1", "2", "3"], 2, None)      --> (1.0, 2.0) (3.0, None)

    This is the "grouper" recipe built on `zip_longest`, see
    <https://docs.python.org/3/library/itertools.html#itertools.zip_longest> and
    the StackOverflow answer <https://stackoverflow.com/a/434411/5819113>.

    Parameters
    ----------
    line:
        Strings that ``float`` can parse.
    chunksize:
        Number of values per output tuple.
    fillvalue:
        Value used to pad the last tuple when ``len(line)`` is not a multiple
        of ``chunksize``. It is inserted as-is (not converted to float), so
        ``None`` is a valid choice.

    Returns
    -------
    A lazy iterator of tuples. Conversion happens while iterating, so a
    ``ValueError`` from ``float`` surfaces when the result is consumed.
    """
    values = (float(el) for el in line)
    # The SAME iterator object is repeated ``chunksize`` times, so each output
    # tuple pulls ``chunksize`` consecutive values from the one shared stream.
    return zip_longest(*[values] * chunksize, fillvalue=fillvalue)

In [None]:
# Load the street-segment table; the "Coordenades" column holds each segment
# as a flat comma-separated string of coordinate values.
streets = pd.read_csv("../data/transit_relacio_trams.csv", sep=",")
# Convert the whole column: one LineString per row.
coords_wkt = streets["Coordenades"].apply(convert_line_to_wkt)
# Use the "authority:code" CRS form; the {'init': 'epsg:4326'} dict is the
# deprecated proj4 "+init" syntax and triggers a FutureWarning in pyproj.
crs = "EPSG:4326"
gdf = GeoDataFrame(streets, crs=crs, geometry=coords_wkt)
# Static matplotlib preview of the street geometries.
ax = gdf.plot(cmap="viridis")
# Interactive map; the street layer is added here and later cells add markers
# to the same `barcelona_map` object.
barcelona_map = folium.Map([41.3947, 2.1557], zoom_start=12.4, tiles='cartodbpositron')
folium.GeoJson(gdf).add_to(barcelona_map)

In [None]:
# School acronyms shown in the marker popups; the order matches the
# coordinate lists below (first name <-> first lon/lat, and so on).
list_schools = [
    'EEABB', 'EEBE', 'EETAC', 'EPSEB', 'EPSEM', 'ESEIAAT', 'ETSAB',
    'ETSECCPB', 'ETSEIB', 'ETSETB', 'FIB', 'FME', 'FNB', 'FOOT',
]

# One row per school. `dtype=str` turns every column, coordinates included,
# into strings.
# NOTE(review): the markers below therefore receive string coordinates --
# presumably folium accepts numeric strings; confirm before changing the dtype.
data = pd.DataFrame(
    {
        'lon': [
            1.9866716999999983,
            2.2231519345468,
            1.9872505,
            2.1130787881117503,
            1.8290409554431237,
            2.020260374655512,
            2.115296240644385,
            2.1114770846571855,
            2.11563799713493,
            2.112271912477879,
            2.113366253609662,
            2.1156621824795994,
            2.184497723790824,
            2.0236185376204054,
        ],
        'lat': [
            41.275919200000004,
            41.414261499999995,
            41.2755674,
            41.3835842,
            41.737127900000004,
            41.56258005,
            41.384277749999995,
            41.38902913883299,
            41.38491311671433,
            41.38881605868467,
            41.38950017997591,
            41.38390531398507,
            41.38396190629874,
            41.56884538208665,
        ],
        'Name': list_schools,
    },
    dtype=str,
)

# Drop one marker per school onto the existing map (location is [lat, lon]).
for school in data.itertuples(index=False):
    folium.Marker(
        location=[school.lat, school.lon],
        popup=school.Name,
    ).add_to(barcelona_map)

# Show the map again
barcelona_map

# Municipalities

In [None]:

# files = glob.iglob('/home/jan/Misc/ds-codigos-postales/data*.geojson')
# gdfs = (gpd.read_file(file) for file in files) # generator


In [None]:
# Postal code <-> municipality lookup table. Every column is read with the
# pandas "string" dtype; the next cell applies `.str.zfill(5)` to
# `codigo_postal`, which requires a string column.
municipalities = pd.read_csv("../data/codigos_postales_municipios.csv", dtype="string")
# Preview the first rows.
municipalities.head()

In [None]:
# `gpd` is imported in the notebook's first (imports) cell rather than here,
# so all dependencies are visible in one place.

# Postal-code geometries read from a shapefile.
path = "../data/codigos_postales.shp"
spain = gpd.read_file(path)

# Left-pad postal codes with zeros to 5 characters before joining
# (presumably so they match the format of COD_POSTAL -- verify).
municipalities['codigo_postal'] = municipalities['codigo_postal'].str.zfill(5)

# Attach the municipality name to each geometry. Only postal codes present in
# both tables are kept (inner join). The merge is called on the GeoDataFrame
# so the result stays a GeoDataFrame.
spain = spain.merge(
    municipalities[['codigo_postal', 'municipio_nombre']],
    how='inner',
    left_on='COD_POSTAL',
    right_on='codigo_postal',
)
spain.head()


# Filtering

In [None]:
# Read the students workbook with every column as pandas "string" dtype.
raw_student_data = pd.read_excel("../data/Datathon_Results_MOBILITY_2022_original_Students.xlsx", dtype='string')
# Preview the column at position 6; a later cell uses it as the postal code.
# NOTE(review): the frame is already read with dtype='string', so the extra
# astype("string") looks redundant -- confirm before removing.
raw_student_data.iloc[:, 6].astype("string").head()

In [None]:
# Preview the first rows of the raw student table (all columns).
raw_student_data.head()

In [None]:
# Preview the postal-code geometries after the municipality names were merged in.
spain.head()

In [None]:
# Postal code of every student: the raw table's column at position 6,
# left-padded with zeros to 5 characters, as a one-column frame.
students_per_municipality = (
    raw_student_data.iloc[:, 6]
    .str.zfill(5)
    .to_frame(name='codigo_postal')
)
# Number of students per postal code.
students_per_municipality.value_counts('codigo_postal')

In [None]:
def plot_municipality_heat_map(municipality_weights, folium_map=None):
    """Shade postal-code areas on a folium map according to a per-zip-code weight.

    Parameters
    ----------
    municipality_weights:
        DataFrame with a ``zip_code`` column and a ``weight`` column. Zip codes
        are normalised to zero-padded 5-character strings before joining. The
        weight is used directly as the fill opacity of the area, so it is
        expected to lie in ``[0, 1]``. The input frame is not modified.
    folium_map:
        Map to draw on. When omitted, the notebook-level ``municipality_map``
        is used, which must exist before the call.

    Returns
    -------
    The map the layer was added to.

    Notes
    -----
    Reads the notebook-level ``spain`` GeoDataFrame (columns ``COD_POSTAL`` and
    ``municipio_nombre``). Each call adds a new GeoJson layer, so calling the
    function repeatedly on the same map stacks layers.
    """
    if folium_map is None:
        # Backward-compatible default: draw on the shared notebook map.
        folium_map = municipality_map

    municipality_weights = municipality_weights.copy()
    # Normalise zip codes to 5-character strings. Rows without a zip code keep
    # a missing value after the index-aligned assignment.
    municipality_weights['zip_code'] = (
        municipality_weights['zip_code']
        .dropna()
        .astype(int)
        .astype(str)
        .str.zfill(5)
    )
    # Keep only the areas that have a weight (inner join on the postal code).
    spain_displayed = pd.merge(
        spain,
        municipality_weights,
        how='inner',
        left_on=['COD_POSTAL'],
        right_on=['zip_code'],
    )

    # Tooltip text: "<postal code> <municipality name>".
    spain_displayed['name'] = (spain_displayed['COD_POSTAL'] + " " + spain_displayed['municipio_nombre'])

    tooltip = folium.GeoJsonTooltip(
        fields=["name"],
        aliases=["Municipality"],
        localize=True,
        sticky=False,
        labels=True,
        style="""
            background-color: #F0EFEF;
            border: 1px solid black;
            border-radius: 3px;
            box-shadow: 3px;
        """,
        max_width=800,
    )

    def style(feature):
        # Constant blue fill with a thin black outline; only the opacity varies.
        return {
            'fillColor': 'blue',
            "weight": 1,
            "color": "black",
            'fillOpacity': feature['properties']['weight'],
        }

    folium.GeoJson(spain_displayed, style_function=style, tooltip=tooltip).add_to(folium_map)
    return folium_map
    

In [None]:
# Preview the enrollment table loaded from ../data/Enrollment.csv.
dist.head()

In [None]:
# Fresh base map for the municipality heat map.
municipality_map = folium.Map(
    location=[41.3947, 2.1557],
    zoom_start=12.4,
    tiles='cartodbpositron',
)

# Total per postal code, log-scaled and divided by the log of the largest
# total, so the largest total maps to 1.
# NOTE(review): a total of 1 maps to 0, and a total of 0 (or a maximum of 1)
# yields a non-finite value -- confirm the data never hits these cases.
weights1 = (
    dist[['Postal Code Course', 'Total Course']]
    .rename(columns={'Postal Code Course': 'zip_code', 'Total Course': 'weight'})
    .groupby('zip_code')
    .sum()
    .assign(weight=lambda totals: np.log(totals['weight']) / np.log(totals['weight'].max()))
    .reset_index()
)



In [None]:
# Draw the enrollment-based weights; the returned map is the cell's output.
plot_municipality_heat_map(weights1)


# Renta (renta disponible)

In [None]:
# Load the income table and turn its unnamed first column into an integer
# `ZipCode` column:
#   * only the first 5 characters of the label are kept (we lose the region
#     names that follow the zip code in this step),
#   * labels that do not parse as a number become NaN and those rows are dropped.
renta_data = (
    pd.read_csv("../data/renta.csv")
    .rename(columns={'Unnamed: 0': 'ZipCode'})
    .assign(ZipCode=lambda frame: pd.to_numeric(frame['ZipCode'].str[0:5], errors='coerce'))
    .dropna(subset=['ZipCode'])
    .astype({'ZipCode': int})
)
renta_data