# Assessment 9

### 1. Import the data located at this link. It has information on Tech Institutes' total vacancies, total applicants, total entrants, and total enrolled. Moreover, the institutes are geolocated.

#### a) Installing and importing necessary packages

In [1]:
#!pip install folium
#!pip install branca

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import chardet

import folium as fm
from folium import Marker, GeoJson
from folium.plugins import MarkerCluster, HeatMap, StripePattern

import geopandas as gpd
from geopandas import GeoSeries
from shapely.geometry import Point, LineString

import branca as br 

#### b) Import the data of information on Tech Institutes' total vacancies, total applicants, total entrants, and total enrolled

In [None]:
# Column name -> dtype for every field the analysis needs. The key order of
# this dict is also the column order of the resulting DataFrame.
dtypes = {
    'cod_mod': str,
    'ltimoden_metaatencion': int,
    'cuentadeid_postulante_procesoadm': int,
    'sumaden_flagingresante': int,
    'sumaden_flagmatriculado': int,
    'nlat_ie': float,
    'nlong_ie': float,
}
# Load the CSV database, casting the listed columns while parsing
df = pd.read_csv(r'../../_data/institutos1.csv', dtype=dtypes)
# Drop every column that is not listed in `dtypes`
df = df[list(dtypes)]
df

In [None]:
# Human-readable label for each column, keyed by the original column name
variable_labels = {
    'cod_mod': 'institute code',
    'ltimoden_metaatencion': 'total vacancies',
    'cuentadeid_postulante_procesoadm': 'total applicants',
    'sumaden_flagingresante': 'total entrants',
    'sumaden_flagmatriculado': 'total enrolled',
    'nlat_ie': 'latitude',
    'nlong_ie': 'longitude',
}

# Attach the labels to the DataFrame as metadata.
# Assigning `.name` on `df[variable]` only changes the Series object returned
# by that lookup; it neither renames nor labels anything in `df`, so the labels
# were effectively discarded. `attrs` keeps them with the frame while leaving
# the column names untouched (the rest of the notebook selects by those names).
df.attrs['variable_labels'] = variable_labels

df.head(5)

#### c) Data frame that contains the institutes dataset intersected with the department shapefile

In [None]:
# Turn every institute into a point geometry (x = longitude column,
# y = latitude column). The coordinates are declared to be in the WGS 84
# system (World Geodetic System 1984), i.e. EPSG:4326.
institute_points = gpd.points_from_xy(df['nlong_ie'], df['nlat_ie'])
df_geo = gpd.GeoDataFrame(df, geometry=institute_points, crs="EPSG:4326")

# Import the shapefile
df_shp = gpd.read_file(r'../../_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp')

# Intersect the institute points with the shapefile geometries so that each
# institute row also carries the shapefile attributes
intersct_df_geo = gpd.overlay(df_geo, df_shp, how='intersection')

# Keep the institute fields plus the shapefile's CCDD column (presumably the
# department code) and the geometry
columns_to_keep = [
    'cod_mod',
    'ltimoden_metaatencion',
    'cuentadeid_postulante_procesoadm',
    'sumaden_flagingresante',
    'sumaden_flagmatriculado',
    'nlat_ie',
    'nlong_ie',
    'CCDD',
    'geometry',
]
intersct_df_geo = intersct_df_geo[columns_to_keep]
intersct_df_geo

In [None]:
# Institute-level count columns to be totalled for each CCDD value
df_sum = [
    'ltimoden_metaatencion',
    'cuentadeid_postulante_procesoadm',
    'sumaden_flagingresante',
    'sumaden_flagmatriculado',
]

# One row per CCDD value holding the sum of every count column
grouped_by_ccdd = intersct_df_geo.groupby(['CCDD'], as_index=False)
total_dep = grouped_by_ccdd[df_sum].sum()

# Attach the totals to the shapefile rows, matching on CCDD
total_dep_shp = df_shp.merge(total_dep, on='CCDD')
total_dep_shp.head(2)

In [None]:
# Choropleth map of the total institute vacancies, coloured per CCDD area of
# the shapefile.
zoom_start = 5

# Centre the map on the mean latitude / longitude of the institutes
centroid_lat = df["nlat_ie"].mean()
centroid_lon = df["nlong_ie"].mean()
# Old names kept as aliases so any other cell that refers to them still works
lat_hos = centroid_lat
long_hos = centroid_lon

m = fm.Map([centroid_lat, centroid_lon], tiles='cartodbpositron', zoom_start=zoom_start)

fm.Choropleth(
    geo_data=df_shp,
    data=total_dep_shp,
    # First column is the join key, second column is the value that is coloured
    columns=["CCDD", "ltimoden_metaatencion"],
    key_on="feature.properties.CCDD",
    fill_color="YlOrRd",
    fill_opacity=0.8,
    line_opacity=0.2,
    legend_name="Total Vacancies",
    smooth_factor=0,
    # folium spells this option in lowercase; the capitalised `Highlight` is
    # not recognised as that option, so hover highlighting was never enabled.
    highlight=True,
    line_color="#0000",
    overlay=True,
    nan_fill_color="White",  # areas with missing values are filled in white
).add_to(m)

fm.LayerControl().add_to(m)

m

### 2. Make an interactive map of markers using the `visual_html` function and show the total vacancies, applicants, entrants, and enrolled information in a table popup. Additionally, use "university" as the icon.

In [None]:
# Report whether the "cod_mod" column of the intersected data holds only
# distinct values, and how many distinct values it has
institute_codes = intersct_df_geo['cod_mod']
print("The column 'Cod_mod' has only unique values:", institute_codes.is_unique)
print("The number of unique values is:", len(institute_codes.unique()))

In [None]:
# Builds the HTML table shown in the popup of a single institute.
# It is meant to be called once per row of the institutes data.

def visual_html(i, data=None):
    """Return an HTML document with a summary table for one institute.

    The table has one row per indicator (vacancies, applicants, entrants,
    enrolled): a coloured label cell on the left and the value on the right.

    Parameters
    ----------
    i : int
        Positional (``iloc``) index of the institute's row.
    data : DataFrame, optional
        Table to read the values from. It must contain the columns
        ``ltimoden_metaatencion``, ``cuentadeid_postulante_procesoadm``,
        ``sumaden_flagingresante`` and ``sumaden_flagmatriculado``.
        When omitted, the module-level ``intersct_df_geo`` is used.

    Returns
    -------
    str
        A complete HTML document (doctype, head and body).

    Raises
    ------
    IndexError
        If ``i`` is outside the rows of the table.
    """
    if data is None:
        data = intersct_df_geo

    # (label shown in the popup, column holding the value)
    fields = (
        ("Total Vacancies", "ltimoden_metaatencion"),
        ("Total Applicants", "cuentadeid_postulante_procesoadm"),
        ("Total Entrants", "sumaden_flagingresante"),
        ("Total Enrolled", "sumaden_flagmatriculado"),
    )

    # Background colour of each table column
    left_col_colour = "#118000"
    right_col_colour = "#006f80"

    # Values are read column by column so each keeps its own dtype when it is
    # formatted (an integer count is rendered as "10", not "10.0").
    table_rows = "\n".join(
        "<tr>\n"
        f'<td style="background-color: {left_col_colour};">'
        f'<span style="color: #ffffff;">{label}</span></td>\n'
        f'<td style="width: 70px;background-color: {right_col_colour};">'
        f"{data[column].iloc[i]}</td>\n"
        "</tr>"
        for label, column in fields
    )

    # Visible content belongs in <body>; <head> only carries metadata.
    return (
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<head>\n"
        '<meta charset="utf-8">\n'
        "<title>Institute Data</title>\n"
        "</head>\n"
        "<body>\n"
        "<p> Institute Data </p>\n"
        '<table style="height: 126px; width: 200px;">\n'
        "<tbody>\n"
        f"{table_rows}\n"
        "</tbody>\n"
        "</table>\n"
        "</body>\n"
        "</html>\n"
    )

In [None]:
# Centre of the map: mean latitude and mean longitude of the institutes
ubication = (intersct_df_geo['nlat_ie'].mean(), intersct_df_geo['nlong_ie'].mean())

inst = fm.Map(location=ubication, zoom_start=7)

# One marker per institute. `i` is the row position, which is what
# visual_html expects in order to build the popup table for that row.
institute_coords = zip(intersct_df_geo['nlat_ie'], intersct_df_geo['nlong_ie'])
for i, (lat, lon) in enumerate(institute_coords):
    html = visual_html(i)

    # Wrap the HTML table in an iframe and use it as the marker's popup
    iframe = br.element.IFrame(html=html, width=350, height=300)
    popup = fm.Popup(iframe, parse_html=True)

    university_icon = fm.Icon(color='blue', icon='university', prefix="fa")
    fm.Marker([lat, lon], popup=popup, icon=university_icon).add_to(inst)

# Write the map to an HTML file, then display it in the notebook
inst.save("Institute_data.html")
inst