# DATASET & CHARTS

## Basic Libraries

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt

from shapely.geometry import Point

import plotly.express as px
import plotly.graph_objects as go


%pylab inline

## Dataset

In [None]:
# Location of the combined dataset (CSV) in the project's GitHub repository.
url = (
    'https://raw.githubusercontent.com/francheska-vicente/datapre-project/'
    'main_v2/data_output/combined_data.csv'
)

# Load the whole file into a DataFrame; later cells read the 'Geolocation'
# and 'Year' columns plus one column per selected indicator.
sdg_data = pd.read_csv(url)

# Bare expression so the notebook renders the table.
sdg_data

## Line Charts

In [None]:
# Start from an empty region selection; it is overwritten a few lines below.
regions_selected = []

In [None]:
# Regions to compare; each entry is matched against the 'Geolocation' column.
regions_selected = [
    'NCR: National Capital Region',
    'Region 1: Ilocos Region',
]

In [None]:
# Start from an empty indicator selection; it is overwritten a few lines below.
indicators_selected = []

In [None]:
# Indicators to chart; each entry is used as a column name of the dataset.
indicators_selected = [
    '1.4.1 Net JHS Enrolment Rate',
    '1.2.1 Poverty Proportion',
]

In [None]:
# Build the long-format table used by the line charts: one row per
# (region, year) holding every selected indicator.
#
# The columns are taken directly from the selection, so any number of
# indicators works (previously only the first two were ever read, and an
# empty selection raised IndexError).
line_columns = ['Geolocation', 'Year', *indicators_selected]

region_frames = []
if indicators_selected:
    for region in regions_selected:
        # Select this region's rows and the needed columns in one step.
        temp_region = sdg_data.loc[sdg_data['Geolocation'] == region, line_columns]

        # thresh = number of indicators + 1: a row is kept when at most one
        # of its values (Geolocation, Year, indicators) is missing.
        temp_region = temp_region.dropna(thresh=len(indicators_selected) + 1)
        temp_region['Year'] = temp_region['Year'].astype('int')

        region_frames.append(temp_region)

# Concatenate once, in the order of regions_selected. With nothing selected
# the result is an empty DataFrame.
two_region = pd.concat(region_frames) if region_frames else pd.DataFrame()
two_region = two_region.reset_index(drop=True)

# Bare expression so the notebook renders the table.
two_region

In [None]:
def line_update_layout(fig, title, label):
    """Apply the shared line-chart styling to ``fig`` and return it.

    Args:
        fig: Object exposing ``update_layout(**kwargs)``; in this notebook a
            Plotly figure.
        title: Text of the chart title, placed at x=0.5, y=0.95.
        label: Title of the y-axis. The x-axis title is always ``'Year'``.

    Returns:
        The same ``fig`` object that was passed in.
    """
    font_family = "Cambria"
    black = "#000000"

    def axis_style():
        # Both axes share one look: visible black axis line, no grid,
        # outside ticks and 16pt tick labels. A fresh dict is built per call
        # so the two axes never share a mutable object.
        return dict(
            showline=True,
            showgrid=False,
            showticklabels=True,
            linecolor=black,
            linewidth=2,
            ticks='outside',
            tickfont=dict(family=font_family, size=16, color=black),
        )

    fig.update_layout(
        # Title
        title={'text': title, 'y': 0.95, 'x': 0.5},
        title_font_family=font_family,
        title_font_color=black,
        title_font_size=20,

        # Axis and legend font
        font_family=font_family,
        font_color=black,

        # Axes
        xaxis_title='Year',
        xaxis=axis_style(),
        yaxis_title=label,
        yaxis=axis_style(),

        # One hover box listing every trace at the hovered x position.
        hovermode="x unified",

        autosize=True,
        showlegend=True,

        # Background outside the plotting area.
        paper_bgcolor="LightSteelBlue",

        # Background inside the plotting area. CSS colour keywords contain no
        # spaces, so the former 'light gray' is spelled 'lightgray' here.
        plot_bgcolor='lightgray',
    )

    return fig

In [None]:
# Show the current indicator selection.
print (indicators_selected)

In [None]:
# Draw one line chart per selected indicator, one line per region.
for indicator in indicators_selected:
    # Drop everything up to the first space of the indicator name (the
    # leading code in e.g. '1.2.1 Poverty Proportion') for labels and title.
    label = indicator.partition(' ')[2]

    # Keep only rows that have a value for this particular indicator.
    df_visualization = two_region[['Geolocation', 'Year', indicator]].dropna()

    fig = px.line(df_visualization, x='Year', y=indicator, markers=True,
                  labels={indicator: label}, color='Geolocation')

    title = label + ' per Year'
    fig = line_update_layout(fig, title, label)

    # Years are shown as discrete categories. By default categories appear in
    # the order they are first met in the data, so a region covering extra
    # years would push them to the end; sort them explicitly instead.
    fig.update_xaxes(type='category', categoryorder='category ascending')

    fig.show()

## Bar Charts

### HORIZONTAL BAR CHART

In [None]:
# Reset the region selection; it is overwritten a few lines below.
regions_selected = []

In [None]:
# Region labels selected for this section.
regions_selected = [
    'NCR: National Capital Region',
    'Region 1: Ilocos Region',
]

In [None]:
# Reset the indicator selection; it is overwritten a few lines below.
indicators_selected = []

In [None]:
# Indicators to chart; each entry is used as a column name of the dataset.
indicators_selected = [
    '1.4.1 Net JHS Enrolment Rate',
    '1.2.1 Poverty Proportion',
]

In [None]:
# Build the table used by the bar charts: every location except the
# 'PHILIPPINES' row, with the year and all selected indicators.
#
# The table is built exactly once. The previous version repeated the same
# selection once per indicator (its loop variable was never used), which
# appended identical rows and duplicated the whole table.
if indicators_selected:
    # Take the columns straight from the selection so any number of
    # indicators is supported, not just the first two.
    temp_region = sdg_data[['Geolocation', 'Year', *indicators_selected]]
    temp_region = temp_region[temp_region['Geolocation'] != 'PHILIPPINES']

    # thresh = number of indicators + 1: a row is kept when at most one of
    # its values (Geolocation, Year, indicators) is missing.
    temp_region = temp_region.dropna(thresh=len(indicators_selected) + 1)
    temp_region['Year'] = temp_region['Year'].astype('int')

    two_region = temp_region.reset_index(drop=True)
else:
    # Nothing selected: keep the empty table the original loop produced.
    two_region = pd.DataFrame()

# Bare expression so the notebook renders the table.
two_region

In [None]:
def bar_update_layout(fig, title, label):
    """Apply the shared horizontal-bar-chart styling to ``fig`` and return it.

    Args:
        fig: Object exposing ``update_layout(**kwargs)``; in this notebook a
            Plotly figure.
        title: Text of the chart title, placed at x=0.5, y=0.95.
        label: Title of the x-axis. The y-axis title is always
            ``'Geolocation'``.

    Returns:
        The same ``fig`` object that was passed in.
    """
    font_family = "Cambria"
    black = "#000000"

    def axis_style(tick_size, **extra):
        # Common axis look (black axis line, no grid, outside ticks); only
        # the tick label size and optional extra settings differ per axis.
        return dict(
            showline=True,
            showgrid=False,
            showticklabels=True,
            linecolor=black,
            linewidth=2,
            ticks='outside',
            tickfont=dict(family=font_family, size=tick_size, color=black),
            **extra,
        )

    fig.update_layout(
        # Title
        title={'text': title, 'y': 0.95, 'x': 0.5},
        title_font_family=font_family,
        title_font_color=black,
        title_font_size=20,

        # Axis font
        font_family=font_family,
        font_color=black,

        # x-axis
        xaxis_title=label,
        xaxis=axis_style(14),

        # y-axis: 'total ascending' orders the categories by their total
        # value, ascending from bottom to top.
        yaxis_title='Geolocation',
        yaxis=axis_style(10, categoryorder='total ascending'),

        autosize=True,
        showlegend=True,

        # Background inside the plotting area. CSS colour keywords contain no
        # spaces, so the former 'light grey' is spelled 'lightgrey' here.
        plot_bgcolor='lightgrey',
    )

    return fig

In [None]:
# Short display names of the locations: the text after the first ':' of each
# distinct 'Geolocation' label (e.g. 'NCR: National Capital Region' ->
# 'National Capital Region').
#
# - 'PHILIPPINES' is excluded by name, matching the filter used when the
#   bar-chart table is built, instead of assuming it is the first value.
# - Labels without a ':' are kept whole rather than raising IndexError.
# - Surrounding whitespace (the space after the colon) is stripped.
geolocation_values = [
    name.split(':', 1)[-1].strip()
    for name in sdg_data['Geolocation'].dropna().unique()
    if name != 'PHILIPPINES'
]

In [None]:
# Draw one horizontal bar chart per selected indicator, showing each
# location's value for the most recent year that has data.
for indicator in indicators_selected:
    # Drop everything up to the first space of the indicator name (the
    # leading code in e.g. '1.2.1 Poverty Proportion') for labels and title.
    label = indicator.partition(' ')[2]

    # Keep only rows that have a value for this particular indicator.
    df_visualization = two_region[['Geolocation', 'Year', indicator]].dropna()
    if df_visualization.empty:
        # No data at all for this indicator: nothing to plot.
        continue

    # Use the latest year explicitly; the last *seen* year is only the
    # latest when the table happens to be sorted by year.
    latest_year = df_visualization['Year'].max()

    df_visualization_curr = df_visualization[df_visualization['Year'] == latest_year]
    df_visualization_curr = df_visualization_curr.drop_duplicates().copy()

    # Derive the bar label from each row's own 'Geolocation' value (the text
    # after the colon) so labels always line up with the plotted rows, even
    # when some locations have no value for this year.
    df_visualization_curr['Region'] = (
        df_visualization_curr['Geolocation'].str.split(':').str[-1].str.strip()
    )

    fig = px.bar(df_visualization_curr, x=indicator, y='Region',
                 labels={indicator: label}, color='Geolocation')
    title = label + ' of the Year ' + str(latest_year)
    fig = bar_update_layout(fig, title, label)

    fig.show()