# DATASET & CHARTS

## Basic Libraries

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt

from shapely.geometry import Point

import plotly.express as px
import plotly.graph_objects as go


%pylab inline

Populating the interactive namespace from numpy and matplotlib


## Dataset

In [2]:
url = 'https://raw.githubusercontent.com/francheska-vicente/datapre-project/main_v2/data_output/combined_data.csv'
sdg_data = pd.read_csv (url)
sdg_data 

Unnamed: 0,Geolocation,Year,1.2.1 Poverty Proportion,1.4.1 Net Elem Enrolment Rate,1.4.1 Net Elem Enrolment Rate (Girls),1.4.1 Net Elem Enrolment Rate (Boys),1.4.1 Net JHS Enrolment Rate,1.4.1 Net JHS Enrolment Rate (Girls),1.4.1 Net JHS Enrolment Rate (Boys),1.4.1 Net SHS Enrolment Rate,...,Gross Capital Formation,GRDP,Population,Primary Drop-out rate,Primary Drop-out rate (Girls),Primary Drop-out rate (Boys),Secondary Drop-out rate,Secondary Drop-out rate (Girls),Secondary Drop-out rate (Boys),Price Index for Agriculture
0,PHILIPPINES,2000,,96.77,97.28,96.27,66.06,69.49,62.72,,...,579938180.0,3.697556e+09,76723051.0,,,,,,,
1,NCR: National Capital Region,2000,,101.00,101.92,100.13,79.05,79.50,78.57,,...,203930819.0,1.237451e+09,9961971.0,,,,,,,
2,CAR: Cordillera Administrative Region,2000,,94.42,94.58,94.26,71.19,76.37,66.14,,...,13865180.0,9.044601e+07,1369249.0,,,,,,,
3,Region 1: Ilocos Region,2000,,97.73,97.01,98.41,87.51,90.05,85.07,,...,24454284.0,1.289450e+08,4209083.0,,,,,,,
4,Region 2: Cagayan Valley,2000,,95.65,95.74,95.57,77.11,81.11,73.31,,...,32773347.0,8.593798e+07,2819641.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391,Region 10: Northern Mindanao,2021,,,,,,,,,...,164566009.0,9.492320e+08,,,,,,,,107.975
392,Region 11: Davao Region,2021,,,,,,,,,...,257595240.0,9.672276e+08,,,,,,,,110.850
393,Region 12: SOCCSKSARGEN,2021,,,,,,,,,...,87077953.0,5.039756e+08,,,,,,,,103.350
394,CARAGA: CARAGA Administrative Region,2021,,,,,,,,,...,100730468.0,3.317629e+08,,,,,,,,104.525


## Line Charts

In [36]:
regions_selected = []

In [37]:
regions_selected = ['NCR: National Capital Region', 'Region 1: Ilocos Region']

In [38]:
indicators_selected = ['1.4.1 Net JHS Enrolment Rate', '1.2.1 Poverty Proportion']

In [48]:
two_region = pd.DataFrame ()

for region in regions_selected:
    if len (indicators_selected) > 1:
        temp_region = sdg_data [sdg_data['Geolocation'] == region][['Year', indicators_selected [0], indicators_selected [1]]]
    else:
        temp_region = sdg_data [sdg_data['Geolocation'] == region][['Year', indicators_selected [0]]]
    
    temp_region = pd.concat ([sdg_data['Geolocation'], temp_region], axis=1)
    
    two_region = pd.concat([two_region, temp_region])
    
two_region = two_region.reset_index (drop = True)
two_region

Unnamed: 0,Geolocation,Year,1.4.1 Net JHS Enrolment Rate,1.2.1 Poverty Proportion
0,PHILIPPINES,,,
1,NCR: National Capital Region,2000.0,79.05,
2,CAR: Cordillera Administrative Region,,,
3,Region 1: Ilocos Region,,,
4,Region 2: Cagayan Valley,,,
...,...,...,...,...
787,Region 10: Northern Mindanao,,,
788,Region 11: Davao Region,,,
789,Region 12: SOCCSKSARGEN,,,
790,CARAGA: CARAGA Administrative Region,,,


In [49]:
def line_update_layout (title, label):
    fig.update_layout(
        # TITLE

        title={'text' : title, 'y': 0.95, 'x' : 0.5, # Position of the title
              # 'xanchor': 'center', 'yanchor': 'top'
              },
        title_font_family="Cambria",
        title_font_color="#000000",
        title_font_size=20,


        # axis and legend font
        font_family="Cambria",
        font_color="#000000",


        # x-axis
        xaxis_title='Year',

        xaxis=dict(
            showline=True,
            showgrid=False,
            showticklabels=True,
            linecolor='#000000',
            linewidth=2,
            ticks='outside',
            tickfont=dict(
                family='Cambria',
                size=16,
                color='#000000',
            ),
        ),


        # y-axis
        yaxis_title = label, 

        yaxis=dict(
            showgrid=False,
            showline=True,
            showticklabels=True,
            linecolor='#000000',
            linewidth=2,
            ticks='outside',
            tickfont=dict(
                family='Cambria',
                size=16,
                color='#000000',
            ),

        ),

        hovermode="x unified",

        autosize=True,

        # MARGIN
        # margin=dict(autoexpand=False, l=100, r=20,t=110),

        showlegend=True,

        paper_bgcolor="LightSteelBlue", # BG COLOR OUTSIDE CHART

        plot_bgcolor='light gray' # BG COLOR INSIDE CHART
    )
    
    return fig

In [50]:
print (indicators_selected)

['1.4.1 Net JHS Enrolment Rate', '1.2.1 Poverty Proportion']


In [54]:
for indicator in indicators_selected:
    label = " ".join (indicator.split (' ') [1 : ])
    df_visualization = two_region [['Geolocation', 'Year', indicator]]
    df_visualization = df_visualization.dropna ()
    
    x_axis_values = df_visualization ['Year'].unique ()
    
    fig = px.line(df_visualization, x='Year', y = indicator, markers=True,
             labels={indicator: label}, color = 'Geolocation') 
    title = " ".join (indicator.split (' ') [1 : ]) + ' per Year'
    fig = line_update_layout (title, label)
    fig.update_xaxes(type='category')
    
    fig.show ()

## Bar Charts

### HORIZONTAL BAR CHART

In [None]:
fig = px.bar(on_region_AA, y='Geolocation', x='1.4.1 Net JHS Enrolment Rate', orientation='h', 
             #color='Geolocation',
             #hover_data=["tip", "size"],
             #height=400
            )

fig.update_layout(
    
    # TITLE
    
    title={'text':'Progress of the Regions', 'y':0.95, 'x':0.5, # Position of the title
          # 'xanchor': 'center', 'yanchor': 'top'
          },    
    title_font_family="Cambria",
    title_font_color="#000000",
    title_font_size=20,
    
    
    # axis font
    font_family="Cambria",
    font_color="#000000",
    
    
    # x-axis
    xaxis_title='Net JHS Enrolment Rate',
    
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='#000000',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Cambria',
            size=14,
            color='#000000'
        ),
    ),
    
    
    # y-axis
    yaxis_title='Geolocation',
    yaxis=dict(
        {'categoryorder':'total ascending'}, # ascending x-values from bottom to top
        showgrid=False,
        showline=True,
        showticklabels=True,
        linecolor='#000000',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Cambria',
            size=10,
            color='#000000'
        ),
    ),
    
    autosize=True,
    
    # margin=dict(autoexpand=False, l=100, r=20,t=110),
    
    showlegend=True,
    
    plot_bgcolor='light grey'
)

fig.show()

### GROUPED BAR CHART

In [None]:
fig = go.Figure(
    data=[
        go.Bar(name='Net JHS Enrolment Rate (%)', y=tw_region_AA['Geolocation'], x=tw_region_AA['1.4.1 Net JHS Enrolment Rate'], xaxis='x', offsetgroup=1, orientation='h'),
        go.Bar(name='Primary Drop-out rate (%)', y=tw_region_AA['Geolocation'], x=tw_region_AA['Primary Drop-out rate'], xaxis='x2', offsetgroup=2, orientation='h')
    ],
    layout={
        'xaxis': {'title': 'Net JHS Enrolment Rate (%)'},
        'xaxis2': {'title': 'Primary Drop-out rate (%)', 'overlaying': 'x', 'side': 'top'}
    }
)

# Change the bar mode
fig.update_layout(
    
    barmode='group',

    
    # TITLE
    
    title={'text':'Progress of the Regions', 'y':0.95, 'x':0.2, # Position of the title
          # 'xanchor': 'center', 'yanchor': 'top'
          },    
    title_font_family="Cambria",
    title_font_color="#000000",
    title_font_size=20,
    
    
    # axis font
    font_family="Cambria",
    font_color="#000000",
    
    
    # x-axis 1
    xaxis=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='#000000',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Cambria',
            size=14,
            color='#000000',
        ),
    ),
    
    
    # x-axis 2
    xaxis2=dict(
        showline=True,
        showgrid=False,
        showticklabels=True,
        linecolor='#000000',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Cambria',
            size=14,
            color='#000000',
        ),
    ),
    
    
    # y-axis
    yaxis_title='Geolocation',
    yaxis=dict(
        {'categoryorder':'total ascending'}, # ascending values from bottom to top
        showgrid=False,
        showline=True,
        showticklabels=True,
        linecolor='#000000',
        linewidth=2,
        ticks='outside',
        tickfont=dict(
            family='Cambria',
            size=10,
            color='#000000',
        ),
    ),
    
    
    # LEGEND
    
    legend=dict(orientation="h", # horizontal orientation
        
        # entrywidth=70, # spacing in between legends, can't make it work sa akin

        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
        
        traceorder="reversed",
        
        title_font_color="#000000",
        title_font_family="Times New Roman",
        
        font=dict(
            family="Courier",
            size=12,
            color="black"
            ),
        
        bgcolor="LightSteelBlue", # legend box
        bordercolor="Black",
        borderwidth=1
        ),
    
    hovermode="y unified",
    
    autosize=True,
    
    # margin=dict(autoexpand=False, l=100, r=20,t=110),
    
    showlegend=True,
    
    plot_bgcolor='light grey'

)

fig.show()