# DATA101 Final Project: Interactive Visualization Application
## Part 2: Non-Spatial Data Visualization

## Import Libraries

In [1]:
# Install Plotly
# Using pip
!pip install plotly

# Using conda
# conda install -c conda-forge plotly



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import plotly.graph_objects as go
from plotly.subplots import make_subplots

from pathlib import Path
%pylab inline

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [None]:
## Only do this if you're working on Google Colab:
## uncomment the three lines below to mount Google Drive at /gdrive and
## change the working directory into it.

# from google.colab import drive
# drive.mount('/gdrive')
# %cd /gdrive

In [2]:
# Root folder of the cleaned CSV datasets. The dataset-loading cell builds
# every file path relative to this folder, so switching environments only
# requires changing this one assignment.

# when using Google Colab
# dataset_folder = Path('/gdrive/MyDrive/datasets')

# when using local folder
dataset_folder = Path('cleaned-datasets/')

## Datasets

In [3]:
def _read_region_table(csv_path):
    """Read one cleaned dataset CSV into a DataFrame indexed by region.

    Parameters
    ----------
    csv_path : path-like
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file contents with the 'Region' column used as the index.
    """
    return pd.read_csv(csv_path, index_col='Region')


# Sub-folders are joined with the explicit `/` operator instead of relying on
# implicit concatenation of adjacent string literals ('Primary/' 'file.csv').
primary_folder = dataset_folder / 'Primary'
secondary_folder = dataset_folder / 'Secondary'

# Primary Completion Rates
primary_completion = _read_region_table(primary_folder / 'Primary_Completion_Rate_by_Region_and_Year.csv')

# Primary Drop-out Rates
primary_dropout = _read_region_table(primary_folder / 'Primary_Drop-out_Rate_by_Region_and_Year.csv')

# Primary Net Enrollment Rates
primary_enrollment = _read_region_table(primary_folder / 'Primary_Net_Enrollment_Rate_by_Region_and_Year.csv')

# Secondary Completion Rates
secondary_completion = _read_region_table(secondary_folder / 'Secondary_Completion_Rate_by_Region_and_Year.csv')

# Secondary Drop-out Rates
secondary_dropout = _read_region_table(secondary_folder / 'Secondary_Drop-out_Rate_by_Region_and_Year.csv')

# Secondary Net Enrollment Rates
secondary_enrollment = _read_region_table(secondary_folder / 'Secondary_Enrollment_Rate_by_Region_and_Year.csv')

# Poverty Incidence Rates
poverty_incidence = _read_region_table(dataset_folder / 'Interpolated_Poverty_Incidence_among_Population.csv')

## Bar Chart

The bar chart is defined by the following filters:
- Region (Multi-select) <- horizontal bars if more than one region is selected, vertical bars if exactly one
- Education Level (Primary or Secondary? Single-select)
- Education Metric (Enrollments, Completions, or Drop-outs? Single-select) <- this defines the top 5 ranking and the widest bar to be featured

Interactivity:
- Year Range Slider: for a single selected year, show the value as is; for a range of years, show the average over the range

In [9]:
# Filters
regions = ['Region I', 'Region II', 'Region III', 'Region IV-A', 'Region IV-B']
educ_level = 'Secondary'
educ_metric = 'Enrollments'
years = ['2006', '2007', '2008', '2009', '2010']

# Slice the selected regions (rows) and years (columns) out of the three rate
# tables that belong to the chosen education level.
if educ_level == 'Secondary':
    completion_by_year = secondary_completion.loc[regions, years]
    dropout_by_year = secondary_dropout.loc[regions, years]
    enrollment_by_year = secondary_enrollment.loc[regions, years]
elif educ_level == 'Primary':
    completion_by_year = primary_completion.loc[regions, years]
    dropout_by_year = primary_dropout.loc[regions, years]
    enrollment_by_year = primary_enrollment.loc[regions, years]
else:
    # Fail loudly: without this branch an unrecognised level would fall
    # through and the code below would run on frames left over from an
    # earlier cell execution (or raise an unrelated NameError).
    raise ValueError(f"educ_level must be 'Primary' or 'Secondary', got {educ_level!r}")

# Average every region over the selected years, rounded to one decimal, and
# keep each result as a one-column frame named after its metric.
bar_completion_df = pd.DataFrame(completion_by_year.mean(axis=1).round(1), columns=['Completions'])
bar_dropout_df = pd.DataFrame(dropout_by_year.mean(axis=1).round(1), columns=['Dropouts'])
bar_enrollment_df = pd.DataFrame(enrollment_by_year.mean(axis=1).round(1), columns=['Enrollments'])

# Put the three metrics side by side, matched on 'Region', then order the
# regions by the selected metric with the highest value first.
bar_df = (
    bar_completion_df
    .merge(bar_dropout_df, on='Region', how='outer')
    .merge(bar_enrollment_df, on='Region', how='outer')
    .sort_values(by=educ_metric, ascending=False)
)

bar_df

Unnamed: 0_level_0,Completions,Dropouts,Enrollments
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Region I,81.4,5.1,58.2
Region III,76.3,6.9,55.1
Region IV-A,79.8,5.7,54.9
Region II,78.6,6.7,51.3
Region IV-B,72.4,8.8,50.5


In [10]:
# Multi-region view: one small horizontal bar figure per row of `bar_df`,
# numbered in the frame's current row order.
# The guard and the loop are driven by `bar_df` itself (the frame being
# indexed) rather than by the global `regions` list, so this cell cannot fall
# out of sync with the data it plots when cells are re-run in another order.
if len(bar_df) > 1:
    # (column in bar_df, trace name, fill colour), in the order the traces are added.
    multi_bar_specs = [
        ('Dropouts', 'Drop-outs', '#23B37F'),
        ('Completions', 'Completions', '#00E08F'),
        ('Enrollments', 'Enrollments', '#D5FBCB'),
    ]

    for position in range(len(bar_df)):
        region_name = bar_df.index[position]
        fig = go.Figure()

        for column, trace_name, colour in multi_bar_specs:
            value = bar_df[column].iloc[position]
            fig.add_trace(go.Bar(x=[value],
                                 orientation='h',
                                 name=trace_name,
                                 marker=dict(color=colour, line=dict(width=0)),
                                 text=value,
                                 texttemplate="%{text}%",
                                 textposition='auto'))

        # Both axes are hidden; the x-axis is additionally reversed.
        fig.update_xaxes(autorange='reversed', visible=False)
        fig.update_yaxes(visible=False)

        fig.update_layout(
            showlegend=False,
            height=100,
            width=300,
            bargroupgap=0.15,
            # The wide left margin leaves room for the "<rank>. <region>" title.
            margin=dict(l=130, r=15, t=15, b=15, pad=130),
            plot_bgcolor='#446C37',
            title=dict(
                text=f'{position + 1}. {region_name}',
                y=0.6,
                x=0.06,
                xanchor='left',
                yanchor='middle',
                font=dict(
                    color='#FFFFFF',
                    # family='Sansation-Regular',
                    size=12,
                ),
            ),
            font=dict(
                color='#181717',
                # family='Sansation-Regular',
                size=9,
            ),
        )
        fig.show()

In [6]:
# Filters
regions = ['Region I']
educ_level = 'Secondary'
educ_metric = 'Enrollments'
years = ['2006', '2007']

# Slice the selected region(s) and years out of the three rate tables that
# belong to the chosen education level.
if educ_level == 'Secondary':
    solo_completion_by_year = secondary_completion.loc[regions, years]
    solo_dropout_by_year = secondary_dropout.loc[regions, years]
    solo_enrollment_by_year = secondary_enrollment.loc[regions, years]
elif educ_level == 'Primary':
    solo_completion_by_year = primary_completion.loc[regions, years]
    solo_dropout_by_year = primary_dropout.loc[regions, years]
    solo_enrollment_by_year = primary_enrollment.loc[regions, years]
else:
    # Fail loudly: without this branch an unrecognised level would fall
    # through and the code below would run on frames left over from an
    # earlier cell execution (or raise an unrelated NameError).
    raise ValueError(f"educ_level must be 'Primary' or 'Secondary', got {educ_level!r}")

# Average over the selected years, rounded to one decimal, one single-column
# frame per metric.
bar_completion_df = pd.DataFrame(solo_completion_by_year.mean(axis=1).round(1), columns=['Completions'])
bar_dropout_df = pd.DataFrame(solo_dropout_by_year.mean(axis=1).round(1), columns=['Dropouts'])
bar_enrollment_df = pd.DataFrame(solo_enrollment_by_year.mean(axis=1).round(1), columns=['Enrollments'])

# Combine the three metrics on 'Region' and sort by the selected metric,
# highest value first.
bar_solo_df = (
    bar_completion_df
    .merge(bar_dropout_df, on='Region', how='outer')
    .merge(bar_enrollment_df, on='Region', how='outer')
    .sort_values(by=educ_metric, ascending=False)
)
bar_solo_df

Unnamed: 0_level_0,Completions,Dropouts,Enrollments
Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Region I,83.2,4.4,52.8


In [8]:
# Single-region view: one figure with three vertical bars (one per metric).
# The guard checks `bar_solo_df` (the frame that is plotted) instead of the
# global `regions` list, which other cells also assign.
if len(bar_solo_df) == 1:
    # (column in bar_solo_df, trace name, fill colour), in the order the traces are added.
    solo_bar_specs = [
        ('Enrollments', 'Enrollments', '#D5FBCB'),
        ('Completions', 'Completions', '#00E08F'),
        ('Dropouts', 'Drop-outs', '#23B37F'),
    ]

    fig = go.Figure()

    for column, trace_name, colour in solo_bar_specs:
        value = bar_solo_df[column].iloc[0]
        fig.add_trace(go.Bar(y=[value],
                             name=trace_name,
                             marker=dict(color=colour, line=dict(width=0)),
                             text=value,
                             texttemplate="%{text}%",
                             textposition='auto'))

    # NOTE(review): the reversed x-axis presumably mirrors the left-to-right
    # order of the grouped bars; confirm this is intended for the vertical layout.
    fig.update_xaxes(autorange='reversed', visible=False)
    fig.update_yaxes(visible=False)

    fig.update_layout(
        showlegend=False,
        height=350,
        width=300,
        bargroupgap=0.15,
        margin=dict(l=15, r=15, t=15, b=15, pad=15),
        plot_bgcolor='#446C37',
        font=dict(
            color='#181717',
            # family='Sansation',
            # size=12,
        ),
    )

    fig.show()

## Line Chart

The line chart is defined by the following filters:

- Region (Multi-select) <- affects the legend, 1 color per region
- Education Level (Primary or Secondary? Single-select)
- Education Metric (Enrollments, Completions, or Drop-outs? Single-select) <- this defines the data to be used

Interactivity:
- Year Range Slider <- this identifies the x-axis values to be included

## Scatterplot

The scatterplot is defined by the following filters:

- Region (Multi-select) <- affects the legend, 1 color per region
- Education Level (Primary or Secondary? Single-select)
- Education Metric (Enrollments, Completions, or Drop-outs? Single-select) <- this defines the data to be used

Interactivity:
- Year Range Slider <- this identifies the x-axis values to be included