In [53]:
import pandas as pd
import numpy as np
from IPython.display import display, clear_output, HTML, Image, IFrame

import io
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

from io import BytesIO
from PIL import Image
import urllib.request
import requests as req


In [54]:
%%javascript
// Never wrap long outputs in a scroll box: answering "no" for every output
// area makes the notebook cell grow to fit its content instead.
IPython.OutputArea.prototype._should_scroll = (_lines) => false;

<IPython.core.display.Javascript object>

In [55]:
# Style presets handed to the widgets via their `style=` argument.
my_style = dict(description_width='initial')
my_style_two = dict(description_width='150px')

# Layout presets handed to the widgets via their `layout=` argument.
my_layout = dict(width='600px')
my_layout_short = dict(width='300px')
my_layout_shortest = dict(width='200px')

# Environment switches (not read anywhere in the visible cells).
FLAG__ON_BINDER = True
FLAG__FROM_GIT = True

# Candidate locations of the classification results.
DATA_FRAME_PATH_LOCAL = './data_to_display.csv'
DATA_FRAME_PATH_GIT_URL = 'https://raw.githubusercontent.com/aideenf/AIVC/master/cp_wssc/Data/Iterative-models-building/Classification%20results/Conventions/'
DATA_FRAME_PATH_LOCAL_BINDER = '/home/jovyan/cp_wssc/Data/Iterative-models-building/Classification results/Conventions/Audited/'
    

In [56]:
def image_to_byte_array(image: "Image.Image", default_format: str = "PNG") -> bytes:
    """Encode an image into an in-memory byte string.

    Parameters
    ----------
    image : object exposing ``format`` and ``save(fp, format=...)``
        The image to encode (a PIL image in this notebook).
    default_format : str, optional
        Format used when ``image.format`` is empty. That attribute is ``None``
        for images that were not loaded from a file, and saving to a nameless
        buffer without an explicit format fails.

    Returns
    -------
    bytes
        The encoded image data.
    """
    buffer = io.BytesIO()
    # Keep the source format when known; otherwise fall back to the default.
    image.save(buffer, format=image.format or default_format)
    return buffer.getvalue()

In [57]:
# Load the classification results and discard the two columns the tool never shows.
data_to_display_df = (
    pd.read_csv(DATA_FRAME_PATH_LOCAL, index_col=False)
    .drop(columns=["Unnamed: 0", "id"])
)

# Lower-cased copy of the title, used for searching. The vectorised `.str.lower()`
# replaces a per-row lambda that raised AttributeError on a missing (NaN) title;
# missing titles become empty strings so they simply never match a search.
data_to_display_df['title_lower'] = (
    data_to_display_df["title"].fillna("").astype(str).str.lower()
)
# display(data_to_display_df.head())

# print("Num rows", data_to_display_df.shape[0])
# print("Num cols",data_to_display_df.shape[1])

In [58]:
# Distinct file-size labels plus the lowest and highest usability rating in the data.
file_size_list = data_to_display_df["size_kb"].unique()
min_usability_rating = data_to_display_df["usabilityRating"].min()
max_usability_rating = data_to_display_df["usabilityRating"].max()

# Introductory HTML text. Typos are corrected and every <p> is now closed
# exactly once (the original nested paragraphs and had a stray closing tag).
my_intro_html = """<h4>About this project</h4><p>This tool visualizes the results of my final
project with AkademyAI. For this project I was interested to aggregate relevant machine learning datasets
in accordance with the UN's sustainable development goals.</p> <p>As a first step towards accomplishing
this goal, my project has dealt with classifying relevant health and well being (target 3) datasets
from the Kaggle API using NLP. The NLP model has been trained on dataset descriptions scraped
and cleaned from Kaggle, as well as text labeled specifically as being target 3 related from a previous
Zindi Competition. (https://zindi.africa/competitions/sustainable-development-goals-sdgs-text-classification-challenge).</p>
<p>The datasets displayed with this tool have been prefiltered based on usability rating (>.5) and
size (greater than 10MB) in order to maintain a quality level suitable for machine learning and
data science projects. Additional functionality allows to move backwards and forwards through
the datasets, as well as further filtering results with the Usability rating sliders and file
size drop down menu (although the prefilled entry for file size "Any" is recommended). Links to the
datasets page on kaggle, as well as a direct download link, allow access to the datasets listed.</p>
<p>Ultimately, the use of a mixed Zindi and Kaggle training set has made the NLP model slightly
more robust in correctly identifying health related datasets from Kaggle. This indicates that
including text scraped from a variety of media sources (articles, NGO descriptions, news headlines)
could help in the tagging of other SDG targets in addition to target 3 as an expansion of this project. </p>
"""
# HTML text listing the 17 goals. The markup is repaired so that the intro
# paragraph is closed, GOAL 1 sits in its own paragraph like the other goals,
# and the stray trailing </p> is gone.
about_SDGS_html = """<h4> About Sustainable Development Goals </h4><p> Sustainable Development
Goals (SDGs) were created by the UN in 2016 outlining 17 humanitarian goals to be accomplished by
2030. The 17 goals, or targets, are as follows: </p>

<p> GOAL 1: No Poverty </p>

<p> GOAL 2: Zero Hunger </p>

<p> GOAL 3: Good Health and Well-being </p>

<p> GOAL 4: Quality Education </p>

<p> GOAL 5: Gender Equality </p>

<p> GOAL 6: Clean Water and Sanitation </p>

<p> GOAL 7: Affordable and Clean Energy </p>

<p> GOAL 8: Decent Work and Economic Growth </p>

<p> GOAL 9: Industry, Innovation and Infrastructure </p>

<p> GOAL 10: Reduced Inequality </p>

<p> GOAL 11: Sustainable Cities and Communities </p>

<p> GOAL 12: Responsible Consumption and Production </p>

<p> GOAL 13: Climate Action </p>

<p> GOAL 14: Life Below Water </p>

<p> GOAL 15: Life on Land </p>

<p> GOAL 16: Peace and Justice Strong Institutions </p>

<p> GOAL 17: Partnerships to achieve the Goal </p>

<p> Currently this project has functionality to aggregate Goal/Target 3: Health and well-being
machine learning datasets from Kaggle. The indicators of Target 3 aim to ensure healthy lives and promote well-being
at all ages, which is essential to sustainable development.  Health and well-being can be quantified by a variety of
indicators, including maternal mortality rate, access to healthcare and family planning services, epidemics and
disease in certain populations (HIV), as well as addiction and mental health rates. </p>
"""


# Picture shown in the notebook, downloaded at run time and re-encoded to bytes.
IMAGE_PATH = 'http://www.euro.who.int/__data/assets/image/0020/392042/SDG3-600-collage.png'
# The `with` block closes the HTTP response once the picture has been encoded;
# the original left the connection open. The conversion happens inside the
# block so the image data is read while the response is still open.
with urllib.request.urlopen(IMAGE_PATH) as response:
    image = image_to_byte_array(Image.open(response))


In [59]:
# Page heading.
selectHTMLHead = widgets.HTML(
    value="<font color = '#8c8c8c'><h3><center>*** Model Classification, Audit Tool ***</center></h3></font>"
)

# Heading placed above the filter controls.
selectHTML = widgets.HTML(
    value="<font color = '#8c8c8c'><h4><left>--- Filter ---</left></h4></font>"
)

# Full-width blank label used as a vertical spacer between sections.
space = widgets.Label(value='  ', layout=widgets.Layout(width='100%'))




# Three collapsible pages: project intro, SDG overview and the downloaded picture.
accordion = widgets.Accordion(children=[
    widgets.HTML(my_intro_html),
    widgets.HTML(about_SDGS_html),
    widgets.Image(value=image, format='png', width=1200, height=1200),
])

# Titles are assigned by page position, in the same order as the children above.
for page_index, page_title in enumerate((
        'About this project',
        'About SDGs',
        'Target 3: Health and Well-being Indicators')):
    accordion.set_title(page_index, page_title)

# Start with the first page expanded.
accordion.selected_index = 0

# Every distinct size label found in the data, preceded by the catch-all "Any".
file_size_list = np.insert(data_to_display_df["size_kb"].unique(), 0, "Any")

file_size_drop_down = widgets.Dropdown(
    description="File size :",
    options=file_size_list,
    disabled=False,
    layout=my_layout,
    style=my_style_two,
)



# Settings shared by both sliders: they span the rating range found in the
# data and move in steps of 0.01.
_usability_slider_options = dict(
    min=min_usability_rating,
    max=max_usability_rating,
    step=0.01,
    style=my_style_two,
    layout=my_layout,
)

# Lower bound of the rating filter; starts at the smallest rating.
usability_rating_slider_min = widgets.FloatSlider(
    value=min_usability_rating,
    description='Min Usability level:',
    **_usability_slider_options
)
usability_rating_slider_min.style.handle_color = '#5c85d6'

# Upper bound of the rating filter; starts at the largest rating.
usability_rating_slider_max = widgets.FloatSlider(
    value=max_usability_rating,
    description='Max Usability level:',
    **_usability_slider_options
)
usability_rating_slider_max.style.handle_color = '#5c85d6'



# Free-text search on the dataset title.
search_box_title = widgets.Text(
    value='',
    description='Search Title:',
    layout=my_layout,
    style=my_style_two,
)

# Navigation buttons; each one gets its own background colour.
forward_button = widgets.Button(
    description="Forward",
    tooltip='View next dataset',
    button_style='',
    disabled=False,
)
forward_button.style.button_color = '#e68a00'

back_button = widgets.Button(
    description='Back',
    tooltip='View previous dataset',
    button_style='',
    disabled=False,
)
back_button.style.button_color = '#d9d9d9'

# Name of the navigation button pressed most recently.
last_clicked = "forward"

def _redraw_after_click(direction, message):
    """Record which button was pressed and re-render the result area.

    `direction` is stored in the module-level `last_clicked`; `message` is
    printed inside the output area before the current filter values are passed
    to `output_function`.
    """
    global last_clicked
    last_clicked = direction

    with output:
        # wait=True keeps the old content until the new content arrives.
        clear_output(wait=True)
        print (message)
        output_function(file_size_drop_down.value,
                        usability_rating_slider_min.value,
                        usability_rating_slider_max.value,
                        search_box_title.value)


def forward_button_clicked(b):
    """Click handler for the Forward button; `b` is the clicked button (unused)."""
    _redraw_after_click("forward", "Forward button clicked")


def back_button_clicked(b):
    """Click handler for the Back button; `b` is the clicked button (unused)."""
    _redraw_after_click("back", "Back button clicked")
        


            
            
def _filter_datasets(datasets, size_value, rating_min, rating_max, title_query):
    """Return the rows of `datasets` that match the current filter settings.

    A non-empty `title_query` takes precedence: only the title is matched and
    the size/rating filters are ignored. Otherwise rows are kept when their
    rating lies within [rating_min, rating_max] and, unless `size_value` is
    "Any", their `size_kb` equals `size_value`.
    """
    if title_query != '':
        # `title_lower` holds lower-cased titles, so lower-case the query too.
        # regex=False matches the text literally (input such as "(" used to
        # raise a regex error); na=False treats missing titles as non-matches.
        matches = datasets['title_lower'].str.contains(
            title_query.lower(), regex=False, na=False)
        return datasets.loc[matches]

    # Inclusive bounds: with the sliders at their default positions the rows
    # holding the lowest and highest rating must still be selectable.
    matches = datasets['usabilityRating'].between(rating_min, rating_max)
    if size_value != "Any":
        matches = matches & (datasets['size_kb'] == size_value)
    return datasets.loc[matches]


def output_function(file_size_drop_down_value,
                        usability_rating_slider_min_value,
                        usability_rating_slider_max_value,
                        search_box_title_value):
    """Filter the datasets and display one randomly chosen match.

    Parameters
    ----------
    file_size_drop_down_value : str
        Selected size label, or "Any" for no size restriction.
    usability_rating_slider_min_value, usability_rating_slider_max_value : float
        Inclusive bounds for `usabilityRating`.
    search_box_title_value : str
        Title search text; when non-empty it overrides the other filters.

    Side effects
    ------------
    Enables or disables the size and rating widgets, prints the match count,
    and displays or prints the details of one sampled row. Prints a hint when
    nothing matches.
    """
    from html import escape  # local import keeps this cell self-contained

    # While a title search is active the other filters are ignored, so grey
    # them out. The widget trait is `disabled`; the former `.disable`
    # assignments only created an unused attribute and had no visible effect.
    searching = search_box_title_value != ''
    file_size_drop_down.disabled = searching
    usability_rating_slider_min.disabled = searching
    usability_rating_slider_max.disabled = searching
    search_box_title.disabled = False

    filtered_result = _filter_datasets(data_to_display_df,
                                       file_size_drop_down_value,
                                       usability_rating_slider_min_value,
                                       usability_rating_slider_max_value,
                                       search_box_title_value)
    print ( "Num filtered rows", filtered_result.shape[0])

    if filtered_result.empty:
        print ("Relax your filter baby")
        return

    # Show one random match.
    row = filtered_result.sample(n = 1).iloc[0]

    url = "https://www." + str(row["url"])
    zip_file_url = url + "/download"

    txt = "The number of filtered samples is " + str(filtered_result.shape[0])
    display(HTML("<font color='purple'>" +  txt + "</font>"))

    fields = (
        ("Title: ", row["title"]),
        ("Sub Title: ", row["subtitle"]),
        ("Usability Rating: ", round(row["usabilityRating"], 2)),
        ("File Size: ", row["size_kb"]),
    )
    for label, value in fields:
        # Missing values render as empty text instead of crashing the string
        # concatenation; escaping keeps dataset text from being read as markup.
        text = "" if pd.isna(value) else str(value)
        display(HTML("<font color='green'>" + label + escape(text) + "</font>"))

    print ("Kaggle URL", url)
    print ("Download Data:",  zip_file_url)
       
        
                       
    
# Re-run `output_function` whenever one of the filter widgets changes; each
# key is the name of the parameter that receives that widget's value.
output = widgets.interactive_output(
    output_function,
    dict(
        file_size_drop_down_value=file_size_drop_down,
        usability_rating_slider_min_value=usability_rating_slider_min,
        usability_rating_slider_max_value=usability_rating_slider_max,
        search_box_title_value=search_box_title,
    ),
)

# Hook the navigation buttons up to their click handlers.
forward_button.on_click(forward_button_clicked)
back_button.on_click(back_button_clicked)


# Page layout from top to bottom; `space` is reused as a spacer between sections.
page_sections = [
    selectHTMLHead,
    accordion,
    space,
    selectHTML,
    space,
    widgets.VBox([
        file_size_drop_down,
        usability_rating_slider_min,
        usability_rating_slider_max,
        search_box_title,
    ]),
    space,
    widgets.HBox([back_button, forward_button]),
    output,
]
display(*page_sections)

HTML(value="<font color = '#8c8c8c'><h3><center>*** Model Classification, Audit Tool ***</center></h3></font>"…

Accordion(children=(HTML(value='<h4>Using the audit tool</h4><p>This tool will randomly select and display a s…

Label(value='  ', layout=Layout(width='100%'))

HTML(value="<font color = '#8c8c8c'><h4><left>--- Filter ---</left></h4></font>")

Label(value='  ', layout=Layout(width='100%'))

VBox(children=(Dropdown(description='File size :', layout=Layout(width='600px'), options=('Any', '267MB', '3GB…

Label(value='  ', layout=Layout(width='100%'))

HBox(children=(Button(description='Back', style=ButtonStyle(button_color='#d9d9d9'), tooltip='View previous da…

Output()