# Using Nanopublications to Detect and Explain Contradictory Research Claims

Imran Asif ([ORCID:0000-0002-1144-6265](https://orcid.org/0000-0002-1144-6265))  
PhD Student, Computer Science

_Heriot-Watt University, Edinburgh, UK_

In [1]:
import ipywidgets as widgets
from SPARQLWrapper import SPARQLWrapper, JSON
import time
import seaborn as sns
# import pandas
import pandas as pd
# import matplotlib
import matplotlib.pyplot as plt

from IPython.display import display
from IPython.display import clear_output
from IPython.core.display import HTML
from IPython.display import Javascript
import urllib.parse
import statistics

import requests
from datetime import datetime
import json
from scipy import stats

### Set the SPARQL endpoint below before running the script

In [2]:
# Endpoint of the triple store holding the nanopublications; all queries in
# this notebook go through the single shared wrapper configured here.
SPARQL_endpoint = "http://localhost:3030/nanopubs_d_16_12_2020/sparql"
dbSparql = SPARQLWrapper(SPARQL_endpoint)
dbSparql.setReturnFormat(JSON)


def queryCoDa(query):
    """Run a SPARQL query against the configured endpoint.

    The query text is installed on the shared ``dbSparql`` wrapper, executed,
    and the converted result (JSON return format is configured above) is
    handed back to the caller.
    """
    dbSparql.setQuery(query)
    return dbSparql.queryAndConvert()

# Define Methods to Get Assertions, Provenance and IVs

In [3]:
def getAllAssertion():
    """Fetch every effect-size observation found in a nanopublication assertion graph.

    The SPARQL SELECT binds ?np, ?obs, ?a, ?effectValue, ?dv, ?iv, ?var,
    ?sampleSize and ?stdErr. None of the observation properties is OPTIONAL,
    so an observation lacking any of them is not returned.

    Returns the result of queryCoDa for this query.
    """
    # The query selects ?iv directly (no GROUP_CONCAT), so an observation with
    # several independent variables yields one result row per variable.
    query = """
    prefix np: <http://www.nanopub.org/nschema#>
    PREFIX cc: <https://data.cooperationdatabank.org/vocab/class/>
    PREFIX cp: <https://data.cooperationdatabank.org/vocab/prop/>
    PREFIX cr: <https://data.cooperationdatabank.org/id/>
    PREFIX n: <https://data.cooperationdatabank.org/coda/nanopub/>

    select ?np ?obs ?a ?effectValue ?dv ?iv ?var ?sampleSize ?stdErr where {
      graph ?h {
        ?np a np:Nanopublication ;
          np:hasAssertion ?a .
      }
      graph ?a {
                    ?obs cp:dependentVariable ?dv ;
                         cp:independentVariable ?iv ;
                         cp:eSEstimate ?effectValue ;
                         cp:effectSizeSampleSize ?sampleSize ;
                         cp:has-stderr ?stdErr ;
                         cp:effectSizeVariance ?var ;
                }
    }"""

    return queryCoDa(query)

###### END getAllAssertion #####
#######################################################################################
#Fetch IVs from the Assertion
def getIVs():
    """Fetch the distinct independent variables used in assertion graphs.

    The SPARQL SELECT DISTINCT binds a single variable, ?iv, taken from
    cp:independentVariable triples inside the assertion graph of each
    nanopublication.

    Returns the result of queryCoDa for this query.
    """
    query = """
        prefix np: <http://www.nanopub.org/nschema#>
        PREFIX cp: <https://data.cooperationdatabank.org/vocab/prop/>

        select DISTINCT ?iv where {
          graph ?h {
            ?np a np:Nanopublication ;
              np:hasAssertion ?a ;
          }
          graph ?a {
                        ?s cp:independentVariable ?iv .
                    }
        }"""

    return queryCoDa(query)

###### END IVs #####
#######################################################################################
#Get provenance info by NP IRI
def getProvenanceByNP(np):
    """Fetch the provenance graph contents of one nanopublication.

    Parameters:
        np: IRI of the nanopublication as a string. It is spliced between
            angle brackets directly into the query text, so it must be a
            bare IRI without the surrounding <>.

    The query requires a prov:wasDerivedFrom triple (?study); every other
    study characteristic is OPTIONAL. Multi-valued properties (group size,
    number of choices, MPCR, year of data collection, replenishment rate,
    PGD threshold) are collapsed into comma-separated strings with
    GROUP_CONCAT, and the distinct dct:creator values are counted as
    ?authorCount.

    Returns the result of queryCoDa for this query.
    """
    query = """
            PREFIX np: <http://www.nanopub.org/nschema#>
            PREFIX cc: <https://data.cooperationdatabank.org/vocab/class/>
            PREFIX cp: <https://data.cooperationdatabank.org/vocab/prop/>
            PREFIX cr: <https://data.cooperationdatabank.org/id/>
            PREFIX n: <https://data.cooperationdatabank.org/coda/nanopub/>
            PREFIX prov: <http://www.w3.org/ns/prov#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX dct: <http://purl.org/dc/terms/>

            SELECT distinct ?study ?oneShot ?oneShotLabel (group_concat(distinct ?gSize;separator=',')  as ?groupSize) 
             ?matching ?matchingLabel ?gameIncentive ?gameIncentiveLabel ?Experimental ?ExperimentalLabel 
             (group_concat(distinct ?numOfChoices;separator=',')  as ?numberOfChoices) ?kIndex 
             (group_concat(distinct ?mpcr;separator=',')  as ?MPCR) ?maleProportion 
             (group_concat(distinct ?yodCollection;separator=',')  as ?yearOfDataCollection)
             (count(distinct ?creator) as ?authorCount)
             ?doi ?date ?overallStandardDeviation
             ?meanAge ?numberOfObservations ?overallMeanContributions ?overallMeanWithdrawal
             ?overallN ?overallPercentageEndowmentContributed ?overallProportionCooperation
             ?publicationStatus ?publicationStatusLabel 
             (group_concat(distinct ?replenishmentRate;separator=',') as ?replenishmentRates )
             ?studyDilemmaType ?studyDilemmaTypeLabel
             (group_concat(distinct ?studyPGDThreshold;separator=',') as ?studyPGDThresholds )
             ?studyOtherDilemmaType ?ageLowerInclusive ?ageHigherInclusive
            where {
              graph ?h {
                <"""+np+"""> a np:Nanopublication ;
                      np:hasProvenance ?prov .
              }
              graph ?prov {
                        ?a  prov:wasDerivedFrom ?study .
                        OPTIONAL {?a dct:creator ?creator . }
                        OPTIONAL {?a cp:doi ?doi .}
                        OPTIONAL {?a dct:date ?date . }
                        OPTIONAL {?a cp:studyOneShot ?oneShot .
                                  ?oneShot rdfs:label ?oneShotLabel . }
                        OPTIONAL {?a cp:studyMatchingProtocol ?matching .
                                  ?matching  rdfs:label ?matchingLabel . }
                        OPTIONAL {?a cp:studyGameIncentive ?gameIncentive . 
                                  ?gameIncentive  rdfs:label ?gameIncentiveLabel . }
                        OPTIONAL {?a cp:studyExperimentalSetting ?Experimental . 
                                  ?Experimental  rdfs:label ?ExperimentalLabel . }
                        OPTIONAL {?a cp:yearOfDataCollection ?yodCollection . }
                        OPTIONAL {?a cp:studyNumberOfChoices ?numOfChoices .}
                        OPTIONAL {?a cp:studyGroupSize ?gSize .}
                        OPTIONAL { ?a cp:studyKindex ?kIndex . }
                        OPTIONAL { ?a cp:studyMPCR ?mpcr . }
                        OPTIONAL { ?a cp:maleProportion ?maleProportion . }
                        
                        OPTIONAL { ?a cp:meanAge ?meanAge . }
                        OPTIONAL { ?a cp:numberOfObservations ?numberOfObservations . } 
                        OPTIONAL { ?a cp:overallMeanContributions ?overallMeanContributions . }
                        OPTIONAL { ?a cp:overallMeanWithdrawal ?overallMeanWithdrawal . }
                        OPTIONAL { ?a cp:overallN ?overallN . }
                        OPTIONAL { ?a cp:overallPercentageEndowmentContributed ?overallPercentageEndowmentContributed . }
                        OPTIONAL { ?a cp:overallProportionCooperation ?overallProportionCooperation . }
                        OPTIONAL { ?a cp:overallStandardDeviation ?overallStandardDeviation . }
                        
                        OPTIONAL { ?a cp:publicationStatus ?publicationStatus . ?publicationStatus  rdfs:label ?publicationStatusLabel . } 
                        OPTIONAL { ?a cp:replenishmentRate ?replenishmentRate . } 
                        OPTIONAL { ?a cp:studyDilemmaType ?studyDilemmaType . ?studyDilemmaType  rdfs:label ?studyDilemmaTypeLabel . } 
                        OPTIONAL { ?a cp:studyPGDThreshold ?studyPGDThreshold . } 
                        OPTIONAL { ?a cp:studyOtherDilemmaType ?studyOtherDilemmaType . } 
    					OPTIONAL { ?a cp:lowerInclusive ?ageLowerInclusive . }
    					OPTIONAL { ?a cp:higherInclusive ?ageHigherInclusive . }
              }
            }
            GROUP BY ?study ?oneShot ?oneShotLabel ?matching ?matchingLabel ?gameIncentive ?gameIncentiveLabel 
                     ?Experimental ?ExperimentalLabel ?kIndex ?maleProportion ?date ?doi ?overallStandardDeviation
                     ?meanAge ?numberOfObservations ?overallMeanContributions ?overallMeanWithdrawal
                     ?overallN ?overallPercentageEndowmentContributed ?overallProportionCooperation
                     ?publicationStatus ?publicationStatusLabel ?studyDilemmaType ?studyDilemmaTypeLabel
                     ?studyOtherDilemmaType ?ageLowerInclusive ?ageHigherInclusive"""
            
    # Callers read the rows from resultProv['results']['bindings'].
    resultProv = queryCoDa(query)
    return resultProv

# Pre-Processing 

In [4]:
# Run both queries once up front; the pre-processing cells below iterate
# over resultAssertion["results"]["bindings"] and resultIVs["results"]["bindings"].
resultAssertion = getAllAssertion()
resultIVs = getIVs()

In [5]:
def getAuthorFromDOIUsingAPI(doi):
    """Look up bibliographic metadata for a DOI via the Open Access Button API.

    Parameters:
        doi: DOI given as a resolver URL; the identifier sent to the API is
            the text following the first '.org/' in it
            (e.g. 'https://doi.org/10.1016/j.socec.2016.04.002').

    Returns:
        The decoded JSON response, or an empty dict when ``doi`` has no
        '.org/' part, the request fails, or the response is not valid JSON.
        The lookup is best-effort and never raises for those cases.
    """
    # Example request:
    # https://api.openaccessbutton.org/metadata?id=10.1016/j.socec.2016.04.002
    parts = doi.split('.org/') if isinstance(doi, str) else []
    if len(parts) < 2:
        return {}

    try:
        response = requests.get(
            'https://api.openaccessbutton.org/metadata?id=' + parts[1],
            timeout=30,  # never let one unresponsive lookup stall the whole run
        )
        return response.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP failure or a body that is not JSON: treat as "no metadata".
        return {}

In [6]:
def _first_author_family(json_content):
    """Return the family name of a metadata record's first author, or ''."""
    try:
        first_author = json_content['author'][0]
        if 'family' in first_author:
            return first_author['family']
        # No explicit family name: fall back to the second word of 'name'.
        return first_author['name'].split(' ')[1]
    except (KeyError, IndexError, TypeError, AttributeError):
        return ''


def load_first_author_list(path='doi_meta_data.json'):
    """Load the cached DOI metadata and index first-author family names by DOI.

    Parameters:
        path: JSON file holding a list of metadata records; defaults to the
            cache file used by this notebook.

    Returns:
        A two-element list ``[dict_first_author, doi_json_list]`` where
        ``dict_first_author`` maps each record's 'doi' to the first author's
        family name ('' when it cannot be determined) and ``doi_json_list``
        is the list of records as read from the file. A missing or empty
        file yields ``[{}, []]``.
    """
    try:
        with open(path, encoding='utf-8') as json_file:
            raw = json_file.read()
    except FileNotFoundError:
        # First run: the cache has not been written yet.
        return [{}, []]

    doi_json_list = json.loads(raw) if raw.strip() else []

    dict_first_author = {}
    for json_content in doi_json_list:
        try:
            doi = json_content['doi']
        except (KeyError, TypeError):
            # A record without a DOI cannot be indexed; skip it.
            continue
        # Resolved per record so a record with unusable author data gets ''
        # rather than the previous record's name.
        dict_first_author[doi] = _first_author_family(json_content)

    return [dict_first_author, doi_json_list]

# Read the cache file once and unpack both results, instead of loading and
# parsing it separately for each value.
dict_first_author, doi_json_list = load_first_author_list()

In [7]:
# Inclusive (lower, upper) bounds of the seven effect-size buckets, applied to
# the effect size after rounding it to three decimal places. Bucket 1 is the
# most negative, bucket 4 is centred on zero, bucket 7 the most positive.
_BATCH_BOUNDS = (
    (float('-inf'), -0.500),
    (-0.499, -0.300),
    (-0.299, -0.051),
    (-0.050, 0.050),
    (0.051, 0.299),
    (0.300, 0.499),
    (0.500, float('inf')),
)

# npObj key -> SPARQL variable of the provenance query whose value is stored
# under that key. Labelled resources are stored by their rdfs:label.
_PROV_VALUE_VARS = (
    ('oneShot', 'oneShotLabel'),
    ('groupSize', 'groupSize'),
    ('matching', 'matchingLabel'),
    ('gameIncentive', 'gameIncentiveLabel'),
    ('Experimental', 'ExperimentalLabel'),
    ('numberOfChoices', 'numberOfChoices'),
    ('kIndex', 'kIndex'),
    ('MPCR', 'MPCR'),
    ('maleProportion', 'maleProportion'),
    ('yearOfDataCollection', 'yearOfDataCollection'),
    # Additional characteristics
    ('meanAge', 'meanAge'),
    ('numberOfObservations', 'numberOfObservations'),
    ('overallMeanContributions', 'overallMeanContributions'),
    ('overallMeanWithdrawal', 'overallMeanWithdrawal'),
    ('overallN', 'overallN'),
    ('overallPercentageEndowmentContributed', 'overallPercentageEndowmentContributed'),
    ('overallProportionCooperation', 'overallProportionCooperation'),
    ('overallStandardDeviation', 'overallStandardDeviation'),
    ('publicationStatus', 'publicationStatusLabel'),
    ('replenishmentRate', 'replenishmentRates'),
    ('studyDilemmaType', 'studyDilemmaTypeLabel'),
    ('studyPGDThreshold', 'studyPGDThresholds'),
    ('studyOtherDilemmaType', 'studyOtherDilemmaType'),
    ('ageLowerInclusive', 'ageLowerInclusive'),
    ('ageHigherInclusive', 'ageHigherInclusive'),
)


def _new_iv_entry():
    """Return a fresh bookkeeping dict: 'BatchN' counters and 'ObjBatchN' lists."""
    entry = {'Batch{}'.format(n): 0 for n in range(1, len(_BATCH_BOUNDS) + 1)}
    entry.update({'ObjBatch{}'.format(n): [] for n in range(1, len(_BATCH_BOUNDS) + 1)})
    return entry


def _binding_value(binding, var):
    """Return the value bound to ``var`` in a SPARQL result row, or '' if unbound."""
    return binding[var]['value'] if var in binding else ''


def _publication_year(binding):
    """Return the year of the row's ?date, or '' if it is unbound or unparseable."""
    if 'date' not in binding:
        return ''
    strDate = binding['date']['value'].split(":")[0]
    try:
        return datetime.strptime(strDate, '%Y-%m-%d').year
    except ValueError:
        # Not in YYYY-MM-DD form: leave the year blank.
        return ''


def _doi_lookup_key(doi):
    """Return the part of a DOI URL used as key in dict_first_author, or ''.

    The key is the text after 'doi.org/'. If that text itself embeds a URL,
    its scheme ('http://', ...) is stripped as well.
    """
    parts = doi.split('doi.org/')
    if len(parts) < 2:
        return ''
    key = parts[1]
    if 'http' in key:
        pieces = key.split('://')
        key = pieces[2] if len(pieces) > 2 else pieces[-1]
    return key


def _batch_number(effectValue):
    """Return the 1-based bucket for an effect size, or None if it fits none (NaN)."""
    rounded = round(float(effectValue), 3)  # rounded to three decimal places
    for number, (lower, upper) in enumerate(_BATCH_BOUNDS, 1):
        if lower <= rounded <= upper:
            return number
    return None


# One bookkeeping entry per independent variable, keyed by the last path
# segment of the IV's IRI.
dicIVs = {}
for result in resultIVs["results"]["bindings"]:
    dicIVs[result["iv"]["value"].split("/")[-1]] = _new_iv_entry()

############################################################

for result in resultAssertion["results"]["bindings"]:

    # Provenance of the nanopublication this assertion belongs to. Only the
    # first result row is used; with no rows every provenance field is ''.
    provBindings = getProvenanceByNP(result["np"]["value"])['results']['bindings']
    prov = provBindings[0] if provBindings else {}

    doi = _binding_value(prov, 'doi')
    # The author count is only recorded for rows that carry a DOI.
    authorCount = int(prov['authorCount']['value']) if 'doi' in prov else ''

    npObj = {
        'np': result["np"]["value"],
        'obs': result["obs"]["value"],
        'authorCount': authorCount,
        'publicationYear': _publication_year(prov),
        'doi': doi,
        'effectValue': result["effectValue"]["value"],
        'dv': result["dv"]["value"],
        'iv': result["iv"]["value"],
        'var': result["var"]["value"],
        'sampleSize': result["sampleSize"]["value"],
        'stdErr': result["stdErr"]["value"],
        'study': _binding_value(prov, 'study'),
    }
    for npKey, provVar in _PROV_VALUE_VARS:
        npObj[npKey] = _binding_value(prov, provVar)

    # Fetch metadata for DOIs not seen yet. The DOI is recorded under the same
    # key that is used for the lookup, so each DOI is requested at most once
    # per run and appended to doi_json_list at most once.
    doiKey = _doi_lookup_key(doi)
    if (doiKey and doiKey not in dict_first_author
            and '&lt' not in doi and doi != 'http://dx.doi.org/NA'):
        json_data = getAuthorFromDOIUsingAPI(doi)
        dict_first_author[doiKey] = ''
        if len(json_data) > 0:
            doi_json_list.append(json_data)

    # File the assertion under its IV, in the bucket matching its effect size.
    batchNumber = _batch_number(result["effectValue"]["value"])
    if batchNumber is not None:
        lstIVBatches = dicIVs[result["iv"]["value"].split("/")[-1]]
        lstIVBatches['Batch{}'.format(batchNumber)] += 1
        lstIVBatches['ObjBatch{}'.format(batchNumber)].append(npObj)

In [8]:
# Persist the (possibly extended) metadata list. Serialise first so that a
# failure to encode the data cannot leave a truncated cache file behind.
doi_json_text = json.dumps(doi_json_list, indent=4)
with open('doi_meta_data.json', 'w') as json_file:
    json_file.write(doi_json_text)

In [9]:
# Rebuild the DOI -> first-author mapping from the cache file written in the
# previous cell, so it includes the metadata fetched during pre-processing.
dict_first_author = load_first_author_list()[0]
#print(len(dict_first_author))

In [10]:
#########################################################################
# Convert dictionary to dataframe for further processing
# Tabulate the per-IV bookkeeping: one row per independent variable, seven
# counter columns followed by the seven columns holding the bucketed records.
batchCountColumns = ['Batch1', 'Batch2', 'Batch3', 'Batch4', 'Batch5', 'Batch6', 'Batch7']
batchObjectColumns = ['ObjBatch1', 'ObjBatch2', 'ObjBatch3', 'ObjBatch4', 'ObjBatch5',
                      'ObjBatch6', 'ObjBatch7']
dfDic = pd.DataFrame.from_dict(dicIVs, orient='index',
                               columns=batchCountColumns + batchObjectColumns)

# Total number of observations per IV across all buckets.
dfDic['sum'] = dfDic.apply(
    lambda row: sum(row[column] for column in batchCountColumns),
    axis=1,
)

# Most frequently studied IVs first; keep the twenty largest.
dfDic.sort_values(by='sum', ascending=False, inplace=True)
df20 = dfDic.iloc[:20]

# Now Implement the GUI

In [11]:
# Shared output widget; drawCatplot passes it to display() alongside its heading.
out = widgets.Output()

In [12]:
def getTop20_OfficialIVs():
    """Return the IV names to display: the top-20 IVs plus five fixed extras.

    The names come from the index of ``df20`` followed by a hand-picked list
    of additional IVs. An extra that is already part of the top 20 is not
    repeated; the order of first appearance is kept.
    """
    extraIVs = [
        'primingConcept',
        'timePressure',
        'gameIncentive',
        'uncertaintyLevel',
        'anonymityManipul',
    ]
    # dict.fromkeys drops repeats while preserving insertion order.
    return list(dict.fromkeys(df20.index.tolist() + extraIVs))

In [13]:
%%HTML
<style>
.modal-content 
{
    min-width: 900px
}
</style>

javascript = """
<script type="text/Javascript">
    function showInfo(msg){
        var iframe = "<iframe width=\'854\' height=\'480\' src=\'np_d_html/"+msg+".html\' frameborder=\'0\' allowfullscreen></iframe>"
        require(
            ["base/js/dialog"], 
            function(dialog) {
              console.log(dialog)
                dialog.modal({
                    title: 'Nanopub Information',
                    body: $.parseHTML(iframe),
                    width: 'auto',
                    buttons: {
                        'Cancel': {}
                    }
                });
            }
        );
    }
</script>"""

HTML(javascript)

In [14]:
def drawCatplot():
    """Scatter-plot the effect sizes of the selected IVs, coloured by bucket.

    For every IV returned by getTop20_OfficialIVs, the records stored in
    dicIVs under 'ObjBatch1' .. 'ObjBatch7' are flattened into one table with
    the columns 'IV', 'Batches' (bucket label), 'effectValues' and 'stdErr'.
    The table is drawn with effect size on the x axis and IV on the y axis.
    IVs that have no entry in dicIVs are skipped.
    """
    # Bucket key in dicIVs -> legend label, in plotting order.
    batchLabels = (
        ('ObjBatch1', 'Large Negative Correlation'),
        ('ObjBatch2', 'Medium Negative Correlation'),
        ('ObjBatch3', 'Small Negative Correlation'),
        ('ObjBatch4', 'No Correlation'),
        ('ObjBatch5', 'Small Positive Correlation'),
        ('ObjBatch6', 'Medium Positive Correlation'),
        ('ObjBatch7', 'Large Positive Correlation'),
    )

    display(HTML("<h2> catplot for all IVs</h2>"), out)

    dicCatplot = {'IV': [], 'Batches': [], 'effectValues': [], 'stdErr': []}
    for key in getTop20_OfficialIVs():
        ivEntry = dicIVs.get(key)
        if ivEntry is None:
            # The fixed extra IVs need not occur in the data.
            continue
        for batchKey, batchLabel in batchLabels:
            for objNP in ivEntry[batchKey]:
                dicCatplot['IV'].append(key)
                dicCatplot['Batches'].append(batchLabel)
                dicCatplot['effectValues'].append(float(objNP['effectValue']))
                dicCatplot['stdErr'].append(float(objNP['stdErr']))

    dfCatplot = pd.DataFrame.from_dict(dicCatplot)

    # Scatter plot
    sns.set_context("poster", font_scale = 0.85, rc={"grid.linewidth": 5})
    fig = plt.gcf()
    # Changing Seaborn Plot size
    fig.set_size_inches(20, 10)

    # No ci= argument: a scatter plot draws the raw points without any
    # aggregation, so it had no effect here. NOTE(review): newer seaborn
    # releases appear to reject it outright - confirm against the installed version.
    sns.scatterplot(x="effectValues", y="IV", hue="Batches", data=dfCatplot, palette="Set1")

    plt.xlabel('effectValues (<=-1 to >=1)')
    plt.ylabel('Independent Variables')

    # Show plot
    #fig.savefig('measureD.png', bbox_inches='tight')
    plt.show()
                    
def _medianOrBlank(raw, cast):
    """Return the median of the comma-separated numbers in ``raw``, or '' when ``raw`` is empty.

    Every part is converted with ``cast`` (``int`` or ``float``) and the median
    itself is converted with ``cast`` again, so for ``int`` a fractional median
    is truncated.
    """
    if raw == '':
        return ''
    return cast(statistics.median([cast(part) for part in raw.split(',')]))


def _castOrBlank(raw, cast):
    """Return ``cast(raw)``, or '' when ``raw`` is the empty string."""
    if raw == '':
        return ''
    return cast(raw)


def formatContent(npObj, IV, chk, dicSelectiveIV):
    """Append one nanopublication record as a new entry in every list of ``dicSelectiveIV``.

    Parameters
    ----------
    npObj : dict
        One query result row; all fields are read as strings. Multi-valued
        fields are comma-separated and reduced to their median.
    IV : str
        Independent variable the record belongs to (stored under 'iv').
    chk : str
        Batch label of the record (stored under 'Batches').
    dicSelectiveIV : dict
        Mapping of characteristic name -> list; mutated in place.

    Missing (empty-string) characteristics are stored as ''. All values are
    computed before anything is appended, so a malformed record raises
    without leaving the lists with unequal lengths.
    """
    authorCount = int(npObj['authorCount'])
    publicationYear = npObj['publicationYear']

    # The first author is looked up by the part of the DOI after 'doi.org/'.
    # An empty DOI or one without that prefix has no such part; it is reported
    # as 'No Author' instead of raising IndexError.
    doiParts = npObj['doi'].split('doi.org/')
    if len(doiParts) > 1 and doiParts[1] in dict_first_author:
        author = dict_first_author[doiParts[1]]
        if authorCount > 1:
            author = author + " et al.("+ str(publicationYear) +")"
        else:
            author = author + " ("+ str(publicationYear) +")"
    else:
        author = 'No Author'

    # The last path segment of the nanopub URI is what showInfo() receives.
    msg = urllib.parse.quote(npObj['np'].split('/')[-1])

    row = {
        'nano': '<span style="cursor:pointer" onclick="showInfo(\''+str(msg)+'\');">'+author+'</span>',
        'iv': IV,
        'obs': npObj['obs'].split('/')[-1],
        'Batches': chk,
        'effectValues': float(npObj['effectValue']),
        'stdErr': float(npObj['stdErr']),
        'study': npObj['study'].split('/')[-1],
        'oneShot': npObj['oneShot'],
        # The next group is stored as strings (str('') stays '').
        'groupSize': str(_medianOrBlank(npObj['groupSize'], int)),
        'matching': npObj['matching'],
        'gameIncentive': npObj['gameIncentive'],
        'Experimental': npObj['Experimental'],
        'numberOfChoices': str(_medianOrBlank(npObj['numberOfChoices'], int)),
        'kIndex': str(_medianOrBlank(npObj['kIndex'], float)),
        'MPCR': str(_medianOrBlank(npObj['MPCR'], float)),
        'maleProportion': str(_medianOrBlank(npObj['maleProportion'], float)),
        # Only the first of possibly several collection years is kept.
        'yearOfDataCollection': npObj['yearOfDataCollection'].split(',')[0],
        # Additional provenance characteristics, stored as numbers (or '').
        'meanAge': _castOrBlank(npObj['meanAge'], float),
        'numberOfObservations': _castOrBlank(npObj['numberOfObservations'], int),
        'overallMeanContributions': _castOrBlank(npObj['overallMeanContributions'], float),
        'overallMeanWithdrawal': _castOrBlank(npObj['overallMeanWithdrawal'], float),
        'overallN': _castOrBlank(npObj['overallN'], int),
        'overallPercentageEndowmentContributed': _castOrBlank(npObj['overallPercentageEndowmentContributed'], float),
        'overallProportionCooperation': _castOrBlank(npObj['overallProportionCooperation'], float),
        'overallStandardDeviation': _castOrBlank(npObj['overallStandardDeviation'], float),
        'publicationStatus': npObj['publicationStatus'],
        'replenishmentRate': _medianOrBlank(npObj['replenishmentRate'], float),
        'studyDilemmaType': npObj['studyDilemmaType'],
        'studyPGDThreshold': _medianOrBlank(npObj['studyPGDThreshold'], float),
        'studyOtherDilemmaType': npObj['studyOtherDilemmaType'],
        'ageLowerInclusive': _castOrBlank(npObj['ageLowerInclusive'], float),
        'ageHigherInclusive': _castOrBlank(npObj['ageHigherInclusive'], float),
    }

    for key, value in row.items():
        dicSelectiveIV[key].append(value)

#Contradiction Summary Dictionary
# Maps an independent variable to the number of cells flagged for it by the
# most recent findContrastNano() call: {'Categorical': int, 'Numerical': int}.
dictEvaluation = {}

# Batch labels, as written into the 'Batches' row by the callers.
_CONTRAST_BATCHES = ('Large Negative Correlation', 'Large Positive Correlation', 'Medium Negative Correlation',
                     'Medium Positive Correlation', 'Small Negative Correlation', 'Small Positive Correlation',
                     'No Correlation')

_CATEGORY_CHARACTERISTICS = ('oneShot', 'matching', 'gameIncentive', 'Experimental', 'publicationStatus',
                             'studyDilemmaType', 'studyOtherDilemmaType')

_NUMERIC_CHARACTERISTICS = ('kIndex', 'MPCR', 'maleProportion', 'meanAge', 'overallMeanContributions',
                            'overallMeanWithdrawal', 'overallN', 'overallPercentageEndowmentContributed',
                            'overallProportionCooperation', 'overallStandardDeviation', 'replenishmentRate',
                            'studyPGDThreshold', 'ageLowerInclusive', 'ageHigherInclusive', 'numberOfObservations',
                            'yearOfDataCollection', 'numberOfChoices', 'groupSize')


def _singletonPositions(values):
    """Return the position of every value that occurs exactly once in ``values``.

    A list with fewer than two entries has nothing to contrast against and
    yields no positions. Positions follow first-occurrence order.
    """
    if len(values) <= 1:
        return []
    counts = {}
    for value in values:
        counts[value] = counts.get(value, 0) + 1
    return [values.index(value) for value, count in counts.items() if count == 1]


def _zscoreOutlierPositions(values):
    """Return the positions in ``values`` whose z-score, rounded to an integer, has magnitude >= 3.

    Fewer than two distinct values means zero spread: the z-score is undefined
    (NaN), so nothing is reported. Non-finite scores are skipped because
    ``round()`` cannot convert them to an integer.
    """
    import math  # function-scope import: math is not among the notebook's top-level imports

    if len(set(values)) < 2:
        return []
    outliers = []
    for position, score in enumerate(stats.zscore(values)):
        score = float(score)
        if math.isfinite(score) and abs(round(score)) >= 3:
            outliers.append(position)
    return outliers


def findContrastNano(dicSelectiveIV, df, contrast, IV):
    """Find the cells whose characteristic value contrasts with the other nanopublications.

    Parameters
    ----------
    dicSelectiveIV : dict
        Characteristic name -> list of values, one entry per nanopublication.
    df : pandas.DataFrame
        The same data with characteristics as rows (including 'Batches') and
        one column per nanopublication. Columns are addressed by position, so
        repeated column labels are fine.
    contrast : list of str
        Any of 'category' and 'numeric'; other entries are ignored.
    IV : str
        Independent variable, used as the key in ``dictEvaluation``.

    Returns
    -------
    list of str
        Unique "columnPosition,rowPosition" strings (positions refer to
        ``df``), category cells first, then numeric cells.

    For 'category' a cell is flagged when its value is unique; for 'numeric'
    when its rounded z-score has magnitude >= 3. ``ddlCategory.value`` and
    ``ddlNumeric.value`` choose whether values are compared across all columns
    ("Inter Batch"), within each batch ("Intra Batch"), or "Both".
    Prints a summary and records the counts in ``dictEvaluation[IV]``.
    """
    nano = dicSelectiveIV['nano']
    rowLabels = df.index.tolist()
    categoryCells = []
    numericCells = []

    def flag(cells, columnPosition, characteristic):
        # Record each cell once, even if both scopes (or a repeated contrast entry) report it.
        cell = str(columnPosition) + ',' + str(rowLabels.index(characteristic))
        if cell not in cells:
            cells.append(cell)

    def columnsPerBatch():
        # Column positions of every batch, in the fixed batch order.
        batchOfColumn = df.loc['Batches'].tolist()
        return [[position for position, batch in enumerate(batchOfColumn) if batch == batchName]
                for batchName in _CONTRAST_BATCHES]

    for cntrst in contrast:
        if cntrst == 'category':
            scope = ddlCategory.value

            if scope == "Both" or scope == "Inter Batch":
                # Compare across all batches.
                for characteristic in _CATEGORY_CHARACTERISTICS:
                    for position in _singletonPositions(dicSelectiveIV[characteristic]):
                        flag(categoryCells, position, characteristic)

            if scope == "Both" or scope == "Intra Batch":
                # Compare only within each batch.
                rows = {name: df.loc[name].tolist() for name in _CATEGORY_CHARACTERISTICS}
                for columnPositions in columnsPerBatch():
                    for characteristic in _CATEGORY_CHARACTERISTICS:
                        inBatch = [rows[characteristic][position] for position in columnPositions]
                        for local in _singletonPositions(inBatch):
                            flag(categoryCells, columnPositions[local], characteristic)

        #For numeric values calculate the z-score
        if cntrst == 'numeric':
            scope = ddlNumeric.value

            if scope == "Both" or scope == "Inter Batch":
                for characteristic in _NUMERIC_CHARACTERISTICS:
                    rawValues = dicSelectiveIV[characteristic]
                    # Empty characteristics are left out of the statistics.
                    columnPositions = [position for position, raw in enumerate(rawValues) if raw != ""]
                    values = [float(rawValues[position]) for position in columnPositions]
                    for local in _zscoreOutlierPositions(values):
                        flag(numericCells, columnPositions[local], characteristic)

            if scope == "Both" or scope == "Intra Batch":
                rows = {name: df.loc[name].tolist() for name in _NUMERIC_CHARACTERISTICS}
                for batchPositions in columnsPerBatch():
                    for characteristic in _NUMERIC_CHARACTERISTICS:
                        rowValues = rows[characteristic]
                        columnPositions = [position for position in batchPositions if rowValues[position] != ""]
                        values = [float(rowValues[position]) for position in columnPositions]
                        for local in _zscoreOutlierPositions(values):
                            flag(numericCells, columnPositions[local], characteristic)

    print('Summary\n-------------')
    print('Total Nanopubs: %s' % len(nano))
    print('Total Nanopubs highlighted by Category: %s' % len(categoryCells))
    print('Total Nanopubs highlighted by Numeric: %s' % len(numericCells))

    dictEvaluation[IV] = {'Categorical': len(categoryCells), 'Numerical': len(numericCells)}

    return categoryCells + numericCells

def color(x,row_idx,col_idx):
    """Build the CSS table used by ``Styler.apply(color, ..., axis=None)``.

    Parameters
    ----------
    x : pandas.DataFrame
        Characteristics as rows (must contain 'stdErr' and 'effectValues'),
        one column per nanopublication. The column label is expected to hold
        a ``showInfo(...)`` call whose argument is reused for flagged cells.
    row_idx, col_idx : list of int
        Parallel lists of row/column positions of the cells to flag.

    Returns
    -------
    pandas.DataFrame
        Same shape as ``x``; every cell holds a CSS string ('' = no style).

    Styling rules, later ones overriding earlier ones:
    the effect-size row is yellow with opacity ``1 - 5 * stdErr`` of its
    column; flagged columns get left/right borders; flagged rows get
    top/bottom borders; flagged cells are red with a full border.

    Side effect: the flagged cells of ``x`` are rewritten in place as
    clickable ``<span>`` elements.
    """
    df_styler = pd.DataFrame('', index=x.index, columns=x.columns)
    columnCount = len(x.columns)
    if columnCount == 0:
        return df_styler

    rowLabels = x.index.tolist()
    stdErrRow = rowLabels.index('stdErr')
    effectRow = rowLabels.index('effectValues')

    # One opacity per column; positional access keeps this correct even when
    # two columns carry the same label.
    shade = [str(1 - (float(x.iloc[stdErrRow, iCol])/2)*10) for iCol in range(columnCount)]

    for iCol in range(columnCount):
        df_styler.iloc[effectRow, iCol] = 'background-color: rgba(255,255,0,'+shade[iCol]+');' #red (255,0,0)

    # Unique positions, in first-seen order.
    colIndexList = []
    rowIndexList = []
    cellList = []
    for iRow, iCol in zip(row_idx, col_idx):
        if iCol not in colIndexList:
            colIndexList.append(iCol)
        if iRow not in rowIndexList:
            rowIndexList.append(iRow)
        if (iRow, iCol) not in cellList:
            cellList.append((iRow, iCol))

    for iCol in colIndexList:
        for iRow in range(len(rowLabels)):
            if iRow == effectRow:
                # Keep the yellow of this column (not of whichever column was styled last).
                df_styler.iloc[iRow, iCol] = 'background-color: rgba(255,255,0,'+shade[iCol]+');border-left:3px solid black;border-right:3px solid black;'
            else:
                df_styler.iloc[iRow, iCol] = 'border-left:3px solid black;border-right:3px solid black;'

    for iRow in rowIndexList:
        for iCol in range(columnCount):
            df_styler.iloc[iRow, iCol] = 'border-top:3px solid black;border-bottom:3px solid black;'

    for iRow, iCol in cellList:
        df_styler.iloc[iRow, iCol] = 'background-color: rgba(255,0,0,'+shade[iCol]+');border-left:3px solid black;border-right:3px solid black;border-top:3px solid black;border-bottom:3px solid black'

    # Each flagged cell is wrapped exactly once; wrapping a duplicate again
    # would nest one <span> inside another.
    for iRow, iCol in cellList:
        value = x.iloc[iRow, iCol]
        # Text between the first '(' and the following ')' of the column label.
        msg = x.columns[iCol].split('(')[1].split(')')[0]
        x.iloc[iRow, iCol] = '<span style="cursor:pointer" onclick="showInfo('+str(msg)+');">'+str(value)+'</span>'

    return df_styler

# Row order of the result table. 'nano' must stay first: it supplies the column
# labels and is dropped before display, which is why flagged row positions are
# shifted by one in _displayIVTable().
_SELECTIVE_IV_KEYS = ('nano', 'iv', 'obs', 'Batches', 'effectValues', 'stdErr', 'study',
                      'oneShot',
                      'groupSize',
                      'matching',
                      'gameIncentive',
                      'Experimental',
                      'numberOfChoices',
                      'kIndex',
                      'MPCR',
                      'maleProportion',
                      'yearOfDataCollection',
                      'meanAge',
                      'numberOfObservations',
                      'overallMeanContributions',
                      'overallMeanWithdrawal',
                      'overallN',
                      'overallPercentageEndowmentContributed',
                      'overallProportionCooperation',
                      'overallStandardDeviation',
                      'publicationStatus',
                      'replenishmentRate',
                      'studyDilemmaType',
                      'studyPGDThreshold',
                      'studyOtherDilemmaType',
                      'ageLowerInclusive',
                      'ageHigherInclusive')


def _collectSelectedBatches(iv):
    """Return ``(dicSelectiveIV, title)`` for ``iv`` according to the batch check boxes."""
    dicSelectiveIV = {key: [] for key in _SELECTIVE_IV_KEYS}
    allBatches = checkboxAllBatches.value == True

    # (check box, title fragment, ((key in dicIVs[iv], batch label), ...))
    batchGroups = (
        (checkboxLNP, 'Large, ', (('ObjBatch1', 'Large Negative Correlation'),
                                  ('ObjBatch7', 'Large Positive Correlation'))),
        (checkboxMNP, 'Medium, ', (('ObjBatch2', 'Medium Negative Correlation'),
                                   ('ObjBatch6', 'Medium Positive Correlation'))),
        (checkboxSNP, 'Small, ', (('ObjBatch3', 'Small Negative Correlation'),
                                  ('ObjBatch5', 'Small Positive Correlation'))),
    )

    title = ''
    for checkbox, titlePart, batches in batchGroups:
        if checkbox.value == True or allBatches:
            title += titlePart
            for batchKey, batchLabel in batches:
                for objNP in dicIVs[iv][batchKey]:
                    formatContent(objNP, iv, batchLabel, dicSelectiveIV)

    if allBatches:
        # "All" additionally brings in the no-correlation batch.
        title = 'Large, Medium, Small and No '
        for objNP in dicIVs[iv]['ObjBatch4']:
            formatContent(objNP, iv, 'No Correlation', dicSelectiveIV)

    title = title.strip()
    title = title.rstrip(",") + ' (Negative & Positive) Correlation '
    return dicSelectiveIV, title


def _displayIVTable(iv, contrastOption):
    """Display the heading, contrast summary and styled table for one independent variable."""
    dicSelectiveIV, title = _collectSelectedBatches(iv)

    dfSelectiveIV = pd.DataFrame.from_dict(dicSelectiveIV, orient='index', columns=dicSelectiveIV['nano'])
    display(HTML("<h2> "+ title + "for " + iv +"</h2>"), out)
    highlightNano = findContrastNano(dicSelectiveIV, dfSelectiveIV, contrastOption, iv)
    dfSelectiveIV = dfSelectiveIV.drop(['nano'])

    r_idx = []
    c_idx = []
    if checkboxHighlight.value == True:
        for cell in highlightNano:
            columnPosition, rowPosition = cell.split(',')
            # Row positions were computed with the 'nano' row still present.
            r_idx.append(int(rowPosition) - 1)
            c_idx.append(int(columnPosition))

    display(dfSelectiveIV.style.apply(color, row_idx = r_idx, col_idx = c_idx, axis = None), out)


def on_button_clicked(b):
    """Submit handler: redraw the controls and show the table(s) for the selected IV.

    ``b`` is the clicked button (unused). With "-- Select All --" chosen, one
    table is shown for every entry of ``getTop20_OfficialIVs()``; otherwise
    only for ``dropdown.value``. The shared ``dictEvaluation`` summary is
    emptied before each run.
    """
    button.description = 'Please wait!'
    button.disabled = True
    try:
        contrastOption = []
        if checkboxCategory.value == True:
            contrastOption.append('category')
        if checkboxNumeric.value == True:
            contrastOption.append('numeric')

        clear_output(True)
        createGUI()

        # Reset the module-level summary in place; assigning a new dict here
        # would only create a local name and leave the shared one untouched.
        dictEvaluation.clear()

        # To draw the catplot by Independent variable
        if checkboxCatplot.value == True:
            drawCatplot()

        if dropdown.value == "-- Select All --":
            for iv in getTop20_OfficialIVs():
                if iv != '-- Select All --':
                    _displayIVTable(iv, contrastOption)
                    display(HTML("<hr />"), out)
        else:
            _displayIVTable(dropdown.value, contrastOption)
    finally:
        # Re-enable the button even when rendering failed, so the user can retry.
        button.description = 'Submit'
        button.disabled = False

In [15]:
#Define Controls
# All widgets below are module-level because createGUI() and
# on_button_clicked() read them by name.

# Independent-variable selector.
dropdown = widgets.Dropdown(
    options= getTop20_OfficialIVs(),
    #value='-- Select All --',
    description='IVs:',
    disabled=False,
)

label = widgets.HTML(
    value="<hr /><b>Please select the batches</b>"
)

# Batch selection: each box selects the negative and the positive batch of one size.
checkboxLNP = widgets.Checkbox(
    value=True,
    description='Large Negative & Positive Correlation',
    disabled=False,
    indent=False
)
checkboxMNP = widgets.Checkbox(
    value=False,
    description='Medium Negative & Positive Correlation',
    disabled=False,
    indent=False
)

checkboxSNP = widgets.Checkbox(
    value=False,
    description='Small Negative & Positive Correlation',
    disabled=False,
    indent=False
)

checkboxAllBatches = widgets.Checkbox(
    value=False,
    description='All Correlation',
    disabled=False,
    indent=False
)

labelLine = widgets.HTML(
    value="<hr /><b>Nanopub Contrast Analysis</b>"
)


checkboxHighlight = widgets.Checkbox(
    value=True,
    description='Highlight Contrast Cells?',
    disabled=False,
    indent=False
)

# Scope choices shared by the category and numeric contrast selectors.
_CONTRAST_SCOPE_OPTIONS = [('Both', 'Both'), ('Inter Batch', 'Inter Batch'), ('Intra Batch', 'Intra Batch')]

checkboxCategory = widgets.Checkbox(
    value=True,
    description='By Category?',
    disabled=False,
    indent=False,
    layout={'width': 'max-content'}
)

# NOTE: `indent` is a Checkbox option, not a Dropdown one, so it is not passed
# to the two dropdowns below.
ddlCategory = widgets.Dropdown(
    options=list(_CONTRAST_SCOPE_OPTIONS),
    value='Both',
    description='',
    disabled=False,
    layout={'width': 'max-content'}
)

checkboxNumeric = widgets.Checkbox(
    value=False,
    description='By Numeric values?',
    disabled=False,
    layout={'width': 'max-content'}
)
ddlNumeric = widgets.Dropdown(
    options=list(_CONTRAST_SCOPE_OPTIONS),
    value='Both',
    description='',
    disabled=False,
    layout={'width': 'max-content'}
)

labelExtra = widgets.HTML(
    value="<hr /><b>Extra</b>"
)

checkboxCatplot = widgets.Checkbox(
    value=False,
    description='Do you want to view catplot for all IVs?',
    disabled=False,
    indent=False
)

button = widgets.Button(
    description='Submit',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Submit',
    icon='check' # (FontAwesome names without the `fa-` prefix)
)

def createGUI():
    """Attach the submit handler and display the complete control panel."""
    button.on_click(on_button_clicked)

    # Build the rows top to bottom, then stack them in one vertical box.
    ivSelector = widgets.VBox([dropdown, label])
    largeMediumRow = widgets.HBox([checkboxLNP, checkboxMNP])
    smallAllRow = widgets.HBox([checkboxSNP, checkboxAllBatches])
    contrastRow = widgets.HBox([checkboxCategory, ddlCategory, checkboxNumeric, ddlNumeric])

    panel = widgets.VBox([
        ivSelector,
        largeMediumRow,
        smallAllRow,
        labelLine,
        checkboxHighlight,
        contrastRow,
        labelExtra,
        checkboxCatplot,
        button,
    ])
    display(panel, out)

In [16]:
# Initial render of the control panel; on_button_clicked() calls createGUI() again after clearing the output.
createGUI()

VBox(children=(VBox(children=(Dropdown(description='IVs:', options=('punishmentTreatment', 'endowmentSize', 'g…

Output()