# Crop Detection Benchmarks Summary

This notebook collects the cross-validation reports produced by different machine learning techniques applied to crop detection and summarizes their average metrics in a single HTML table.

## 1. Install required packages

In [1]:
import sys

# One-time setup: set this flag to True on the first run to install lxml and
# html5lib (HTML parser packages; this notebook imports lxml and later calls
# pd.read_html), then switch it back to False.
install_packages = False

if install_packages:
    # IPython shell escapes: {sys.executable} is interpolated so that pip runs
    # under the same Python interpreter as this kernel.
    !{sys.executable} -m pip install lxml
    !{sys.executable} -m pip install html5lib 

## 2. Import libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from unicodedata import normalize
import glob
import lxml
import os

## 3. List reports

In [3]:
# Collect the per-experiment report directories.
# The pattern 'report*' also matches the summary file 'report.html' that this
# notebook writes, so on a re-run it would be picked up as if it were an
# experiment; keep directories only. Sort the result because glob returns
# names in arbitrary, platform-dependent order.
listing = sorted(path for path in glob.glob('report*') if os.path.isdir(path))
for filename in listing:
    print(filename)

report_PU_MLP_season
report_PU_MLP_shuffle
report_PU_RF_season
report_PU_RF_shuffle
report_PU_SVC_season
report_PU_SVC_shuffle
report_simclr_15_0.1_season
report_simclr_15_0.1_shuffle
report_simclr_30_0.1_season
report_simclr_30_0.1_shuffle
report_simclr_45_0.1_season
report_simclr_45_0.1_shuffle
report_tls_15_10_cosine_season
report_tls_15_10_cosine_shuffle
report_tls_30_10_cosine_season
report_tls_30_10_cosine_shuffle
report_tls_30_10_euclidean_season
report_tls_45_10_cosine_season
report_tls_45_10_cosine_shuffle


## 4. Generate the summary report with average cross-validation metrics

In [4]:
# Path of the summary file produced by this notebook. Any copy left over from
# a previous run is deleted here so the summary is rebuilt from scratch.
filename = 'report.html'

try:
    os.remove(filename)
except FileNotFoundError:
    # Expected on a fresh run: there is simply nothing to delete. Any other
    # OSError (permissions, the path is a directory, ...) is allowed to
    # propagate so that a stale report is never left in place unnoticed.
    print('Error removing file')

Error removing file


In [5]:
# Client-side script embedded in the generated HTML page. It defines:
#   sortTable(n)  - toggles ascending/descending ordering of #myTable by column n
#                   (string comparison of the cell HTML);
#   myFunction()  - filters table rows using the '|'-separated terms typed in #myInput;
#   zoom(e)       - moves the background position of the hovered/touched element.
# The trailing backslashes are Python line continuations, so everything up to
# zoom() ends up on a single line of JavaScript; zoom() uses explicit \n because
# its statements are not all terminated by semicolons.
# Fixes relative to the previous version: the touch fallback in zoom() now sets
# offsetY from pageY (it used to overwrite offsetX with pageX, leaving offsetY
# unset), and a leftover console.log call in the filter loop was removed.
javascript = '<script> \
    function sortTable(n) { \
      var table, rows, switching, i, x, y, shouldSwitch, dir, switchcount = 0; \
      table = document.getElementById("myTable"); \
      switching = true; \
      dir = "asc"; \
      while (switching) { \
        switching = false; \
        rows = table.rows; \
        for (i = 1; i < (rows.length - 1); i++) { \
          shouldSwitch = false; \
          x = rows[i].getElementsByTagName("TD")[n]; \
          y = rows[i + 1].getElementsByTagName("TD")[n]; \
          if (dir == "asc") { \
            if (x.innerHTML.toLowerCase() > y.innerHTML.toLowerCase()) { \
              shouldSwitch = true; \
              break; \
            } \
    } else if (dir == "desc") { \
    if (x.innerHTML.toLowerCase() < y.innerHTML.toLowerCase()) { \
    shouldSwitch = true; \
              break; \
            } \
          } \
        } \
        if (shouldSwitch) { \
          rows[i].parentNode.insertBefore(rows[i + 1], rows[i]); \
          switching = true; \
          switchcount ++; \
        } else { \
          if (switchcount == 0 && dir == "asc") { \
            dir = "desc"; \
            switching = true; \
          } \
        } \
      } \
    } \
    function myFunction() { \
      var input, filter, table, tr, td, i, txtValue; \
      input = document.getElementById("myInput"); \
      filter = input.value.toUpperCase(); \
      table = document.getElementById("myTable"); \
      tr = table.getElementsByTagName("tr"); \
      var cols = document.getElementById("myTable").rows[1].cells.length;\
      var displayLine = 0; \
      strs = filter.split("|"); \
      for (row = 1; row < tr.length; row++) { \
        for (s = 0; s < strs.length; s++) { \
          for (c = 0; c < cols; c++) { \
            td = tr[row].getElementsByTagName("td")[c]; \
            if (td) { \
              txtValue = td.textContent || td.innerText; \
              if (txtValue.toUpperCase().indexOf(strs[s]) > -1) { \
                displayLine = displayLine + 1; \
              } else { \
                displayLine = displayLine; \
              } \
            } \
          } \
        } \
        if(displayLine >= (strs.length)) { \
            tr[row].style.display = ""; \
          } \
          else { \
            tr[row].style.display = "none"; \
          } \
          displayLine = 0; \
      } \
    }\
    function zoom(e){\n\
      var zoomer = e.currentTarget;\n\
      e.offsetX ? offsetX = e.offsetX : offsetX = e.touches[0].pageX\n\
      e.offsetY ? offsetY = e.offsetY : offsetY = e.touches[0].pageY\n\
      x = offsetX/zoomer.offsetWidth*100\n\
      y = offsetY/zoomer.offsetHeight*100\n\
      zoomer.style.backgroundPosition = x + "% " + y + "%";\n\
    }\n\
    </script>'

# Stylesheet embedded in the <head> of the generated HTML page. It styles the
# hover tooltips (.tooltip / .tooltiptext), the search box (#myInput), the
# results table (#myTable) and zoomable figures (figure.zoom).
# The trailing backslashes are Python line continuations: the whole stylesheet
# becomes a single line in the resulting string.
style = '<style>\
    * {\
      box-sizing: border-box;\
    }\
    .tooltip {\
      position: relative;\
      display: inline-block;\
      border-bottom: 1px dotted black;\
    }\
    .tooltip .tooltiptext {\
      visibility: hidden;\
      width: 240px;\
      background-color: black;\
      color: #fff;\
      text-align: center;\
      border-radius: 6px;\
      padding: 5px 0;\
      position: absolute;\
      z-index: 1;\
    }\
    .tooltip:hover .tooltiptext {\
      visibility: visible;\
    }\
    #myInput {\
      background-image: url("/css/searchicon.png");\
      background-position: 10px 10px;\
      background-repeat: no-repeat;\
      width: 100%;\
      font-size: 16px;\
      padding: 12px 20px 12px 40px;\
      border: 1px solid #ddd;\
      margin-bottom: 12px;\
    }\
    #myTable {\
      border-collapse: collapse;\
      width: 100%;\
      border: 1px solid #ddd;\
      font-size: 18px;\
    }\
    #myTable th, #myTable td {\
      text-align: left;\
      padding: 12px;\
    }\
    #myTable tr {\
      border-bottom: 1px solid #ddd;\
    }\
    #myTable tr.header{\
      background-color: #f1f1f1;\
      cursor: pointer;\
    }\
    #myTable tr:hover {\
      background-color: #f1f1f1;\
    }\
    figure.zoom {\
      background-position: 50% 50%;\
      position: relative;\
      width: 450px;\
      overflow: hidden;\
      cursor: zoom-in;\
    }\
    figure.zoom img:hover {\
      opacity: 0;\
    }\
    figure.zoom img {\
      transition: opacity 0.5s;\
      display: block;\
      width: 100%;\
      height: 100%;\
    }\
    </style>'


In [6]:
import sys

def getMethod(report):
    """Return the display name of the learning method encoded in a report name.

    The markers 'tls', 'simclr' and 'PU' are looked up, in that order, with a
    case-sensitive substring test; the first one found decides the result.
    'Undefined' is returned when none of them occurs in `report`.
    """
    known_methods = (
        ('tls', 'Triplet Siamese'),
        ('simclr', 'SimCLR'),
        ('PU', 'PU Learning'),
    )
    for marker, label in known_methods:
        if marker in report:
            return label
    return 'Undefined'

def getSampling(report):
    """Return the sampling strategy encoded in a report name.

    'shuffle' takes precedence over 'season' when both substrings are present
    (case-sensitive match); 'Undefined' is returned when neither occurs.
    """
    candidates = (('shuffle', 'Shuffle'), ('season', 'Season'))
    return next(
        (label for token, label in candidates if token in report),
        'Undefined',
    )

def _column_position(frame, label):
    """Return the position of the column whose second header level equals `label`.

    If several columns match, the last one wins. A KeyError is raised when no
    column matches, instead of silently falling back to the first column and
    averaging unrelated data.
    """
    positions = [idx for idx, col in enumerate(frame.columns) if col[1] == label]
    if not positions:
        raise KeyError(f'Column {label!r} not found in report table')
    return positions[-1]


def _column_mean(frame, label, percent=False):
    """Return the mean of the column identified by `label`.

    With `percent=True` every cell is expected to be a string from which the
    '%' sign is stripped before conversion to float; otherwise each cell is
    converted to float directly.
    """
    cells = frame.iloc[:, _column_position(frame, label)]
    if percent:
        numbers = [float(cell.replace('%', '')) for cell in cells]
    else:
        numbers = [float(cell) for cell in cells]
    return np.array(numbers).mean()


# The summary page is assembled in memory and written once at the end, so
# re-running this cell always yields the same, complete document instead of
# appending rows to whatever file already existed.
html_parts = [
    '<html><head>',
    javascript,
    '<meta name="viewport" content="width=device-width, initial-scale=1">',
    style,
    '</head><body><h2>Search</h2>',
    '<input type="text" id="myInput" onkeyup="myFunction()" placeholder="Search for names.." title="Type in a name">',
    '<table border=1 id="myTable">\n',
    # Column headers; clicking a header sorts the table by that column.
    '<tr>\n',
    '<th href="javascript:void(0)" onclick="sortTable(0)"><div class="tooltip">Test ID<span class="tooltiptext">Experiment description</span></div></th>',
    '<th href="javascript:void(0)" onclick="sortTable(1)"><div class="tooltip">Method<span class="tooltiptext">Experiment description</span></div></th>',
    '<th href="javascript:void(0)" onclick="sortTable(2)"><div class="tooltip">Sampling<span class="tooltiptext">Technique used to create a sample</span></div></th>',
    '<th href="javascript:void(0)" onclick="sortTable(3)"><div class="tooltip">Avg Accuracy<span class="tooltiptext">Average Accuracy</span></div></th>',
    '<th href="javascript:void(0)" onclick="sortTable(4)"><div class="tooltip">F1 Score<span class="tooltiptext">F1 Score</span></div></th>',
    '<th href="javascript:void(0)" onclick="sortTable(5)"><div class="tooltip">Avg MCC<span class="tooltiptext">Average Mathews Correlation Coeficient</span></div></th>',
    '<th href="javascript:void(0)" onclick="sortTable(6)"><div class="tooltip">URL<span class="tooltiptext">Link to cross validation internal results</span></div></th>',
    '</tr>\n',
]

for report in listing:
    report_path = f'{report}/report.html'
    if not os.path.isfile(report_path):
        # E.g. the summary file itself, which also matches the 'report*' pattern.
        print(f'Skipping {report}: {report_path} not found')
        continue

    # The first table of the per-experiment page holds the cross-validation results.
    results = pd.read_html(report_path)[0]

    # Header labels must match the report pages exactly (including spelling).
    avg_accuracy = _column_mean(results, 'AccuracyTest accuracy', percent=True)
    print(report, avg_accuracy)

    avg_f1 = _column_mean(results, 'F1F1 weighted', percent=True)
    print(report, avg_f1)

    avg_mcc = _column_mean(results, 'MCCMatthews Correlation Coeficient')
    print(report, avg_mcc)

    html_parts.append(
        '<tr>\n'
        f'<td>{report}</td>'
        f'<td>{getMethod(report)}</td>'
        f'<td>{getSampling(report)}</td>'
        f'<td>{avg_accuracy:.2f}%</td>'
        f'<td>{avg_f1:.2f}%</td>'
        f'<td>{avg_mcc:.2f}</td>'
        f'<td><a href="{report}/report.html">Results</a></td>'
        '</tr>\n'
    )

# Close the document so the generated page is well-formed HTML.
html_parts.append('</table></body></html>\n')

with open(filename, 'w') as fileout:
    fileout.write(''.join(html_parts))

report_PU_MLP_season 93.53235294117647
report_PU_MLP_season 94.31588235294119
report_PU_MLP_season 0.8411764705882352
No report file found
report_PU_MLP_shuffle 94.12470588235294
report_PU_MLP_shuffle 94.18411764705883
report_PU_MLP_shuffle 0.84
report_PU_RF_season 96.24470588235295
report_PU_RF_season 96.39941176470587
report_PU_RF_season 0.8841176470588235
report_PU_RF_shuffle 92.8558823529412
report_PU_RF_shuffle 92.49529411764706
report_PU_RF_shuffle 0.8129411764705883
report_PU_SVC_season 95.73700000000001
report_PU_SVC_season 95.84200000000001
report_PU_SVC_season 0.8949999999999999
report_PU_SVC_shuffle 93.17176470588235
report_PU_SVC_shuffle 93.13588235294117
report_PU_SVC_shuffle 0.8294117647058823
report_simclr_15_0.1_season 98.23823529411764
report_simclr_15_0.1_season 98.23
report_simclr_15_0.1_season 0.9458823529411766
report_simclr_15_0.1_shuffle 97.28117647058824
report_simclr_15_0.1_shuffle 97.23588235294118
report_simclr_15_0.1_shuffle 0.9264705882352942
report_simclr_

# End of notebook