<a href="https://colab.research.google.com/github/jansoe/ANN/blob/main/PredictionAnalyse.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Run Colab's user authentication before any Google API access below.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime - confirm before running this notebook elsewhere.
from google.colab import auth
auth.authenticate_user()

In [2]:
import gspread
from google.auth import default
# default() returns a pair; only its first element (the credentials) is used,
# the second is discarded.
creds, _ = default()

# Authorised gspread client; later cells use it to open the spreadsheet by name.
gc = gspread.authorize(creds)

In [38]:
# First worksheet of the spreadsheet that holds labels and predictions.
sheet = gc.open('Symptom2Disease_jan').sheet1

# Fetch rows 2-1201 of columns A, B and E, in that order, as cell lists.
index, label, predictions = (
    sheet.range(a1_range)
    for a1_range in ('A2:A1201', 'B2:B1201', 'E2:E1201')
)

In [39]:
# Distinct ground-truth labels, lower-cased so they can be compared with the
# lower-cased prediction strings further down.
diseases = {cell.value.lower() for cell in label}
diseases

{'acne',
 'allergy',
 'arthritis',
 'bronchial asthma',
 'cervical spondylosis',
 'chicken pox',
 'common cold',
 'dengue',
 'diabetes',
 'dimorphic hemorrhoids',
 'drug reaction',
 'fungal infection',
 'gastroesophageal reflux disease',
 'hypertension',
 'impetigo',
 'jaundice',
 'malaria',
 'migraine',
 'peptic ulcer disease',
 'pneumonia',
 'psoriasis',
 'typhoid',
 'urinary tract infection',
 'varicose veins'}

In [46]:
# Parse every non-empty prediction cell into a list of recognised labels.
# Keys are 0-based positions within the fetched range (sheet row = key + 2).
prediction_labels = {}
for ix, cell in enumerate(predictions):
    # Empty cells carry no prediction and get no entry at all.
    if not cell.value:
        continue

    accepted = prediction_labels[ix] = []
    for cell_value in cell.value.split(','):
        # Normalise: drop surrounding blanks/double quotes, ignore case.
        cell_value = cell_value.strip(' "').lower()

        # Only known disease names and the catch-all "other" are kept;
        # anything else is reported so the sheet can be corrected.
        if cell_value in diseases or cell_value == "other":
            accepted.append(cell_value)
        else:
            print(f"!! row {ix+2} value >> {cell_value} << does not match !!")

In [47]:
# Inspect the parsed predictions: key = 0-based position in the fetched range
# (sheet row = key + 2), value = list of accepted labels for that row.
prediction_labels

{45: ['psoriasis', 'arthritis'],
 46: ['psoriasis'],
 47: ['arthritis'],
 48: ['psoriasis', 'arthritis'],
 49: ['psoriasis'],
 95: ['varicose veins'],
 96: ['varicose veins'],
 97: ['varicose veins'],
 98: ['dimorphic hemorrhoids'],
 99: ['varicose veins'],
 145: ['typhoid'],
 146: ['typhoid'],
 147: ['typhoid'],
 148: ['peptic ulcer disease'],
 149: ['typhoid'],
 195: ['psoriasis'],
 196: ['dengue'],
 197: ['dengue'],
 198: ['dengue'],
 199: ['chicken pox'],
 245: ['dengue'],
 246: ['impetigo'],
 247: ['impetigo'],
 248: ['dengue'],
 249: ['impetigo'],
 295: ['arthritis'],
 296: ['dengue'],
 297: ['dengue'],
 298: ['malaria'],
 299: ['psoriasis'],
 345: ['psoriasis'],
 346: ['psoriasis'],
 347: ['psoriasis'],
 348: ['psoriasis'],
 349: ['impetigo'],
 395: ['allergy'],
 396: ['common cold'],
 397: ['pneumonia'],
 398: ['allergy'],
 399: ['bronchial asthma'],
 445: ['bronchial asthma'],
 446: ['bronchial asthma'],
 447: ['bronchial asthma'],
 448: ['pneumonia'],
 449: ['pneumonia'],
 49

In [43]:
# Offsets picked out of every block of 50 consecutive rows.
# NOTE(review): assumes the sheet is ordered as 50 rows per disease - confirm.
rows2select = list(range(45, 50))

# One block per distinct disease; shift the offsets by 50 for each block.
validation_rows = []
for block in range(len(diseases)):
    block_start = 50 * block
    validation_rows.extend(offset + block_start for offset in rows2select)

print(validation_rows)

[45, 46, 47, 48, 49, 95, 96, 97, 98, 99, 145, 146, 147, 148, 149, 195, 196, 197, 198, 199, 245, 246, 247, 248, 249, 295, 296, 297, 298, 299, 345, 346, 347, 348, 349, 395, 396, 397, 398, 399, 445, 446, 447, 448, 449, 495, 496, 497, 498, 499, 545, 546, 547, 548, 549, 595, 596, 597, 598, 599, 645, 646, 647, 648, 649, 695, 696, 697, 698, 699, 745, 746, 747, 748, 749, 795, 796, 797, 798, 799, 845, 846, 847, 848, 849, 895, 896, 897, 898, 899, 945, 946, 947, 948, 949, 995, 996, 997, 998, 999, 1045, 1046, 1047, 1048, 1049, 1095, 1096, 1097, 1098, 1099, 1145, 1146, 1147, 1148, 1149, 1195, 1196, 1197, 1198, 1199]


In [52]:
# Side-by-side view of the ground-truth label and the parsed predictions for
# every validation row.
for row in validation_rows:
    # Rows whose prediction cell was empty have no entry in prediction_labels;
    # show them with an empty list instead of aborting with a KeyError.
    row_predictions = prediction_labels.get(row, [])
    print(f'Ground Truth {label[row].value.lower()} vs Predictions {row_predictions}')

Ground Truth psoriasis vs Predictions ['psoriasis', 'arthritis']
Ground Truth psoriasis vs Predictions ['psoriasis']
Ground Truth psoriasis vs Predictions ['arthritis']
Ground Truth psoriasis vs Predictions ['psoriasis', 'arthritis']
Ground Truth psoriasis vs Predictions ['psoriasis']
Ground Truth varicose veins vs Predictions ['varicose veins']
Ground Truth varicose veins vs Predictions ['varicose veins']
Ground Truth varicose veins vs Predictions ['varicose veins']
Ground Truth varicose veins vs Predictions ['dimorphic hemorrhoids']
Ground Truth varicose veins vs Predictions ['varicose veins']
Ground Truth typhoid vs Predictions ['typhoid']
Ground Truth typhoid vs Predictions ['typhoid']
Ground Truth typhoid vs Predictions ['typhoid']
Ground Truth typhoid vs Predictions ['peptic ulcer disease']
Ground Truth typhoid vs Predictions ['typhoid']
Ground Truth chicken pox vs Predictions ['psoriasis']
Ground Truth chicken pox vs Predictions ['dengue']
Ground Truth chicken pox vs Predictions

In [62]:
from collections import Counter

# Per-disease tallies over the validation rows:
#   TP - the ground-truth label is among the row's predictions
#   FN - the ground-truth label is missing from the row's predictions
#   FP - a label was predicted for a row whose ground truth is a different one
TP = Counter()
FP = Counter()
FN = Counter()

for row in validation_rows:
    gt = label[row].value.lower()

    # Rows whose prediction cell was empty have no entry in prediction_labels;
    # treat them as "nothing predicted" instead of failing with a KeyError.
    # dict.fromkeys drops repeated labels (keeping first-seen order) so a label
    # listed twice in one cell is counted only once for that row.
    row_predictions = list(dict.fromkeys(prediction_labels.get(row, [])))

    # The label is either found (TP) or missed (FN) - never both.
    if gt in row_predictions:
        TP[gt] += 1
    else:
        FN[gt] += 1

    # Every predicted label that differs from the ground truth is an FP.
    for pred in row_predictions:
        if pred != gt:
            FP[pred] += 1


In [70]:
# Per-disease number of validation rows whose ground-truth label was among
# the predictions for that row.
TP

Counter({'psoriasis': 4,
         'varicose veins': 4,
         'typhoid': 4,
         'chicken pox': 1,
         'impetigo': 3,
         'dengue': 2,
         'common cold': 1,
         'pneumonia': 2,
         'dimorphic hemorrhoids': 5,
         'arthritis': 1,
         'acne': 2,
         'cervical spondylosis': 5,
         'jaundice': 4,
         'malaria': 5,
         'urinary tract infection': 4,
         'allergy': 1,
         'gastroesophageal reflux disease': 4,
         'peptic ulcer disease': 3,
         'diabetes': 1})

In [71]:
# Total number of validation rows whose ground-truth label was predicted.
sum(TP.values())

56