In [1]:
#!/usr/bin/env python3
import json
import numpy as np
import requests
import sys
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from manual_review_classifier.ClassifierPlots import create_reliability_diagram, create_roc_curve

###Create Tools for code
#create tool that can append files to a list
def append_file_to_list(file):
    list = []
    f = open(file, 'r')
    for line in f:
        line = line.strip('\n')
        line = line.split('\t')
        list.append(line)
    return list


In [7]:
#open error file for deep learning
error_data = append_file_to_list('../data/clinical_analysis_data/errors_dl.tsv')
CIViC_FN = append_file_to_list('../data/clinical_analysis_data/CIViC_variants_DL_FN.txt')
CIViC_FP = append_file_to_list('../data/clinical_analysis_data/CIViC_variants_DL_FP.txt')

In [8]:
# Pull clinical and non-clinical False Negatives
FN_no_clinical = {}
FN_clinical = {}
for number,thing in enumerate(error_data):
    if thing[2] == 'False Negative':
        code1 = thing[7:12]
        prob1 = thing[5][0:5]
        for item in CIViC_FN[1:]:
            code2 = item[0:5]
            prob2 = item[7][0:5]
            if code1 == code2 and prob1 == prob2:
                FN_clinical[number] = thing
for number,thing in enumerate(error_data):
    if thing[2] == 'False Negative' and number not in FN_clinical:
        FN_no_clinical[number] = thing

# Pull clinical and non-clinical False Positive
FP_no_clinical = {}
FP_clinical = {}
for number,thing in enumerate(error_data):
    if thing[2] == 'False Positive':
        code1 = thing[7:12]
        prob1 = thing[5][0:5]
        for item in CIViC_FP[1:]:
            code2 = item[0:5]
            prob2 = item[7][0:5]
            if code1 == code2 and prob1 == prob2:
                FP_clinical[number] = thing
for number,thing in enumerate(error_data):
    if thing[2] == 'False Positive' and number not in FP_clinical:
        FP_no_clinical[number] = thing

In [9]:
#Pull the non-Clinical Flase Positives and take random 30
FP_no_clinical_IGV = []
for item in range(45):
    number = np.random.choice((list(FP_no_clinical.keys())))
    FP_no_clinical_IGV.append(FP_no_clinical[number])


In [7]:
#Pull the non-Clinical Flase Negatives and take random 30
FN_no_clinical_IGV = []
for item in range(45):
    number = np.random.choice((list(FN_no_clinical.keys())))
    FN_no_clinical_IGV.append(FN_no_clinical[number])

In [8]:
#Pull the Clinical False Negatives and False Positives
FP_clinical_IGV = []
for k,v in FP_clinical.items():
    FP_clinical_IGV.append(v)
    
FN_clinical_IGV = []
for k,v in FN_clinical.items():
    FN_clinical_IGV.append(v)

In [12]:
# Pull master manual re-review list for analysis
MRR = append_file_to_list("../data/clinical_analysis_data/manual_rereview.txt")

In [13]:
# Determine columns that are reviewer calls
calls = [16, 17, 18, 19, 20, 21, 22]
# Count calls and determine consensus or no consensus
MRR_new = []
MR_count = 0
classifier_count = 0
new_consensus = 0
no_consensus = 0
for item in MRR[1:]:
    F = 0
    S = 0
    A = 0
    for k in calls:
        if item[k] == 'S':
            S += 1
        if item[k] == 'F':
            F += 1
        if item[k] == 'A':
            A += 1
    if F > S and F > A:
        consensus = 'F'
    elif S > F and S > A:
        consensus = 'S'
    elif A > F and A > S:
        consensus = 'A'
    else:
        consensus = 'none'
        no_consensus += 1
        agree = 'none'
    if item[0] == consensus:
        agree = "MR"
        MR_count += 1
    elif item[1] == consensus:
        agree = "Classifier"
        classifier_count += 1
    elif item[0] != consensus and item[1] != consensus:
        agree = 'new'
        new_consensus += 1
    MRR_new.append([item[0], item[1], item[2], item[3], item[25], consensus, agree])
    

In [14]:
# Print statements
total = len(MRR_new)
print('Total Variants: ',total)
print('MR-Variants: ',MR_count)
print('Classifier-Variants: ',classifier_count)
print('Change-Variants: ',new_consensus)
print('No Consensus: ',no_consensus)

Total Variants:  179
MR-Variants:  58
Classifier-Variants:  57
Change-Variants:  64
No Consensus:  13
