In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Sat Apr 30 17:39:38 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P8    26W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# import packages and modules to access and call their functions later
import copy
import csv
import itertools
import os
import pickle as pkl
import random
import time
import zipfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import pyplot

# roc curve and auc
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split

In [None]:
# Make sure to add streetviews2 shortcut to your own drive before running the cell below
# Mount Google Drive into the Colab filesystem so the shared folder is reachable as a path.
from google.colab import drive
drive.mount('/content/drive/')
# Switch the working directory to the data folder; later cells open their files by bare file name.
%cd /content/drive/MyDrive/streetviews2/arrs_pkl/

Mounted at /content/drive/
/content/drive/.shortcut-targets-by-id/1gnVV0eOEygqj9_wnneoTbAKcSf-W4nJS/streetviews2/arrs_pkl


In [None]:
# Notebook-wide pandas settings, all applied through pd.set_option.

# show at most 100 rows when a frame is displayed
pd.set_option('display.max_rows', 100)

# column-width limit of 0, used here so that long cell text is not truncated
pd.set_option('display.max_colwidth', 0)

# render floats with a thousands separator and 4 decimal places
pd.set_option('display.float_format', '{:,.4f}'.format)

# turn off the chained-assignment warning (pandas' default for this option is 'warn')
pd.set_option('mode.chained_assignment', None)

In [None]:
# Build the Model/AUC table for the CN models from df_CN.csv.
# The CSV is transposed so that each of its columns becomes one row.
temp = pd.read_csv("df_CN.csv")
df1 = temp.T.reset_index()
# Promote the first transposed row to column headers; it is dropped from the data just below.
df1.columns = df1.iloc[0]
# Keep only the name column (originally the "Unnamed: 0" header) and the AUC column.
# NOTE(review): the regex replace turns 'FC' into 'CNN' in string cells; the CN_* names shown
# in the output contain no 'FC', so this looks like a leftover no-op — confirm before removing.
df1 = (
  df1.iloc[1:][["Unnamed: 0", "AUC"]]
  .rename(columns={"Unnamed: 0": "Model"})
  .replace(regex={r'FC': 'CNN'})
)
df1

Unnamed: 0,Model,AUC
1,CN_0,0.5691
2,CN_1,0.5681
3,CN_2,0.5653
4,CN_3,0.557
5,CN_4,0.5454


In [None]:
# Build the Model/AUC table for the FC models from df_FC.csv.
# The CSV is transposed so that each of its columns becomes one row.
temp = pd.read_csv("df_FC.csv")
df2 = temp.T.reset_index()
# Promote the first transposed row to column headers; it is dropped from the data just below.
df2.columns = df2.iloc[0]
# Keep only the name column (originally the "Unnamed: 0" header) and the AUC column.
df2 = (
  df2.iloc[1:][["Unnamed: 0", "AUC"]]
  .rename(columns={"Unnamed: 0": "Model"})
)
df2

Unnamed: 0,Model,AUC
1,FC_0,0.5657
2,FC_1,0.5539


In [None]:
# Stack the CN and FC AUC tables into one frame and renumber its rows from 0.
df_models = pd.concat([df1, df2]).reset_index(drop=True)
df_models

Unnamed: 0,Model,AUC
0,CN_0,0.5691
1,CN_1,0.5681
2,CN_2,0.5653
3,CN_3,0.557
4,CN_4,0.5454
5,FC_0,0.5657
6,FC_1,0.5539


# Step 1A: Read in probs (probabilities predicted for cancer in the validation dataset)

In [None]:
# Read the pickled (.pckl) validation predictions; later cells iterate over this object
# and index it by model name.
# NOTE(review): unpickling can execute code stored in the file — only load trusted files.
probs = pd.read_pickle('validation_predictions.pckl')

In [None]:
# Read the pickled predictions stored in Berkeley_predictions.pckl; indexed by model name below.
# NOTE(review): presumably predictions for the Berkeley data — confirm. Only unpickle trusted files.
berk_prob = pd.read_pickle("Berkeley_predictions.pckl")

In [None]:
# List the names under which the validation predictions are stored, one per line.
for model_name in probs:
  print(model_name)

FC_0
FC_1
CN_0
CN_1
CN_2
CN_3
CN_4


In [None]:
# Pull the validation predictions stored under 'FC_0' and 'CN_4'.
model1, model3 = probs['FC_0'], probs['CN_4']

In [None]:
# Pull the Berkeley predictions stored under 'FC_1' and 'CN_2'.
model2, model4 = berk_prob['FC_1'], berk_prob['CN_2']

In [None]:
# pick 'FC_0', 'CN_4' — stored in the order CN_4 (model3) first, then FC_0 (model1)
lst = [model3, model1]
lst

[array([[0.1665586 , 0.19175081, 0.12363591, 0.18940693, 0.19768904,
         0.13095872],
        [0.15778276, 0.18261932, 0.1242151 , 0.18984604, 0.20647828,
         0.1390585 ],
        [0.16799837, 0.18650663, 0.12841749, 0.19020763, 0.19309828,
         0.13377164],
        [0.17016865, 0.18967211, 0.12111107, 0.19256951, 0.19339563,
         0.13308312],
        [0.16933015, 0.18506321, 0.1371572 , 0.18218602, 0.19126482,
         0.13499862],
        [0.169932  , 0.17818643, 0.14812116, 0.18536639, 0.18121494,
         0.13717912],
        [0.16643317, 0.19217363, 0.12350721, 0.18828595, 0.18921193,
         0.14038809],
        [0.17144898, 0.1812437 , 0.13605656, 0.19151069, 0.18567723,
         0.1340628 ],
        [0.16748823, 0.1946294 , 0.12702118, 0.17733659, 0.20329702,
         0.13022757],
        [0.16988951, 0.1915613 , 0.12372147, 0.19158971, 0.19323309,
         0.13000493],
        [0.16772322, 0.19727501, 0.10752307, 0.1944432 , 0.2068902 ,
         0.12614524],

In [None]:
# pick 'FC_1', 'CN_2' from the Berkeley predictions, stored in that order
lst_ber = [model2, model4]

# Take the mean of CN & FC

In [None]:
# Element-wise average, along axis 0, of the arrays collected in lst_ber.
mean_two_model = np.asarray(lst_ber).mean(axis=0)

In [None]:
# For each row of the averaged array, take the index of its largest value (argmax along axis 1).
pre = mean_two_model.argmax(axis=1)

In [None]:
# Persist the per-row argmax results computed from the averaged Berkeley predictions.
# `pkl` comes from the notebook's import cell, so this cell no longer depends on a later
# cell having run first (it previously raised NameError on Restart & Run All).
# np.array(...) already copies, so `pre` itself is left untouched.
ber_pci = np.array(pre)
# NOTE: despite the ".csv" suffix this file is a binary pickle, not text; the name is kept
# unchanged so any existing reader of this path still finds it. Load it with pkl.load.
with open('ber_pci_first_1000_arr.csv', 'wb') as f:
  pkl.dump(ber_pci, f)

In [None]:
# Load the object pickled in y_val.pckl (presumably the validation labels — confirm).
# NOTE(review): unpickling can execute code stored in the file — only load trusted files.
import pickle as pkl

label = []  # placeholder, replaced by the unpickled object below
with open('y_val.pckl', 'rb') as label_file:
  label = pkl.load(label_file)

# Quick sanity check: show which container type was loaded.
print(type(label))

In [None]:
# Wrap the loaded `label` object in a single-column DataFrame named "label" and display it.
labels = pd.DataFrame(label, columns=["label"])
labels