In [None]:
import os

# Choose the data root from the host name: "/data/bionets" when the node name
# contains "ramses", "/data_nfs/" on every other machine.
base = "/data/bionets" if "ramses" in os.uname()[1] else "/data_nfs/"
import torch as t
import cv2 
import sys
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import pickle
import pandas as pd
from matplotlib.patches import Patch

# Expose only GPU index 1 to CUDA.
# NOTE(review): this is set after `import torch`; it presumably still takes
# effect because CUDA is only initialised on first use — confirm.
os.environ["CUDA_VISIBLE_DEVICES"]="1"
# Make the parent directory importable so that the `src` package can be found.
sys.path.append("..")
# NOTE(review): the star import presumably supplies get_data_csv, balance,
# markers, EfficientnetWithFinetuning and MelanomaData used further down —
# confirm against src/__init__.py.
from src import *
plt.style.use("default")


from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression

In [None]:
# Load the table and keep only the rows of the "Melanoma" group.
# .copy() detaches the filtered frame from the loaded one, so the column
# assignments below write into `data` itself rather than into a possible view
# (this is what pandas' SettingWithCopyWarning is about).
data = get_data_csv(high_quality_only=True)
data = data[data["Group"] == "Melanoma"].copy()

# Binary target: True when "Float tumor stage" is above 0.5.
data["Coarse tumor stage"] = data["Float tumor stage"] > 0.5

# Rows whose Histo-ID is in this fixed list form the validation split;
# every other row is training data.
val = ['B030', 'B407', 'B515', 'B279', 'B295', 'B487', 'B503']
data["split"] = "train"
data.loc[data["Histo-ID"].isin(val), "split"] = "val"

# Snapshot with a fresh 0..n-1 index, taken before balance() is applied.
# drop=True discards the old index directly instead of materialising it as an
# "index" column and dropping that column again.
data_unique = data.reset_index(drop=True)
data = balance(data, split_by="split", variable="Coarse tumor stage")

In [None]:
# Inspect the index of `data` as returned by balance(...).
data.index.values

In [None]:
def get_features(effnet, x):
    """Return the pooled, flattened features that `effnet` computes for `x`.

    Parameters
    ----------
    effnet : torch.nn.Module or None
        Model exposing `features`, `avgpool` and `flatten`. The model is used
        exactly as passed in, so the caller loads the checkpoint once instead
        of once per call. If None, the fine-tuned checkpoint is loaded from
        disk and moved to the GPU (the behaviour this function used to have
        on every call).
    x : torch.Tensor
        Input batch; it is moved to the device the model lives on.

    Returns
    -------
    numpy.ndarray
        Output of `effnet.flatten`, detached and copied to the CPU.
    """
    if effnet is None:
        # Fallback for callers that do not supply a model.
        effnet = EfficientnetWithFinetuning(indim=len(markers))
        effnet.load_state_dict(t.load("../model/finetuned_effnet_with_LR_reduction_on_plateau.pt", map_location="cpu"))
        effnet = effnet.cuda()

    effnet = effnet.eval()

    # Run on whichever device the model's parameters are on; a model without
    # parameters leaves the input where it already is.
    first_param = next(effnet.parameters(), None)
    device = first_param.device if first_param is not None else x.device

    # Pure inference: no autograd graph is needed for feature extraction.
    with t.no_grad():
        x = effnet.features(x.to(device))
        x = effnet.avgpool(x)
        x = effnet.flatten(x)
    return x.detach().cpu().numpy()

In [None]:
# Load the fine-tuned network once, move it to the GPU and switch to eval mode.
effnet = EfficientnetWithFinetuning(indim=len(markers))
effnet.load_state_dict(t.load("../model/finetuned_effnet_with_LR_reduction_on_plateau.pt", map_location="cpu"))
effnet = effnet.cuda()
effnet = effnet.eval()

# One sample per batch and no shuffling, so the rows of `conc` follow the
# order in which the dataset yields its samples.
dl = t.utils.data.DataLoader(MelanomaData(markers, classify=False, data=data, mode="val"), batch_size=1, shuffle=False)
features = list()
labels = list()
# A plain for-loop stops when the loader is exhausted. Unlike a hand-written
# `while True / next() / except StopIteration`, it cannot mistake a
# StopIteration raised inside the loop body for the end of the data and
# silently truncate the result.
for x, y in dl:
    print("progressing")
    labels.append(y.detach().cpu().numpy()[0])
    features.append(get_features(effnet, x))
# Stack the per-sample feature arrays along the first axis.
conc = np.concatenate(features)

In [None]:
# Keep one feature row per entry of `data_unique`. Slicing to an explicit
# length instead of dropping a fixed number of trailing rows makes this cell
# idempotent: re-running it can no longer cut off additional rows.
# NOTE(review): assumes the surplus rows sit at the end of `conc` (presumably
# the samples added by balance()) — confirm.
conc = conc[:len(data_unique)]

In [None]:
# From here on `data` is the snapshot taken before balance(...) was applied
# (the frame with the reset index).
data = data_unique

In [None]:
# Row positions of each partition, computed once and reused for both the
# feature matrix and the target vector.
train_rows = np.flatnonzero((data["split"] == "train").to_numpy())
val_rows = np.flatnonzero((data["split"] == "val").to_numpy())

labels = data["Coarse tumor stage"].values

# Features and targets for fitting ...
X, y = conc[train_rows], labels[train_rows]
# ... and for evaluation.
X_val, y_val = conc[val_rows], labels[val_rows]

In [None]:
# Show the "Histo-ID" value of every row of `data`.
data["Histo-ID"].values

In [None]:
# Depth-2 random forest with a fixed seed, fitted on the training features and
# scored on the validation rows.
clf = RandomForestClassifier(max_depth=2, random_state=0).fit(X, y)
y_pred = clf.predict(X_val)

val_accuracy = accuracy_score(y_val, y_pred)
val_f1 = f1_score(y_val, y_pred)
print(val_accuracy, val_f1)
# Ground truth on the first line, predictions on the second.
print(y_val, y_pred, sep="\n")

In [None]:
# Least-squares fit on the boolean target; a prediction above 0.5 is counted
# as the positive class.
linreg = LinearRegression().fit(X, y)
y_pred = linreg.predict(X_val) > 0.5

val_accuracy = accuracy_score(y_val, y_pred)
val_f1 = f1_score(y_val, y_pred)
print(val_accuracy, val_f1)
# Ground truth on the first line, thresholded predictions on the second.
print(y_val, y_pred, sep="\n")

In [None]:
# Project the feature matrix onto its first two principal components.
pca = PCA(2)
transform = pca.fit_transform(conc)

# Plotting frame: one row per projected sample.
df = pd.DataFrame()
df["PC1"] = transform[:, 0]
df["PC2"] = transform[:, 1]
df["Coarse tumor stage"] = labels
# Assign the result of replace() back to the column. Calling
# `df[col].replace(..., inplace=True)` operates on a temporary Series and does
# not reliably update `df` (it is a no-op under pandas Copy-on-Write).
df["Coarse tumor stage"] = df["Coarse tumor stage"].replace({True: "T3, T4", False: "T1, T2"})
df["Group"] = data["Group"].values
# Single .loc assignment instead of the chained `df[col].iloc[...] = ...`,
# for the same reason as above.
df.loc[df["Group"] == "Nevus", "Coarse tumor stage"] = "Nevus"

In [None]:
# Apply the seaborn "paper" context once, before the figure is created.
sns.set_theme("paper")
palette = {"T1, T2": "#f57d05", "T3, T4": "#8634b3"}
f, ax = plt.subplots(1,1, figsize=(6,6))
f.patch.set_facecolor('white')
# Pass the frame by keyword and name the target axes explicitly, so the call
# does not depend on positional-argument order or on the "current axes".
sns.scatterplot(data=df, x="PC1", y="PC2", palette=palette, hue="Coarse tumor stage", hue_order=["T1, T2", "T3, T4"], ax=ax)
# Replace seaborn's automatic legend with one patch per palette entry,
# placed above the axes.
legend_handles = [Patch(color=color, label=key) for key, color in palette.items()]
ax.legend(handles=legend_handles, loc=(0.35,1), ncol=3, frameon=False)
plt.tight_layout()
plt.savefig("../result_plots/PCA_on_EffNet_features.pdf")

In [None]:
# Fit a fresh two-component PCA on the rows whose "Group" is "Melanoma".
melanoma_rows = np.flatnonzero((data["Group"] == "Melanoma").to_numpy())
pca = PCA(n_components=2)
transform = pca.fit_transform(conc[melanoma_rows])

In [None]:
# `transform` holds one row per "Melanoma" sample, so labels and group are
# restricted to those same rows before they are attached to the frame.
is_melanoma = (data["Group"] == "Melanoma").to_numpy()

df = pd.DataFrame()
df["PC1"] = transform[:, 0]
df["PC2"] = transform[:, 1]
df["Coarse tumor stage"] = np.asarray(labels)[is_melanoma]
# Assign the result of replace() back to the column. Calling
# `df[col].replace(..., inplace=True)` operates on a temporary Series and does
# not reliably update `df` (it is a no-op under pandas Copy-on-Write).
df["Coarse tumor stage"] = df["Coarse tumor stage"].replace({True: "T3, T4", False: "T1, T2"})
df["Group"] = data["Group"].values[is_melanoma]

In [None]:
# Colour per coarse tumour stage; the same mapping drives the scatter plot
# and the hand-built legend.
palette = {"T1, T2": "#f57d05", "T3, T4": "#8634b3"}

f, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 6))
sns.set_theme("paper")
sns.scatterplot(
    data=df,
    x="PC1",
    y="PC2",
    hue="Coarse tumor stage",
    palette=palette,
    ax=ax,
)

# One legend patch per stage, laid out in a single row above the axes.
legend_handles = [Patch(color=palette[stage], label=stage) for stage in palette]
ax.legend(handles=legend_handles, loc=(0.275, 1), ncol=2, frameon=False)
plt.tight_layout()