In [3]:
import altair as alt
import numpy as np
import pandas as pd
import pprint
import sklearn.metrics
import sklearn.model_selection
import sklearn.neural_network
import sklearn.svm
from sklearn.preprocessing import LabelEncoder
import copy

In [4]:
cancer_types = [
    "ccrcc",
    "endometrial",
    "hnscc",
    "lscc",
    "luad",
]

In [5]:
inputs = {}
targets = {}
for cancer in cancer_types:
    inputs[cancer] = pd.read_csv(f'clean_data/{cancer}_inputs.tsv', sep="\t", index_col=0)
    targets[cancer] = pd.read_csv(f'clean_data/{cancer}_targets.tsv', sep="\t", index_col=0)

In [6]:
target_cols = {}

for cancer in cancer_types:
    ys = {}
    y = targets[cancer]
    
    for col in y.columns:
        enc = LabelEncoder()
        ys[col] = enc.fit_transform(y[col])
        
    target_cols[cancer] = ys

In [7]:
def SVM_baseline(X, ys):
    
    results = {}
    for target, y in ys.items():
        mlp = sklearn.svm.SVC(
            random_state=0,
        )
        results[target] = sklearn.model_selection.cross_validate(
            mlp,
            X,
            y,
            cv=10,
            scoring={
                "accuracy": "accuracy",
                "precision": sklearn.metrics.make_scorer(
                    sklearn.metrics.precision_score,
                    average="samples",
                    zero_division=0
                ),
                "recall": sklearn.metrics.make_scorer(
                    sklearn.metrics.recall_score,
                    average="samples",
                    zero_division=0
                ),
            },
            n_jobs=-1,
        )
        
    scores = pd.DataFrame(results).\
    T[["test_accuracy", "test_precision", "test_recall"]].\
    applymap(np.mean).T
    
    scores.index.name = "metric"
    chart_df = scores.reset_index().melt(
        id_vars="metric",
        var_name="target",
        value_name="score",
    )
    
    chart = alt.Chart(chart_df).mark_bar().encode(
        x="target",
        y=alt.Y(
            "score",
            scale=alt.Scale(
                domain=[0, 1]
            )
        ),
        color="target",
        column="metric"
    )
    
    scores.columns.name = "target"
    scores.index.name = None
    scores = scores.T.sort_index()
        
    return scores, chart

In [8]:
svm_scores = {}
svm_charts = {}

for cancer in cancer_types:
    baseline_scores, baseline_chart = SVM_baseline(inputs[cancer], target_cols[cancer])
    svm_scores[cancer] = baseline_scores
    svm_charts[cancer] = baseline_chart

Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1757, in precision_score
    p, _, _, _ = precision_recall_fscore_support(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1548, in precision_recall_fs

ics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1757, in precision_score
    p, _, _, _ = precision_recall_fscore_support(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1548, in precision_recall_fscore_support
    MCM = multilabel_confusion_matrix(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 499, in multilabel_confusion_matrix
    raise ValueError(
ValueError: Samplewise metrics are not available outside of multilabel classification.

T

File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1757, in precision_score
    p, _, _, _ = precision_recall_fscore_support(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1548, in precision_recall_fscore_support
    MCM = multilabel_confusion_matrix(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 499, in multilabel_confusion_matrix
    raise Value

  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1757, in precision_score
    p, _, _, _ = precision_recall_fscore_support(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1548, in precision_recall_fscore_support
    MCM = multilabel_confusion_matrix(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 499, in multilabel_confusion_matrix
    raise Val

Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1757, in precision_score
    p, _, _, _ = precision_recall_fscore_support(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1548, in precision_recall_fs

Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/model_selection/_validation.py", line 761, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 103, in __call__
    score = scorer._score(cached_call, estimator, *args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_scorer.py", line 264, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1757, in precision_score
    p, _, _, _ = precision_recall_fscore_support(
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/metrics/_classification.py", line 1548, in precision_recall_fs

ValueError: n_splits=10 cannot be greater than the number of members in each class.

In [9]:
svm_scores['ccrcc']

Unnamed: 0_level_0,test_accuracy,test_precision,test_recall
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
histologic_grade,,,
histologic_type,,,
recurrence_status,,,
success_last_follow-up,,,
survival_status,,,
tumor_stage,,,


In [10]:
target_cols['ccrcc']

{'recurrence_status': array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0,
        0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 1, 0]),
 'survival_status': array([0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'histologic_type': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'tumor_stage': array([2, 0, 2, 0, 2, 2, 2, 0, 2, 1, 2, 2, 2, 0, 0, 2, 1, 0, 2, 2, 0, 0,
        0, 1, 0, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 0, 3, 2, 0, 2, 1, 0,
 

In [11]:
inputs['ccrcc']

Unnamed: 0_level_0,tumor_normal_residual_dist_ADCY3,tumor_normal_residual_dist_AGK,tumor_normal_residual_dist_AGXT,tumor_normal_residual_dist_AHSA1,tumor_normal_residual_dist_ALDH18A1,tumor_normal_residual_dist_ANKZF1,tumor_normal_residual_dist_AP3M1,tumor_normal_residual_dist_AP4S1,tumor_normal_residual_dist_APLP2,tumor_normal_residual_dist_APPL1,...,signed_orth_res_USP47,signed_orth_res_USP6NL,signed_orth_res_VPS25,signed_orth_res_WNK1,signed_orth_res_XPNPEP1,signed_orth_res_YARS2,signed_orth_res_ZDHHC2,signed_orth_res_ZEB1,signed_orth_res_ZNF358,signed_orth_res_ZNF397
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00004,0.452100,0.480587,0.608408,0.386821,0.150936,0.110665,0.187571,0.209328,1.000000,0.145496,...,0.535543,0.794728,0.703199,0.483853,0.869253,0.503691,0.493770,0.528256,0.468907,0.428962
C3L-00010,0.452100,0.000000,0.537385,0.547776,0.093862,0.119677,0.335780,0.458666,0.363297,0.441025,...,0.445245,0.690098,0.607158,0.216599,0.635404,0.227031,0.493770,0.528256,0.588094,0.428962
C3L-00011,0.452100,0.163958,0.153419,0.284550,0.735793,0.454296,0.886743,0.845151,0.002419,0.176751,...,0.249779,0.296491,0.000000,1.000000,0.365482,0.223514,0.310194,0.528256,0.572418,0.428962
C3L-00026,0.452100,0.335028,0.165536,0.162598,0.405443,0.328462,0.289042,0.061999,0.804837,0.410859,...,0.616949,0.797200,0.519382,0.205226,0.674230,1.000000,0.493770,0.700417,0.588094,0.428962
C3L-00079,0.452100,0.095632,0.095212,0.057760,0.200335,0.199979,0.021500,0.412560,0.572440,0.646901,...,0.421965,0.295365,0.367012,0.389211,0.559898,0.635467,0.493770,0.635227,0.588094,0.428962
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-01646,0.452100,0.134910,0.220263,0.330500,0.444779,0.321288,0.299516,0.681814,0.058109,0.430568,...,0.584231,0.360450,0.549910,0.426897,0.412840,0.203016,0.751256,0.528256,0.588094,0.310906
C3N-01648,0.452100,0.221827,0.250131,0.024432,0.114242,0.004377,0.399360,0.093853,0.342128,0.370386,...,0.606364,0.000000,0.574197,0.790429,0.379729,0.522804,0.493770,0.528256,0.856015,0.428962
C3N-01649,0.284759,0.109335,0.227562,0.135720,0.036035,0.141371,0.030476,0.073237,0.439558,0.031604,...,0.327495,0.694037,0.532705,0.286763,0.341304,0.485581,0.493770,0.000000,0.588094,0.428962
C3N-01651,0.075819,0.205972,0.466024,0.144897,0.496417,0.841197,0.148685,0.040512,0.772123,0.478044,...,0.547019,0.476216,0.265406,0.416161,0.543442,0.750166,0.493770,0.639754,0.588094,0.350607
