In [1]:
%matplotlib inline
from typing import Tuple

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from source import load_avenio_files, categorical_columns_to_lower
from transform import combine_tsv_files, load_process_and_store_spreadsheets, merge_mutation_spreadsheet_t0_with_t1, clean_mutation_columns

# First make a spreadsheet with merged t0 and t1.

In [5]:
# Names of the two input files handed to `load_avenio_files` below.
spread_sheet_filename = "variant_list_20200409.xlsx"
spss_filename = "clinical_20200419.sav"

# Column names; presumably the measurements processed further on -- TODO confirm
# where this list is consumed.
columns = ["Allele Fraction", "No. Mutant Molecules per mL", "CNV Score"]

# Load both files; the loader returns a (patient_mutations, phenotypes) pair.
patient_mutations, phenotypes = load_avenio_files(spread_sheet_filename, spss_filename)

In [8]:
def harmonic_mean(t0, t1):
    """Return the harmonic mean of the two measurements `t0` and `t1`.

    The harmonic mean of two values is ``2 * t0 * t1 / (t0 + t1)``. The
    previous implementation omitted the factor 2 and therefore returned
    half of the harmonic mean.

    Args:
        t0: Value(s) at the first time point.
        t1: Value(s) at the second time point.

    Returns:
        The harmonic mean, computed with ordinary arithmetic operators, so
        array-like inputs that support those operators are combined
        element-wise.

    Note:
        The result is undefined when ``t0 + t1 == 0``: plain Python numbers
        raise ZeroDivisionError; NumPy-style inputs presumably yield nan/inf
        with a RuntimeWarning -- verify against the actual input types.
    """
    return 2 * t0 * t1 / (t0 + t1)

# Run the spreadsheet pipeline with `harmonic_mean` as the t0/t1 transformation.
# Judging by the file names read later in this notebook, results presumably end
# up as "<prefix>_harmonic_mean__<column>.tsv" -- confirm in transform.py.
load_process_and_store_spreadsheets(
    all_filename_prefix="output/all_",
    train_filename_prefix="output/train_",
    test_filename_prefix="output/test_",
    transformation=harmonic_mean,
)

In [9]:
def difference(t0, t1):
    """Return the absolute change from `t0` to `t1`, i.e. ``t1 - t0``.

    Positive results mean the value went up between the two time points,
    negative results mean it went down.
    """
    change = t1 - t0
    return change

# Run the spreadsheet pipeline with `difference` (t1 - t0) as the transformation.
# Judging by the file names read later in this notebook, results presumably end
# up as "<prefix>_difference__<column>.tsv" -- confirm in transform.py.
load_process_and_store_spreadsheets(
    all_filename_prefix="output/all_",
    train_filename_prefix="output/train_",
    test_filename_prefix="output/test_",
    transformation=difference,
)

In [10]:
def relative_difference(t0, t1):
    """Return the change from `t0` to `t1` relative to `t0`: ``(t1 - t0) / t0``.

    Note:
        A baseline of ``t0 == 0`` makes the ratio undefined. Plain Python
        numbers raise ZeroDivisionError; NumPy-style inputs presumably give
        inf/nan together with a RuntimeWarning -- verify against the actual
        input types.
    """
    change = t1 - t0
    return change / t0

# Run the spreadsheet pipeline with `relative_difference` as the transformation.
# Judging by the file names read later in this notebook, results presumably end
# up as "<prefix>_relative_difference__<column>.tsv" -- confirm in transform.py.
load_process_and_store_spreadsheets(
    all_filename_prefix="output/all_",
    train_filename_prefix="output/train_",
    test_filename_prefix="output/test_",
    transformation=relative_difference,
)

  return (t1-t0)/t0


In [11]:
def up_or_down(t0, t1):
    """Return the direction of the change from `t0` to `t1`.

    The result is ``np.sign(t1 - t0)``: 1 for an increase, -1 for a
    decrease and 0 when both values are equal.
    """
    change = t1 - t0
    return np.sign(change)


# Run the spreadsheet pipeline with `up_or_down` (sign of t1 - t0) as the
# transformation, using the same all/train/test output prefixes as the other runs.
load_process_and_store_spreadsheets(
    all_filename_prefix="output/all_",
    train_filename_prefix="output/train_",
    test_filename_prefix="output/test_",
    transformation=up_or_down,
)

In [12]:
def to_excel(snv_name, cnv_name):
    """Combine two TSV files and write the merged table to an Excel file.

    The output path is `snv_name` with a trailing ``.tsv`` extension swapped
    for ``_CNV.xlsx``. Only the extension at the very end of the path is
    touched; a ``.tsv`` occurring elsewhere (e.g. in a directory name) is
    left alone. If `snv_name` does not end in ``.tsv``, ``_CNV.xlsx`` is
    appended to the full name so the output never coincides with the input.

    Args:
        snv_name: Path of the first TSV file; also determines the output path.
        cnv_name: Path of the second TSV file.

    Side effects:
        Writes the Excel file and prints a confirmation line.
    """
    tsv_suffix = '.tsv'
    # Strip the extension only when it terminates the path; `str.replace`
    # would rewrite every occurrence of '.tsv' in the string.
    if snv_name.endswith(tsv_suffix):
        stem = snv_name[: -len(tsv_suffix)]
    else:
        stem = snv_name
    excel_filename = f'{stem}_CNV.xlsx'

    # `combine_tsv_files` returns a pair (presumably pandas objects -- confirm
    # in transform.py), joined here on their indices.
    X, y = combine_tsv_files(snv_name, cnv_name)
    X.merge(y, left_index=True, right_index=True).to_excel(excel_filename)
    print(f'Wrote {excel_filename}.')

In [13]:
# Export both harmonic-mean tables, each paired with the harmonic-mean CNV scores.
for snv_column in ("Allele Fraction", "No. Mutant Molecules per mL"):
    to_excel(
        f"output/all__harmonic_mean__{snv_column}.tsv",
        "output/all__harmonic_mean__CNV Score.tsv",
    )

Wrote output/all__harmonic_mean__Allele Fraction_CNV.xlsx.
Wrote output/all__harmonic_mean__No. Mutant Molecules per mL_CNV.xlsx.


In [14]:
# Export both difference tables, each paired with the difference CNV scores.
for snv_column in ("Allele Fraction", "No. Mutant Molecules per mL"):
    to_excel(
        f"output/all__difference__{snv_column}.tsv",
        "output/all__difference__CNV Score.tsv",
    )

Wrote output/all__difference__Allele Fraction_CNV.xlsx.
Wrote output/all__difference__No. Mutant Molecules per mL_CNV.xlsx.


In [15]:
# Export both relative-difference tables, each paired with the
# relative-difference CNV scores.
for snv_column in ("Allele Fraction", "No. Mutant Molecules per mL"):
    to_excel(
        f"output/all__relative_difference__{snv_column}.tsv",
        "output/all__relative_difference__CNV Score.tsv",
    )

Wrote output/all__relative_difference__Allele Fraction_CNV.xlsx.
Wrote output/all__relative_difference__No. Mutant Molecules per mL_CNV.xlsx.
