# Sample Report:

#### McAuliffe, P. F. et al. "Ability to generate patient-derived breast cancer xenografts is enhanced in chemoresistant disease and predicts poor patient outcomes". PLoS ONE 10, e0136851 (2015)

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4556673/pdf/pone.0136851.pdf

## Imports

In [17]:
# Third-party libraries used throughout the notebook.
import pandas as pd
import numpy as np
# Plotting goes through the pyplot interface. Aliasing the bare `matplotlib`
# package as `plt` would make later `plt.plot(...)` / `plt.subplots(...)`
# calls fail, because those functions live in `matplotlib.pyplot`.
import matplotlib.pyplot as plt

## 1. Acquire Data

### S2 Table

In [12]:
# S2 Table: proteins differentially expressed between patients (P0) and mice (P1),
# cutoff FDR = 0.2 (per the title row of the workbook).
# Kept under a table-specific name so that loading the next table does not
# overwrite this one before the "Prepare Data" stage.
# The sheet's first row is the table title, so pandas uses it as the header:
# columns come back as "Unnamed: N" and the real column labels sit in the
# first data rows of the preview.
s2_raw = pd.read_excel(r'BCX_tables/S2_Table.xlsx')
s2_raw.head()

Unnamed: 0,"S2 Table: Differentially expressed proteins between P0 and P1, cutoff: FDR=0.2",Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7
0,,,,,,,,
1,,Paired t tests,,,P0 (Patients),,P1 (Mice),
2,,T statistics,P value,FDR,mean,sd,mean,sd
3,EGFR,-8.3362,0.0011,0.0662,-0.4167,0.7898,1.5519,0.7004
4,Merlin/NF2,5.4049,0.0057,0.0713,-0.2657,0.2003,-0.8749,0.228


### S3 Table

In [13]:
# S3 Table: copy number alterations detected in all tested passages of six models
# by targeted exome sequencing (BCX-006, -010, -011, -017, -022, -024).
# High amplification cutoff was 4 (highlighted in red) and high deletion cutoff
# was 1 (highlighted in green) in the Excel file.
# Kept under a table-specific name so that loading the next table does not
# overwrite this one before the "Prepare Data" stage.
# The sheet's first row is the table title, so the columns come back as
# "Unnamed: N"; the model / passage labels sit in the first data rows, with the
# models laid out side by side and separated by empty columns.
s3_raw = pd.read_excel(r'BCX_tables/S3_Table.xlsx')
s3_raw.head()

Unnamed: 0,S3 Table. Copy number alterations detected in all tested passages of six models by targeted exome sequencing.,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39
0,,,,,,,,,,,...,,,,,,,,,,
1,BCX-006,P0,P1,P4,P7,P10,P13,P16,,BCX-010,...,,BCX-022,P0,P1,P6,,BCX-024,P0,P1,P7
2,AKT1,4,,,,,,,,ATM,...,,AKAP3,3.2,9.7,8.8,,BAP1,1.5,0.7,1.4
3,AR,1,1.3,1.1,1.1,0.9,1,1,,CSMD1,...,,ATM,0.5,0.5,0.9,,CDKN2A,2.8,4,5.6
4,ATRX,1,0.9,1.1,1.1,0.9,1,1,,HEATR7B2,...,,BRCA2,1.2,1,1.4,,CPAMD8,,3.1,5


### S4 Table

In [14]:
# S4 Table: per the workbook title, copy number alterations detected in P0 and P1
# of four models by whole exome sequencing (BCX-010, -017, -022, -024).
# High amplification cutoff was 4 (highlighted in red) and high deletion cutoff
# was 1 (highlighted in green) in the Excel file.
# NOTE(review): the preview rows (e.g. "- Q151L", "- M110T" with fractional values)
# look like mutation calls rather than copy numbers -- confirm that the title and
# the file contents belong together.
# NOTE(review): at least one "Gene" cell is read as a datetime
# (2015-03-04 00:00:00); presumably a gene symbol auto-converted by Excel -- verify.
# Kept under a table-specific name so that loading the next table does not
# overwrite this one before the "Prepare Data" stage.
s4_raw = pd.read_excel(r'BCX_tables/S4_Table.xlsx')
s4_raw.head()

Unnamed: 0,S4 Table. Copy number alterations detected in P0 and P1 of four models by whole exome sequencing.,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14
0,,,,,,,,,,,,,,,
1,BCX-010,,,,BCX-017,,,,BCX-022,,,,BCX-024,,
2,Gene,P0,P1,,Gene,P0,P1,,Gene,P0,P1,,Gene,P0,P1
3,- Q151L,0.3561,0.4835,,-,0.0639,,,- C29S,0.4615,,,2015-03-04 00:00:00,0.2105,0.35
4,ADAM17 -,0.0505,,,AMPH -,0.1136,,,-,0.3444,0.4485,,- M110T,0.0627,


### S5 Table

In [15]:
# S5 Table: per the workbook title, somatic mutations detected in P0 and P1 of
# four models by whole exome sequencing (BCX-010, -017, -022, -024).
# NOTE(review): the preview shows BCX-006 with passages P0-P16, which does not
# match a "P0 and P1 of four models" layout -- confirm that the title and the
# file contents belong together.
# Kept under a table-specific name so that loading the next table does not
# overwrite this one before the "Prepare Data" stage.
s5_raw = pd.read_excel(r'BCX_tables/S5_Table.xlsx')
s5_raw.head()

Unnamed: 0,S5 Table. Somatic mutations detected in P0 and P1 of four models by whole exome sequencing.,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39
0,,,,,,,,,,,...,,,,,,,,,,
1,BCX-006,P0,P1,P4,P7,P10,P13,P16,,BCX-010,...,,BCX-022,P0,P1,P6,,BCX-024,P0,P1,P7
2,CSMD2 R2096K,5.53,,,,,,,,EGFR D1072E,...,,SMARCA4 R1465G,44.84,40.73,48.1,,ERCC5 P903S,21.42,,
3,NAV3 D414G,28.01,41.79,29.22,29.19,35.45,31.38,35.05,,FGFR1 V273M,...,,TP53 E221*,96.71,70.12,98.43,,FAT3 K2447N,5.59,,
4,PIK3CA H1047R,16.31,24.91,22.38,20.31,30.25,30.77,31.75,,MET V486A,...,,MLL3 S784Y,,8.83,8.19,,FGFR4 (splice donor variant),29.05,,


### S6 Table

In [16]:
# S6 Table: per the workbook title, somatic mutations detected in all tested
# passages of six models by targeted exome sequencing
# (BCX-006, -010, -011, -017, -022, -024).
# NOTE(review): the preview has gene / CNS / CN columns with DEL / AMP calls for
# P0 and P1 only, which looks like copy-number data -- confirm that the title and
# the file contents belong together.
# NOTE(review): several "gene" cells are read as datetimes
# (e.g. 2015-09-02 00:00:00); presumably gene symbols auto-converted by Excel -- verify.
# Kept under a table-specific name so the earlier tables are not overwritten
# and every table is still available in the "Prepare Data" stage.
s6_raw = pd.read_excel(r'BCX_tables/S6_Table.xlsx')
s6_raw.head()

Unnamed: 0,S6 Table. Somatic mutations detected in all tested passages of six models by targeted exome sequencing.,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26
0,,,,,,,,,,,...,,,,,,,,,,
1,BCX-010-P0,,,BCX-010-P1,,,,BCX-017-P0,,,...,BCX-022-P1,,,,BCX-024-P0,,,BCX-024-P1,,
2,gene,CNS,CN,gene,CNS,CN,,gene,CNS,CN,...,gene,CNS,CN,,gene,CNS,CN,gene,CNS,CN
3,2015-09-02 00:00:00,DEL,1.2,2015-09-02 00:00:00,H.DEL,1,,2015-03-03 00:00:00,DEL,1.5,...,2015-03-01 00:00:00,H.DEL,0.7,,2015-03-05 00:00:00,"AMP,DEL","3.5,1.4",2015-03-05 00:00:00,H.DEL,0.7
4,2015-09-03 00:00:00,"H.AMP,NORM",42.5,2015-09-03 00:00:00,"H.AMP,AMP","4.8,3",,,,,...,2015-03-02 00:00:00,H.DEL,0.8,,5S_rRNA,"DEL,NORM,AMP","1.5,2.1,3,2.2,2.9,2.3",5S_rRNA,"H.DEL,AMP,NORM","0.8,2.8,1.9,2.9,2.1,3.4,2.2"


## 2. Prepare Data

In [None]:
# Placeholder cell: no data preparation is implemented yet.
# TODO: filter NaNs -- drop the all-NaN spacer rows/columns visible in the previews above.
# TODO: label the "Unnamed: N" columns using the real header rows found in the first data rows.
# TODO: clean the data -- e.g. "gene" cells that were read as datetimes in the S4 and S6 previews.

## 3. Explore Relationships

## 4. Model (ML)

## 5. Conclusions