# This notebook was created to 1. calculate the frequency of START codons Leu & Ile in the Mtbc genome, 2. analyze drug phenotypes for sub-lineage *4.11* isolates and 3. analyze drug phenotypes for *eis* promoter / *eis* double mutants

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import os
import pandas as pd
import numpy as np
import sys
import pickle

import Bio
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio import SeqIO
from StringIO import StringIO
from Bio import AlignIO
from Bio.Align import AlignInfo
from Bio.Seq import MutableSeq

# [1] Find Frequency of START codons Leu & Ile

## [1.1]  Load the pickled dictionaries that are used for SNP annotation

In [3]:
# Relevant Information for H37Rv sequence SNP functional annotation
################################################################################################################################################################################################
####### Collect all DNA and Amino Acid sequences corresponding to genes on H37Rv #######
#load reference genome and reference annotation
reference_genome = '/n/data1/hms/dbmi/farhat/bin/work-horse/bin/h37rv.fasta'
for reference_genome in SeqIO.parse(reference_genome, "fasta"):
    reference_genome.seq.alphabet = IUPAC.unambiguous_dna

reference_genome_annotation = pd.read_csv('/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/H37Rv/h37rv_genome_summary.txt', '\t').set_index('name')

####### Load in dictionaries for SNP annotation #######
with open('/n/data1/hms/dbmi/farhat/Roger/homoplasy_project/pickled_files/dicts_for_SNP_annotation/H37Rv_gene_seq_records.pickle', 'rb') as handle:
    ref_gene_sequences_records = pickle.load(handle)
    
with open('/n/data1/hms/dbmi/farhat/Roger/homoplasy_project/pickled_files/dicts_for_SNP_annotation/H37Rv_protein_seq_records.pickle', 'rb') as handle:
    ref_protein_sequences_records = pickle.load(handle)
    
with open('/n/data1/hms/dbmi/farhat/Roger/homoplasy_project/pickled_files/dicts_for_SNP_annotation/H37Rv_coord_gene_mapping.pickle', 'rb') as handle:
    ReferencePosition_Gene_mapping = pickle.load(handle)
    
####### get Gene Categories #######
gene_categories = pd.read_csv('/n/data1/hms/dbmi/farhat/Roger/inhost_TB_dynamics_project/CSV_files/gene_categories/gene_categories.csv').set_index('name')
gene_categories_dict = dict([gene_id , gene_category] for gene_id, gene_category in zip(list(gene_categories.gene_id) , list(gene_categories.Gene_Category)))

####### get Gene Symbols #######
gene_symbol_dict = dict([gene_id , gene_symbol] for gene_id, gene_symbol in zip(list(reference_genome_annotation.symbol.index) , list( reference_genome_annotation.symbol )))
################################################################################################################################################################################################

## [1.2] Find the start codon that corresponds to each gene

In [24]:
gene_ids = ref_protein_sequences_records.keys()

In [25]:
gene_ids[0:5]

['Rv0239', 'Rv0238', 'Rv1322', 'Rv1323', 'Rv1324']

In [26]:
len(gene_ids)

4049

In [27]:
gene_START_codon_list = [ref_protein_sequences_records[gene_id].seq[0] for gene_id in gene_ids]
gene_START_codon_series = pd.Series(gene_START_codon_list, index = gene_ids)

In [29]:
gene_START_codon_series.head()

Rv0239    M
Rv0238    M
Rv1322    M
Rv1323    V
Rv1324    V
dtype: object

### Genes that have a START codon that is Leu (**L**)

In [31]:
np.shape(gene_START_codon_series[gene_START_codon_series == 'L'])

(203,)

In [32]:
gene_START_codon_series[gene_START_codon_series == 'L'].head()

Rv1871c    L
Rv1525     L
Rv0122     L
Rv3338     L
Rv0108c    L
dtype: object

### Genes that have a START codon that is Ile (**I**)

In [33]:
np.shape(gene_START_codon_series[gene_START_codon_series == 'I'])

(3,)

In [34]:
gene_START_codon_series[gene_START_codon_series == 'I']

Rv0742      I
Rv1641      I
Rv2427Ac    I
dtype: object

# [2] Find drug resistance phenotypes for (A) Peru clone and (B) *eis* double mutant isolates

## [2.1] Load drug resistance phenotypes

In [3]:
drug_res_pheno_df = pd.read_csv('/n/data1/hms/dbmi/farhat/Roger/mmpR_BDQ_mutant_project/CSV files/MIC data/2020-10-28_phenotype.csv')

In [4]:
drug_res_pheno_df.head()

Unnamed: 0,run,run_combined,bioproject,biosample,internal,amikacin,capreomycin,ciprofloxacin,ethambutol,ethionamide,isoniazid,kanamycin,levofloxacin,moxifloxacin,ofloxacin,para-aminosalicylic_acid,pyrazinamide,rifampicin,streptomycin
0,1702,1702,internal,1702,1702,R,,,S,,R,,,,,S,S,R,R
1,1719,1719,internal,1719,1719,R,,,S,,R,,,,,S,S,R,S
2,1727,1727,internal,1727,1727,R,,,S,,R,,,,,S,S,R,R
3,1728,1728,internal,1728,1728,R,,,S,,R,,,,,S,S,R,R
4,1741,1741,internal,1741,1741,R,,,S,,R,,,,,S,S,R,S


In [5]:
np.shape(drug_res_pheno_df)

(20379, 19)

## [2.2] Load isolate annotation DF

### *Function* to convert lineage-calls to new/simpler lineage call scheme

In [6]:
def convert_lineage_calls(isolate_annotation_DF):

    lineage_hierarchincal_to_lineage_map_df = pd.read_csv('/n/data1/hms/dbmi/farhat/Roger/mmpR_BDQ_mutant_project/CSV files/lineage_hierachical_to_lineage_map.csv')
    lineage_hierarchincal_to_lineage_map_series = pd.Series(list(lineage_hierarchincal_to_lineage_map_df.loc[:, 'lineage']), index = lineage_hierarchincal_to_lineage_map_df.lineage_hierarchical)

    lineage_call_update_list = []

    for hierarchical_lineage_call in isolate_annotation_DF.lineage_call:

        # if lineage call has an updated shorter call
        if hierarchical_lineage_call in lineage_hierarchincal_to_lineage_map_series.index:
            lineage_call_update_list.append(lineage_hierarchincal_to_lineage_map_series[hierarchical_lineage_call])

        # if lineage call doesn't have an updated shorter call
        else:
            lineage_call_update_list.append(hierarchical_lineage_call)

    # replace the hierarchical calls with the shorter ones
    isolate_annotation_DF.loc[:,'lineage_call'] = lineage_call_update_list
    
    return isolate_annotation_DF

### Load annotation for 31,428 isolates

In [7]:
#load isolate annotation file (columns of Genotype Matrix)
isolate_annotation_DF = pd.read_pickle('/n/data1/hms/dbmi/farhat/Roger/homoplasy_project/rolling_DB_scrape/Genotypes_Filtered_2/genotypes_isolate_annotation.pkl')
isolate_annotation_DF = isolate_annotation_DF.loc[:, ['isolate_ID','lineage_call']] #drop columns
isolate_annotation_DF = convert_lineage_calls(isolate_annotation_DF)

In [8]:
isolate_annotation_DF.head()

Unnamed: 0,isolate_ID,lineage_call
0,SAMEA3558733,4.3.i4.2
1,SAMN03648641,4.4.1.1
2,SAMN03647419,3.1.1.i1
3,SAMEA3671418,4.3.i2
4,SAMN07659096,1.1.3


In [9]:
np.shape(isolate_annotation_DF)

(31428, 2)

### additional 12 *eis* C-14T mutants with AG MICs

In [10]:
#load isolate annotation file (columns of Genotype Matrix)
isolate_annotation_DF_extra_strains = pd.read_pickle('/n/data1/hms/dbmi/farhat/Roger/mmpR_BDQ_mutant_project/eis_promoter_mutant_strains_from_Milan/Genotypes/Genotypes_Filtered/genotypes_isolate_annotation.pkl')
isolate_annotation_DF_extra_strains = isolate_annotation_DF_extra_strains.loc[:, ['isolate_ID','lineage_call']]
isolate_annotation_DF_extra_strains = convert_lineage_calls(isolate_annotation_DF_extra_strains)

In [11]:
isolate_annotation_DF_extra_strains.head()

Unnamed: 0,isolate_ID,lineage_call
0,168-19,2.2.1.1.1
1,622-19,2.2.1.1.1.i3
2,655-19,4.3.i3.1
3,IT1070,2.2.1.1.1.i3
4,IT123,2.2.1.1.1


In [12]:
np.shape(isolate_annotation_DF_extra_strains)

(12, 2)

In [13]:
# merge isolate annotation dataframes
isolate_annotation_DF = isolate_annotation_DF.append(isolate_annotation_DF_extra_strains)
isolate_annotation_DF.reset_index(inplace = True, drop = True)

In [14]:
isolate_annotation_DF.head()

Unnamed: 0,isolate_ID,lineage_call
0,SAMEA3558733,4.3.i4.2
1,SAMN03648641,4.4.1.1
2,SAMN03647419,3.1.1.i1
3,SAMEA3671418,4.3.i2
4,SAMN07659096,1.1.3


In [15]:
np.shape(isolate_annotation_DF)

(31440, 2)

## [2.2] Get phenotypes for Peru clone (sub-lineage = *4.2.1.1.1.1.2* or *4.11*)

In [19]:
sub_lineage = '4.11'

In [20]:
isolate_annotation_4_11_DF = isolate_annotation_DF[isolate_annotation_DF.lineage_call == sub_lineage]

In [21]:
np.shape(isolate_annotation_4_11_DF)

(188, 2)

In [22]:
isolate_annotation_4_11_DF.head()

Unnamed: 0,isolate_ID,lineage_call
49,01R0685,4.11
280,00R1405,4.11
348,02R1942,4.11
621,Peru4582,4.11
770,Peru5115,4.11


In [23]:
#get list of isolate IDs for drug res pheno data
isolate_ID_A_list = list(drug_res_pheno_df.run)
isolate_ID_B_list = list(drug_res_pheno_df.run_combined)
isolate_ID_C_list = list(drug_res_pheno_df.bioproject)
isolate_ID_D_list = list(drug_res_pheno_df.biosample)
isolate_ID_E_list = list(drug_res_pheno_df.internal)

#get set of isolate IDs for isolate group of interest
isolates_of_interest = set(isolate_annotation_4_11_DF.isolate_ID)

peru_clone_drug_phenos_filter = []
isolate_ID_column = []
for isolate_ID_A, isolate_ID_B, isolate_ID_C, isolate_ID_D, isolate_ID_E in zip(isolate_ID_A_list,isolate_ID_B_list,isolate_ID_C_list,isolate_ID_D_list,isolate_ID_E_list):
    
    if (isolate_ID_A in isolates_of_interest):
        peru_clone_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_A)
        
    elif (isolate_ID_B in isolates_of_interest):
        peru_clone_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_B)
        
    elif (isolate_ID_C in isolates_of_interest):
        peru_clone_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_C)
        
    elif (isolate_ID_D in isolates_of_interest):
        peru_clone_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_D)
        
    elif (isolate_ID_E in isolates_of_interest):
        peru_clone_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_E)
        
    else:
        peru_clone_drug_phenos_filter.append(False)

In [24]:
drug_res_pheno_peru_clone_df = drug_res_pheno_df[peru_clone_drug_phenos_filter]
drug_res_pheno_peru_clone_df.loc[:,'isolate_ID'] = isolate_ID_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [25]:
drug_res_pheno_peru_clone_df.head()

Unnamed: 0,run,run_combined,bioproject,biosample,internal,amikacin,capreomycin,ciprofloxacin,ethambutol,ethionamide,isoniazid,kanamycin,levofloxacin,moxifloxacin,ofloxacin,para-aminosalicylic_acid,pyrazinamide,rifampicin,streptomycin,isolate_ID
96,internal,Peru2918,internal,Peru2918,Peru2918,S,S,,R,S,R,S,,S,,,S,R,R,Peru2918
127,internal,Peru2959,internal,Peru2959,Peru2959,S,S,,R,S,R,S,,S,,,R,R,R,Peru2959
132,internal,Peru2965,internal,Peru2965,Peru2965,S,S,,R,S,R,S,,S,,,R,R,S,Peru2965
152,internal,Peru3006,internal,Peru3006,Peru3006,S,S,,R,S,R,S,,S,,,R,R,S,Peru3006
173,internal,Peru3031,internal,Peru3031,Peru3031,S,S,,R,S,R,S,,S,,,R,R,R,Peru3031


In [26]:
np.shape(drug_res_pheno_peru_clone_df)

(103, 20)

#### Create a column to indicate isolates that had both the *mmpR* insertion **and** the *mmpL5* deletion (n=82)

In [27]:
mmpR_mmpL5_double_mutant_list = ['SAMEA5569929', 'SAMEA3402909', 'SAMEA5569553', 'Peru2918', 'Peru4515', 'SAMEA3392629', 'SAMEA5569985', 'SAMEA5569889', 'Peru4671', 'Peru4670', '01R1305', 'Peru2959', '02R0099', 'Peru4683', 'Peru4652', 'Peru4722', 'Peru4961', 'Peru4900', 'SAMEA1101329', 'Peru3418', 'SAMEA3414462', 'Peru2965', 'Peru3056', 'SAMEA2682981', 'Peru5003', '00R1547', 'SAMEA5570040', '02R0417', 'SAMEA5569638', '02R0948', 'Peru3376', 'Peru4521', 'Peru5012', '99R893', 'SAMEA5569636', 'SAMEA2683080', 'SAMEA3558289', 'Peru4988', 'SAMEA3401008', 'Peru4668', 'SAMEA2682695', 'Peru4685', 'SAMEA5569938', '01R0272', '1791', 'Peru4565', 'Peru5036', 'Peru4577', 'Peru4932', 'Peru4646', 'Peru4647', 'Peru4936', 'Peru4937', '02R1942', 'Peru3367', 'Peru4582', 'SAMEA5569790', 'Peru4549', 'Peru4057', 'SAMEA3558270', 'SAMEA2683134', '01R0451', 'SAMEA2682679', 'Peru4492', 'Peru3407', 'Peru5076', 'SAMN02414923', 'SAMEA5569666', 'Peru3342', 'Peru3389', 'Peru3408', 'Peru5445', 'Peru3405', 'Peru5115', 'Peru5114', 'Peru4707', 'SAMEA5569914', 'SAMEA5569642', 'SAMEA5570058', 'Peru3031', 'SAMEA3558288', 'Peru4919']

In [31]:
mmpR_mmpL5_double_mutant_set = set(mmpR_mmpL5_double_mutant_list)

In [51]:
'IDR1600042261' in mmpR_mmpL5_double_mutant_set

False

In [161]:
mmpR_mmpL5_double_mutant_bool = []

for isolate_ID in drug_res_pheno_peru_clone_df.isolate_ID:
    
    if isolate_ID in mmpR_mmpL5_double_mutant_list:
        mmpR_mmpL5_double_mutant_bool.append('yes')
        
    else:
        mmpR_mmpL5_double_mutant_bool.append('no')
        
drug_res_pheno_peru_clone_df.loc[:,'mmpR_ins & mmpL5_del'] = mmpR_mmpL5_double_mutant_bool

In [162]:
drug_res_pheno_peru_clone_df.sort_values(by='mmpR_ins & mmpL5_del', ascending = False, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Drop unnecessary oclumns

In [165]:
drug_res_pheno_peru_clone_df.drop(axis=1, labels=['run','run_combined','bioproject','biosample','internal'],inplace=True)
drug_res_pheno_peru_clone_df.set_index('isolate_ID',inplace=True)

In [166]:
drug_res_pheno_peru_clone_df.head()

Unnamed: 0_level_0,amikacin,capreomycin,ciprofloxacin,ethambutol,ethionamide,isoniazid,kanamycin,levofloxacin,moxifloxacin,ofloxacin,para-aminosalicylic_acid,pyrazinamide,rifampicin,streptomycin,mmpR_ins & mmpL5_del
isolate_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Peru2918,S,S,,R,S,R,S,,S,,,S,R,R,yes
Peru4685,R,R,,S,S,R,R,,S,S,S,R,R,S,yes
SAMEA1101329,,,,,,R,,,,,,,R,,yes
Peru5445,R,R,,R,R,R,R,,R,,,R,R,R,yes
Peru4549,S,S,,R,S,R,S,,S,,,S,R,R,yes


50/82 *mmpR* insertion **and** the *mmpL5* deletion isolates had drug resistance phenotypes

In [167]:
sum(drug_res_pheno_peru_clone_df.loc[:,'mmpR_ins & mmpL5_del'] == 'yes')

50

53/(188-82) = 53/106 other isolates had drug resistance phenotypes

In [168]:
sum(drug_res_pheno_peru_clone_df.loc[:,'mmpR_ins & mmpL5_del'] == 'no')

53

#### Save DataFrame as excel file

In [169]:
drug_res_pheno_peru_clone_df.to_excel('/n/data1/hms/dbmi/farhat/Roger/mmpR_BDQ_mutant_project/CSV files/MIC data/peru_clone_sublineage_4_11_drug_phenotypes.xlsx')

Get list of isolates that are part of the **Peru clone** / sub-lineage **4.11** but don't have binary drug resistance phenotypes

In [179]:
peru_clone_isolates_no_pheno_data = list(set(isolate_annotation_4_11_DF.isolate_ID) - set(drug_res_pheno_peru_clone_df.index))

In [180]:
len(peru_clone_isolates_no_pheno_data)

85

In [183]:
peru_clone_isolates_no_pheno_data[0:5]

['SAMEA5569929',
 'SAMEA2682683',
 'SAMEA5569553',
 'SAMEA2682821',
 'SAMEA3558288']

In [184]:
outfile = open("/n/data1/hms/dbmi/farhat/Roger/mmpR_BDQ_mutant_project/CSV files/MIC data/peru_clone_sublineage_4_11_no_drug_phenotypes.txt", "w")
print >> outfile, "\n".join(peru_clone_isolates_no_pheno_data)
outfile.close()

### [2.2.1] Run Fisher's exact test association of MDR between isolates that (A) had *mmpR* insertion **and** the *mmpL5* deletion and (B) other isolates 

In [3]:
import scipy.stats as stats

- Of the 188 isolates that comprise the "Peru clone" or sub-lineage 4.11, we have binary drug resistance INH/RIF phenotypes for 122/188 isolates and of these 86/122 are MDR.

- Of the 82/188 isolates that have the mmpR frameshift insertion and mmpL5 frameshift deletion, we have binary drug resistance phenotypes for 59/82 isolates and of these 53/59 are MDR.

- Of the (188-82)/188 = 106/188 other isolates, we have binary drug resistance phenotypes for 63/106 isolates and of these 33/63 are MDR.

|           -          | MDR | not MDR |
|:--------------------:|:---:|:-------:|
| mmpR ins & mmpL5 del | 53  | 6       |
| other                | 33  | 30      |

In [4]:
oddsratio, pvalue = stats.fisher_exact([[53, 6], [33, 30]])

In [5]:
oddsratio

8.030303030303031

In [6]:
pvalue

7.389745406209789e-06

## [2.3] Get phenotypes for *eis* promoter / *eis* double mutants

In [16]:
eis_double_mutant_list = ['SAMEA1016073','SAMN08376196','SAMEA1403685','SAMEA1403638','SAMN06210015','SAMN02419535','SAMN02419543','SAMN07236283','SAMN02584676','SAMN02419586','Peru2946','SAMN02419559','SAMN02584612','SAMN08709032','SAMN04633319','Peru3354','SAMN07956543']

In [17]:
isolate_annotation_eis_double_mut_DF = isolate_annotation_DF[[(isolate_id in eis_double_mutant_list) for isolate_id in isolate_annotation_DF.isolate_ID]]
isolate_annotation_eis_double_mut_DF.reset_index(drop=True,inplace=True)

In [18]:
isolate_annotation_eis_double_mut_DF

Unnamed: 0,isolate_ID,lineage_call
0,SAMEA1016073,2.2.1.1.1.i3
1,SAMN08376196,2.2.1.1.1
2,SAMEA1403685,2.2.1.1.1.i3
3,SAMEA1403638,2.2.1.1.1.i3
4,SAMN06210015,2.2.1.1.1
5,SAMN02419535,2.2.1.1.1.i3
6,SAMN02419543,2.2.1.1.1.i3
7,SAMN07236283,1.1.1.1
8,SAMN02584676,2.2.1.1.1.i3
9,SAMN02419586,2.2.1.1.1.i3


In [19]:
#get list of isolate IDs for drug res pheno data
isolate_ID_A_list = list(drug_res_pheno_df.run)
isolate_ID_B_list = list(drug_res_pheno_df.run_combined)
isolate_ID_C_list = list(drug_res_pheno_df.bioproject)
isolate_ID_D_list = list(drug_res_pheno_df.biosample)
isolate_ID_E_list = list(drug_res_pheno_df.internal)

#get set of isolate IDs for isolate group of interest
isolates_of_interest = set(isolate_annotation_eis_double_mut_DF.isolate_ID)

eis_double_mutant_drug_phenos_filter = []
isolate_ID_column = []
for isolate_ID_A, isolate_ID_B, isolate_ID_C, isolate_ID_D, isolate_ID_E in zip(isolate_ID_A_list,isolate_ID_B_list,isolate_ID_C_list,isolate_ID_D_list,isolate_ID_E_list):
    
    if (isolate_ID_A in isolates_of_interest):
        eis_double_mutant_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_A)
        
    elif (isolate_ID_B in isolates_of_interest):
        eis_double_mutant_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_B)
        
    elif (isolate_ID_C in isolates_of_interest):
        eis_double_mutant_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_C)
        
    elif (isolate_ID_D in isolates_of_interest):
        eis_double_mutant_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_D)
        
    elif (isolate_ID_E in isolates_of_interest):
        eis_double_mutant_drug_phenos_filter.append(True)
        isolate_ID_column.append(isolate_ID_E)
        
    else:
        eis_double_mutant_drug_phenos_filter.append(False)

In [20]:
drug_res_pheno_eis_double_mutant_df = drug_res_pheno_df[eis_double_mutant_drug_phenos_filter]
drug_res_pheno_eis_double_mutant_df.loc[:,'isolate_ID'] = isolate_ID_column

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [21]:
drug_res_pheno_eis_double_mutant_df

Unnamed: 0,run,run_combined,bioproject,biosample,internal,amikacin,capreomycin,ciprofloxacin,ethambutol,ethionamide,isoniazid,kanamycin,levofloxacin,moxifloxacin,ofloxacin,para-aminosalicylic_acid,pyrazinamide,rifampicin,streptomycin,isolate_ID
116,internal,Peru2946,internal,Peru2946,Peru2946,S,S,,R,R,R,S,,R,,,R,R,R,Peru2946
295,internal,Peru3354,internal,Peru3354,Peru3354,S,S,,R,S,R,S,,R,,,R,R,R,Peru3354
5408,ERR108469,ERR108469,PRJEB2138,SAMEA1403638,,S,S,,R,,R,,,R,R,,S,R,R,SAMEA1403638
5445,ERR133984,ERR133984,PRJEB2138,SAMEA1403685,,S,S,,R,,R,,,S,S,,R,R,R,SAMEA1403685
15102,SRR5710018,SRR5710018,PRJNA390471,SAMN07236283,,,,,R,,R,,,,,,,R,,SAMN07236283


In [22]:
drug_res_pheno_eis_double_mutant_df.drop(axis=1, labels=['run','run_combined','bioproject','biosample','internal'],inplace=True)
drug_res_pheno_eis_double_mutant_df.set_index('isolate_ID',inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
drug_res_pheno_eis_double_mutant_df

Unnamed: 0_level_0,amikacin,capreomycin,ciprofloxacin,ethambutol,ethionamide,isoniazid,kanamycin,levofloxacin,moxifloxacin,ofloxacin,para-aminosalicylic_acid,pyrazinamide,rifampicin,streptomycin
isolate_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Peru2946,S,S,,R,R,R,S,,R,,,R,R,R
Peru3354,S,S,,R,S,R,S,,R,,,R,R,R
SAMEA1403638,S,S,,R,,R,,,R,R,,S,R,R
SAMEA1403685,S,S,,R,,R,,,S,S,,R,R,R
SAMN07236283,,,,R,,R,,,,,,,R,


#### Save DataFrame as excel file

In [24]:
drug_res_pheno_eis_double_mutant_df.to_excel('/n/data1/hms/dbmi/farhat/Roger/mmpR_BDQ_mutant_project/CSV files/MIC data/eis_double_mutant_drug_phenotypes.xlsx')