pep8 and CHANGES.txt to markdown

basvandenberg · Mar 15, 2014 · b3936a6 · b3936a6
1 parent 0fedc7c
commit b3936a6
Show file tree

Hide file tree

Showing 6 changed files with 101 additions and 98 deletions.
diff --git a/CHANGES.txt → CHANGES.md b/CHANGES.txt → CHANGES.md
diff --git a/spice/data_set.py b/spice/data_set.py
@@ -58,7 +58,7 @@ def read_data_source(self, src_id, data_path, mapping_file=None):
         assert(self.proteins)
         ds = self.ds_dict[src_id]
         ds.read_data(data_path, mapping_file=mapping_file,
-                object_ids=self.get_protein_ids())
+                     object_ids=self.get_protein_ids())
         self.propagate_data_source_data(ds)
 
     # TODO mapping? like in the function above
@@ -97,7 +97,7 @@ def set_mutation_data(self, mutation_data):
              pdb_id, pdb_i) in mutation_data:
             if(pid in protein_dict.keys()):
                 protein = protein_dict[pid]
-                
+
                 MissenseMutation(protein, pos, fr, to, label, pep, pep_i,
                                  codons, codon_fr, codons_to, pdb_id, pdb_i)
         '''
@@ -113,8 +113,8 @@ def set_mutation_data(self, mutation_data):
                 mismut_list = list(mismut_tuple)
                 mismut_list[0] = protein
                 mismut_tuple = tuple(mismut_list)
-                
-                # create mutation object, which will imediately linked to the 
+
+                # create mutation object, which is immediately linked to the
                 # protein object
                 MissenseMutation.from_tuple(mismut_tuple)
 
@@ -166,7 +166,7 @@ def mutation_f(self):
 class DataSource():
 
     def __init__(self, data_set, uid, name, read_func, write_func,
-            set_data_func, check_funcs, data_path, mapping_file):
+                 set_data_func, check_funcs, data_path, mapping_file):
 
         # callback data set
         self.data_set = data_set
@@ -218,7 +218,7 @@ def read_data(self, data_path, mapping_file=None, object_ids=None):
         # get mapping from our uniprot ids to data source ids
         if(mapping_file):
             object_to_data = [t for t in file_io.read_tuple_list(mapping_file,
-                    (str, str))]
+                              (str, str))]
             # 'unzip' into list of mapped ids and list of data file names
             uni_othe_dict = dict(object_to_data)
 
@@ -234,7 +234,7 @@ def read_data(self, data_path, mapping_file=None, object_ids=None):
 
             # set the data
             self.set_data(data, data_mapping=uni_othe_dict,
-                    object_ids=object_ids)
+                          object_ids=object_ids)
 
         # or from a single data file
         else:
@@ -245,7 +245,7 @@ def read_data(self, data_path, mapping_file=None, object_ids=None):
                 data_dict = dict(data)
                 data = [(i, data_dict[uni_othe_dict[i]]) for i in object_ids]
                 self.set_data(data, data_mapping=uni_othe_dict,
-                        object_ids=object_ids)
+                              object_ids=object_ids)
             else:
                 self.set_data(data, object_ids=object_ids)
 
@@ -265,7 +265,8 @@ def set_data(self, data, data_mapping=None, object_ids=None):
             if (any(map(func, items_to_check))):
                 self.data = None
                 raise ValueError('Error in %s data, contains item that %s.' %
-                    (self.name.lower(), ' '.join(func.__name__.split('_'))))
+                                 (self.name.lower(),
+                                 ' '.join(func.__name__.split('_'))))
 
     def get_data_path(self):
         return(os.path.join(self.root_dir, self.data_path))
@@ -325,6 +326,7 @@ def load(self):
     def available(self):
         return True if self.data else False
 
+
 # TODO store this in configuration file
 class DataSourceFactory(object):
 
@@ -340,72 +342,76 @@ def __init__(self):
         # secondary structure sequences corresponds to the protein sequence
         # lengths.
         self.data_sources = {
-            'prot_seq': ('Protein sequence',
-                    file_io.read_fasta, file_io.write_fasta,
-                    Protein.set_protein_sequence,
-                    [
-                        sequtil.is_empty,
-                        sequtil.is_not_an_amino_acid_sequence
-                    ], 'protein.fsa', None),
-            'orf_seq': ('ORF sequence',
-                    file_io.read_fasta, file_io.write_fasta,
-                    Protein.set_orf_sequence,
-                    [
-                        sequtil.is_empty,
-                        sequtil.is_not_a_nucleotide_sequence
-                    ], 'orf.fsa', 'uni_orf.map'),
-            'ss_seq': ('Secondary structure sequence',
-                    file_io.read_fasta, file_io.write_fasta,
-                    Protein.set_ss_sequence,
-                    [
-                        sequtil.is_empty,
-                        sequtil.is_not_a_sec_struct_sequence
-                    ], 'ss.fsa', 'uni_ss.map'),
-            'sa_seq': ('Solvent accessible sequence',
-                    file_io.read_fasta, file_io.write_fasta,
-                    Protein.set_sa_sequence,
-                    [
-                        sequtil.is_empty,
-                        sequtil.is_not_a_solv_access_sequence
-                    ], 'sa.fsa', 'uni_sa.map'),
-            'prot_struct': ('protein structure',
-                    file_io.read_pdb_dir, file_io.write_pdb_dir,
-                    Protein.set_protein_structure,
-                    [
-                    ], os.path.join('structure_data', 'pdb'), 'uni_pdb.map'),
-            'residue_rasa': ('residue relative accessible surface area',
-                    file_io.read_rasa_dir, file_io.write_rasa_dir,
-                    Protein.set_rasa,
-                    [
-                    ], os.path.join('structure_data', 'rasa'),
-                    'uni_rasa.map'),
-            #'residue_rank': ('protein residue ranking',
-            #        file_io.read_residue_rank_dir,
-            #        file_io.write_residue_rank_dir,
-            #        Protein.set_msa_data,
-            #        [
-            #        ], os.path.join('msa_data', 'residue_rank'),
-            #        'uni_rank.map'),
-            'msa': ('Multiple sequence alignment with homologous proteins',
-                    file_io.read_msa_dir,
-                    file_io.write_msa_dir,
-                    Protein.set_msa,
-                    [
-                    ], os.path.join('msa_data', 'msa'),
-                    'uni_msa.map'),
-            'pfam': ('protein family data',
-                    file_io.read_pfam, file_io.write_pfam,
-                    Protein.set_pfam_annotations,
-                    [], 'pfam.txt', None),
-            'flex': ('backbone dynamics data',
-                    file_io.read_flex, file_io.write_flex,
-                    Protein.set_backbone_dynamics,
-                    [], 'flex.txt', None),
-            'interaction': ('interaction counts data',
-                    file_io.read_interaction_counts,
-                    file_io.write_interaction_counts,
-                    Protein.set_interaction_counts,
-                    [], 'interaction.txt', None)
+            'prot_seq': (
+                'Protein sequence',
+                file_io.read_fasta, file_io.write_fasta,
+                Protein.set_protein_sequence,
+                [
+                    sequtil.is_empty,
+                    sequtil.is_not_an_amino_acid_sequence
+                ], 'protein.fsa', None),
+            'orf_seq': (
+                'ORF sequence',
+                file_io.read_fasta, file_io.write_fasta,
+                Protein.set_orf_sequence,
+                [
+                    sequtil.is_empty,
+                    sequtil.is_not_a_nucleotide_sequence
+                ], 'orf.fsa', 'uni_orf.map'),
+            'ss_seq': (
+                'Secondary structure sequence',
+                file_io.read_fasta, file_io.write_fasta,
+                Protein.set_ss_sequence,
+                [
+                    sequtil.is_empty,
+                    sequtil.is_not_a_sec_struct_sequence
+                ], 'ss.fsa', 'uni_ss.map'),
+            'sa_seq': (
+                'Solvent accessible sequence',
+                file_io.read_fasta, file_io.write_fasta,
+                Protein.set_sa_sequence,
+                [
+                    sequtil.is_empty,
+                    sequtil.is_not_a_solv_access_sequence
+                ], 'sa.fsa', 'uni_sa.map'),
+            'prot_struct': (
+                'protein structure',
+                file_io.read_pdb_dir, file_io.write_pdb_dir,
+                Protein.set_protein_structure,
+                [],
+                os.path.join('structure_data', 'pdb'),
+                'uni_pdb.map'),
+            'residue_rasa': (
+                'residue relative accessible surface area',
+                file_io.read_rasa_dir, file_io.write_rasa_dir,
+                Protein.set_rasa,
+                [],
+                os.path.join('structure_data', 'rasa'),
+                'uni_rasa.map'),
+            'msa': (
+                'Multiple sequence alignment with homologous proteins',
+                file_io.read_msa_dir,
+                file_io.write_msa_dir,
+                Protein.set_msa,
+                [
+                ], os.path.join('msa_data', 'msa'),
+                'uni_msa.map'),
+            'pfam': (
+                'protein family data',
+                file_io.read_pfam, file_io.write_pfam,
+                Protein.set_pfam_annotations,
+                [], 'pfam.txt', None),
+            'flex': (
+                'backbone dynamics data',
+                file_io.read_flex, file_io.write_flex,
+                Protein.set_backbone_dynamics,
+                [], 'flex.txt', None),
+            'interaction': (
+                'interaction counts data',
+                file_io.read_interaction_counts,
+                file_io.write_interaction_counts,
+                Protein.set_interaction_counts,
+                [], 'interaction.txt', None)
         }
 
         # make sure that all ids are in the ids list

diff --git a/spice/featext.py b/spice/featext.py
@@ -548,7 +548,7 @@ def available_protein_featcat_ids(self):
         '''
 
         featcat_ids = set()
-            
+
         for f in self.fm_protein.feature_ids:
             parts = f.split('_')
             if(len(parts) == 2):

diff --git a/spice/featmat.py b/spice/featmat.py
@@ -311,7 +311,8 @@ def remove_features(self, feature_ids):
                 del self.feature_matrix
             else:
                 # otherwise delete columns from feature matrix
-                self._feature_matrix = numpy.delete(self.feature_matrix, fis, 1)
+                self._feature_matrix = numpy.delete(self.feature_matrix,
+                                                    fis, 1)
 
                 # and delete feature ids and names
                 for fid in feature_ids:
@@ -355,8 +356,8 @@ def add_custom_features(self, feature_matrix):
             last_cust_feat = sorted(cust_feats)[-1]
             print last_cust_feat
             print len(self.CUSTOM_FEAT_PRE) + 1
-            new_cust_feat_i =\
-                    int(last_cust_feat[(len(self.CUSTOM_FEAT_PRE)):]) + 1
+            new_cust_feat_i = int(
+                last_cust_feat[(len(self.CUSTOM_FEAT_PRE)):]) + 1
 
         featvec_id = '%s%i' % (self.CUSTOM_FEAT_PRE, new_cust_feat_i)
         feat_ids = ['%s_%i' % (featvec_id, i) for i in xrange(num_feat)]
@@ -425,7 +426,7 @@ def class_indices(self, labeling_name, class_ids):
         return sorted([labeling.class_names.index(c) for c in class_ids])
 
     def get_custom_features(self):
-        ''' 
+        '''
         This function returns the available custom feature vector ids.
 
         Returns a dictionary with the custom feature vector ids as keys and the
@@ -471,7 +472,7 @@ def get_dataset(self, feat_ids=None, labeling_name=None, class_ids=None,
 
             # map target to use 0,1,2,... as labels
             target_map = dict(zip(class_is, range(len(class_is))))
-            
+
             # targets are floats because liblinear classification wants this...
             target = numpy.array([float(target_map[t]) for t in target])
         else:
@@ -722,7 +723,7 @@ def save_histogram(self, feat_id, labeling_name, class_ids=None,
 
     def save_scatter(self, feat_id0, feat_id1, labeling_name=None,
                      class_ids=None, colors=None, img_format='png',
-                     root_dir='.', feat0_pre=None, feat1_pre=None, 
+                     root_dir='.', feat0_pre=None, feat1_pre=None,
                      standardized=False):
 
         try:
@@ -761,7 +762,7 @@ def save_scatter(self, feat_id0, feat_id1, labeling_name=None,
         if not(os.path.exists(d)):
             os.makedirs(d)
         out_f = os.path.join(d, 'scatter.%s' % (img_format))
-        
+
         if(standardized):
             # standardize data NOTE that fm is standardized before the objects
             # are sliced out!!!
@@ -887,7 +888,7 @@ class Labeling(object):
     #def __init__(self, name, feature_matrix):
     def __init__(self, name, object_ids, labels, class_names):
         '''
-        Is it really necesary to retain the order of the object ids? Why not 
+        Is it really necessary to retain the order of the object ids? Why not
         initiate with a dict?
         '''
 

diff --git a/spice/mutation.py b/spice/mutation.py
@@ -88,8 +88,8 @@ def pdb_resnum(self):
     def set_protein_data(self, protein, position, aa_from, aa_to):
 
         if not(protein.protein_sequence[position - 1] == aa_from):
-            raise ValueError('Amino acid %s not ' % (aa_from) +\
-                             'on position %i ' % (position) +\
+            raise ValueError('Amino acid %s not ' % (aa_from) +
+                             'on position %i ' % (position) +
                              'in protein %s.' % (protein.pid))
 
         self._protein = protein
@@ -112,7 +112,7 @@ def set_peptide_data(self, aa_pep, aa_pep_i):
         if(self.protein is None):
             raise ValueError('Protein data must be set.')
         if not(aa_pep[aa_pep_i] == self.aa_from):
-            raise ValueError('Amino acid on aa_pep_i in aa_pep does not ' +\
+            raise ValueError('Amino acid on aa_pep_i in aa_pep does not ' +
                              'correspond to aa_from.')
 
         self._aa_pep = aa_pep
@@ -209,7 +209,7 @@ def from_tuple(cls, tuple):
         mismut.set_peptide_data(tuple[5], tuple[6])
         mismut.set_codon_data(tuple[7], tuple[8], tuple[9])
         mismut.set_struct_data(tuple[10], tuple[11])
-        return mismut    
+        return mismut
 
     def tuple_representation(self):
         return (self.protein.pid, self.position, self.aa_from, self.aa_to,
@@ -248,8 +248,8 @@ def signal_diff(self, scale, feature_ids=False):
             return (ids, names)
 
     def signal_auc(self, scale, env_window=21, sig_window=9, edge=1.0,
-                            threshold=1.5, below_threshold=False,
-                            feature_ids=False):
+                   threshold=1.5, below_threshold=False,
+                   feature_ids=False):
         # TODO scale
         num_scales = 19
 

diff --git a/spice/project_management.py b/spice/project_management.py
@@ -253,13 +253,11 @@ def parse_classify_job_files(self, cl_id):
                         assert(tokens[1] == '-f')
                         assert(tokens[3] == '-c')
                         cid = os.path.basename(
-                            os.path.dirname(
-                            os.path.dirname(tokens[4])))
+                            os.path.dirname(os.path.dirname(tokens[4])))
 
                         if(cid == cl_id):
                             data_set = os.path.basename(
-                                os.path.dirname(
-                                os.path.dirname(tokens[2])))
+                                os.path.dirname(os.path.dirname(tokens[2])))
                             data_set_list.append(data_set)
 
                 status_dirs[status] = data_set_list
@@ -849,7 +847,6 @@ def run_classify(self, cl_id, project_id):
         settings_dict = self.get_classifier_settings(cl_id)
         feature_ids = settings_dict['feature_names']
 
-
         feature_cats = set()
         for f in feature_ids:
             fparts = f.split('_')
@@ -880,7 +877,7 @@ def run_classify(self, cl_id, project_id):
         time.sleep(2)
 
         # store path to feature matrix dir
-        fm_dir = self.fm_dir        
+        fm_dir = self.fm_dir
 
         # SWITCH BACK TO ORIGINAL PROJECT
         self.set_project(prev_proj)
@@ -893,7 +890,7 @@ def run_classify(self, cl_id, project_id):
         # output files
         progress_f = os.path.join(out_d, 'progress.txt')
         error_f = os.path.join(out_d, 'error.txt')
-        
+
         # create the list of options for the classification command
         options = [
             '-f %s' % (fm_dir),
@@ -908,4 +905,3 @@ def run_classify(self, cl_id, project_id):
             fout.write('%s\n' % (cmd))
             fout.write('%s\n' % (progress_f))
             fout.write('%s\n' % (error_f))
-