Skip to content

Commit

Permalink
pep8 and CHANGES.txt to markdown
Browse files Browse the repository at this point in the history
  • Loading branch information
basvandenberg committed Mar 15, 2014
1 parent 0fedc7c commit b3936a6
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 98 deletions.
File renamed without changes.
156 changes: 81 additions & 75 deletions spice/data_set.py
Expand Up @@ -58,7 +58,7 @@ def read_data_source(self, src_id, data_path, mapping_file=None):
assert(self.proteins)
ds = self.ds_dict[src_id]
ds.read_data(data_path, mapping_file=mapping_file,
object_ids=self.get_protein_ids())
object_ids=self.get_protein_ids())
self.propagate_data_source_data(ds)

# TODO mapping? like in the function above
Expand Down Expand Up @@ -97,7 +97,7 @@ def set_mutation_data(self, mutation_data):
pdb_id, pdb_i) in mutation_data:
if(pid in protein_dict.keys()):
protein = protein_dict[pid]
MissenseMutation(protein, pos, fr, to, label, pep, pep_i,
codons, codon_fr, codons_to, pdb_id, pdb_i)
'''
Expand All @@ -113,8 +113,8 @@ def set_mutation_data(self, mutation_data):
mismut_list = list(mismut_tuple)
mismut_list[0] = protein
mismut_tuple = tuple(mismut_list)
# create mutation object, which will be immediately linked to the

# create mutation object, which will be immediately linked to the
# protein object
MissenseMutation.from_tuple(mismut_tuple)

Expand Down Expand Up @@ -166,7 +166,7 @@ def mutation_f(self):
class DataSource():

def __init__(self, data_set, uid, name, read_func, write_func,
set_data_func, check_funcs, data_path, mapping_file):
set_data_func, check_funcs, data_path, mapping_file):

# callback data set
self.data_set = data_set
Expand Down Expand Up @@ -218,7 +218,7 @@ def read_data(self, data_path, mapping_file=None, object_ids=None):
# get mapping from our uniprot ids to data source ids
if(mapping_file):
object_to_data = [t for t in file_io.read_tuple_list(mapping_file,
(str, str))]
(str, str))]
# 'unzip' into list of mapped ids and list of data file names
uni_othe_dict = dict(object_to_data)

Expand All @@ -234,7 +234,7 @@ def read_data(self, data_path, mapping_file=None, object_ids=None):

# set the data
self.set_data(data, data_mapping=uni_othe_dict,
object_ids=object_ids)
object_ids=object_ids)

# or from a single data file
else:
Expand All @@ -245,7 +245,7 @@ def read_data(self, data_path, mapping_file=None, object_ids=None):
data_dict = dict(data)
data = [(i, data_dict[uni_othe_dict[i]]) for i in object_ids]
self.set_data(data, data_mapping=uni_othe_dict,
object_ids=object_ids)
object_ids=object_ids)
else:
self.set_data(data, object_ids=object_ids)

Expand All @@ -265,7 +265,8 @@ def set_data(self, data, data_mapping=None, object_ids=None):
if (any(map(func, items_to_check))):
self.data = None
raise ValueError('Error in %s data, contains item that %s.' %
(self.name.lower(), ' '.join(func.__name__.split('_'))))
(self.name.lower(),
' '.join(func.__name__.split('_'))))

def get_data_path(self):
return(os.path.join(self.root_dir, self.data_path))
Expand Down Expand Up @@ -325,6 +326,7 @@ def load(self):
def available(self):
return True if self.data else False


# TODO store this in configuration file
class DataSourceFactory(object):

Expand All @@ -340,72 +342,76 @@ def __init__(self):
# secondary structure sequences corresponds to the protein sequence
# lengths.
self.data_sources = {
'prot_seq': ('Protein sequence',
file_io.read_fasta, file_io.write_fasta,
Protein.set_protein_sequence,
[
sequtil.is_empty,
sequtil.is_not_an_amino_acid_sequence
], 'protein.fsa', None),
'orf_seq': ('ORF sequence',
file_io.read_fasta, file_io.write_fasta,
Protein.set_orf_sequence,
[
sequtil.is_empty,
sequtil.is_not_a_nucleotide_sequence
], 'orf.fsa', 'uni_orf.map'),
'ss_seq': ('Secondary structure sequence',
file_io.read_fasta, file_io.write_fasta,
Protein.set_ss_sequence,
[
sequtil.is_empty,
sequtil.is_not_a_sec_struct_sequence
], 'ss.fsa', 'uni_ss.map'),
'sa_seq': ('Solvent accessible sequence',
file_io.read_fasta, file_io.write_fasta,
Protein.set_sa_sequence,
[
sequtil.is_empty,
sequtil.is_not_a_solv_access_sequence
], 'sa.fsa', 'uni_sa.map'),
'prot_struct': ('protein structure',
file_io.read_pdb_dir, file_io.write_pdb_dir,
Protein.set_protein_structure,
[
], os.path.join('structure_data', 'pdb'), 'uni_pdb.map'),
'residue_rasa': ('residue relative accessible surface area',
file_io.read_rasa_dir, file_io.write_rasa_dir,
Protein.set_rasa,
[
], os.path.join('structure_data', 'rasa'),
'uni_rasa.map'),
#'residue_rank': ('protein residue ranking',
# file_io.read_residue_rank_dir,
# file_io.write_residue_rank_dir,
# Protein.set_msa_data,
# [
# ], os.path.join('msa_data', 'residue_rank'),
# 'uni_rank.map'),
'msa': ('Multiple sequence alignment with homologous proteins',
file_io.read_msa_dir,
file_io.write_msa_dir,
Protein.set_msa,
[
], os.path.join('msa_data', 'msa'),
'uni_msa.map'),
'pfam': ('protein family data',
file_io.read_pfam, file_io.write_pfam,
Protein.set_pfam_annotations,
[], 'pfam.txt', None),
'flex': ('backbone dynamics data',
file_io.read_flex, file_io.write_flex,
Protein.set_backbone_dynamics,
[], 'flex.txt', None),
'interaction': ('interaction counts data',
file_io.read_interaction_counts,
file_io.write_interaction_counts,
Protein.set_interaction_counts,
[], 'interaction.txt', None)
'prot_seq': (
'Protein sequence',
file_io.read_fasta, file_io.write_fasta,
Protein.set_protein_sequence,
[
sequtil.is_empty,
sequtil.is_not_an_amino_acid_sequence
], 'protein.fsa', None),
'orf_seq': (
'ORF sequence',
file_io.read_fasta, file_io.write_fasta,
Protein.set_orf_sequence,
[
sequtil.is_empty,
sequtil.is_not_a_nucleotide_sequence
], 'orf.fsa', 'uni_orf.map'),
'ss_seq': (
'Secondary structure sequence',
file_io.read_fasta, file_io.write_fasta,
Protein.set_ss_sequence,
[
sequtil.is_empty,
sequtil.is_not_a_sec_struct_sequence
], 'ss.fsa', 'uni_ss.map'),
'sa_seq': (
'Solvent accessible sequence',
file_io.read_fasta, file_io.write_fasta,
Protein.set_sa_sequence,
[
sequtil.is_empty,
sequtil.is_not_a_solv_access_sequence
], 'sa.fsa', 'uni_sa.map'),
'prot_struct': (
'protein structure',
file_io.read_pdb_dir, file_io.write_pdb_dir,
Protein.set_protein_structure,
[],
os.path.join('structure_data', 'pdb'),
'uni_pdb.map'),
'residue_rasa': (
'residue relative accessible surface area',
file_io.read_rasa_dir, file_io.write_rasa_dir,
Protein.set_rasa,
[],
os.path.join('structure_data', 'rasa'),
'uni_rasa.map'),
'msa': (
'Multiple sequence alignment with homologous proteins',
file_io.read_msa_dir,
file_io.write_msa_dir,
Protein.set_msa,
[
], os.path.join('msa_data', 'msa'),
'uni_msa.map'),
'pfam': (
'protein family data',
file_io.read_pfam, file_io.write_pfam,
Protein.set_pfam_annotations,
[], 'pfam.txt', None),
'flex': (
'backbone dynamics data',
file_io.read_flex, file_io.write_flex,
Protein.set_backbone_dynamics,
[], 'flex.txt', None),
'interaction': (
'interaction counts data',
file_io.read_interaction_counts,
file_io.write_interaction_counts,
Protein.set_interaction_counts,
[], 'interaction.txt', None)
}

# make sure that all ids are in the ids list
Expand Down
2 changes: 1 addition & 1 deletion spice/featext.py
Expand Up @@ -548,7 +548,7 @@ def available_protein_featcat_ids(self):
'''

featcat_ids = set()

for f in self.fm_protein.feature_ids:
parts = f.split('_')
if(len(parts) == 2):
Expand Down
17 changes: 9 additions & 8 deletions spice/featmat.py
Expand Up @@ -311,7 +311,8 @@ def remove_features(self, feature_ids):
del self.feature_matrix
else:
# otherwise delete columns from feature matrix
self._feature_matrix = numpy.delete(self.feature_matrix, fis, 1)
self._feature_matrix = numpy.delete(self.feature_matrix,
fis, 1)

# and delete feature ids and names
for fid in feature_ids:
Expand Down Expand Up @@ -355,8 +356,8 @@ def add_custom_features(self, feature_matrix):
last_cust_feat = sorted(cust_feats)[-1]
print last_cust_feat
print len(self.CUSTOM_FEAT_PRE) + 1
new_cust_feat_i =\
int(last_cust_feat[(len(self.CUSTOM_FEAT_PRE)):]) + 1
new_cust_feat_i = int(
last_cust_feat[(len(self.CUSTOM_FEAT_PRE)):]) + 1

featvec_id = '%s%i' % (self.CUSTOM_FEAT_PRE, new_cust_feat_i)
feat_ids = ['%s_%i' % (featvec_id, i) for i in xrange(num_feat)]
Expand Down Expand Up @@ -425,7 +426,7 @@ def class_indices(self, labeling_name, class_ids):
return sorted([labeling.class_names.index(c) for c in class_ids])

def get_custom_features(self):
'''
'''
This function returns the available custom feature vector ids.
Returns a dictionary with the custom feature vector ids as keys and the
Expand Down Expand Up @@ -471,7 +472,7 @@ def get_dataset(self, feat_ids=None, labeling_name=None, class_ids=None,

# map target to use 0,1,2,... as labels
target_map = dict(zip(class_is, range(len(class_is))))

# targets are floats because liblinear classification wants this...
target = numpy.array([float(target_map[t]) for t in target])
else:
Expand Down Expand Up @@ -722,7 +723,7 @@ def save_histogram(self, feat_id, labeling_name, class_ids=None,

def save_scatter(self, feat_id0, feat_id1, labeling_name=None,
class_ids=None, colors=None, img_format='png',
root_dir='.', feat0_pre=None, feat1_pre=None,
root_dir='.', feat0_pre=None, feat1_pre=None,
standardized=False):

try:
Expand Down Expand Up @@ -761,7 +762,7 @@ def save_scatter(self, feat_id0, feat_id1, labeling_name=None,
if not(os.path.exists(d)):
os.makedirs(d)
out_f = os.path.join(d, 'scatter.%s' % (img_format))

if(standardized):
# standardize data NOTE that fm is standardized before the objects
# are sliced out!!!
Expand Down Expand Up @@ -887,7 +888,7 @@ class Labeling(object):
#def __init__(self, name, feature_matrix):
def __init__(self, name, object_ids, labels, class_names):
'''
Is it really necessary to retain the order of the object ids? Why not
Is it really necessary to retain the order of the object ids? Why not
initiate with a dict?
'''

Expand Down
12 changes: 6 additions & 6 deletions spice/mutation.py
Expand Up @@ -88,8 +88,8 @@ def pdb_resnum(self):
def set_protein_data(self, protein, position, aa_from, aa_to):

if not(protein.protein_sequence[position - 1] == aa_from):
raise ValueError('Amino acid %s not ' % (aa_from) +\
'on position %i ' % (position) +\
raise ValueError('Amino acid %s not ' % (aa_from) +
'on position %i ' % (position) +
'in protein %s.' % (protein.pid))

self._protein = protein
Expand All @@ -112,7 +112,7 @@ def set_peptide_data(self, aa_pep, aa_pep_i):
if(self.protein is None):
raise ValueError('Protein data must be set.')
if not(aa_pep[aa_pep_i] == self.aa_from):
raise ValueError('Amino acid on aa_pep_i in aa_pep does not ' +\
raise ValueError('Amino acid on aa_pep_i in aa_pep does not ' +
'correspond to aa_from.')

self._aa_pep = aa_pep
Expand Down Expand Up @@ -209,7 +209,7 @@ def from_tuple(cls, tuple):
mismut.set_peptide_data(tuple[5], tuple[6])
mismut.set_codon_data(tuple[7], tuple[8], tuple[9])
mismut.set_struct_data(tuple[10], tuple[11])
return mismut
return mismut

def tuple_representation(self):
return (self.protein.pid, self.position, self.aa_from, self.aa_to,
Expand Down Expand Up @@ -248,8 +248,8 @@ def signal_diff(self, scale, feature_ids=False):
return (ids, names)

def signal_auc(self, scale, env_window=21, sig_window=9, edge=1.0,
threshold=1.5, below_threshold=False,
feature_ids=False):
threshold=1.5, below_threshold=False,
feature_ids=False):
# TODO scale
num_scales = 19

Expand Down
12 changes: 4 additions & 8 deletions spice/project_management.py
Expand Up @@ -253,13 +253,11 @@ def parse_classify_job_files(self, cl_id):
assert(tokens[1] == '-f')
assert(tokens[3] == '-c')
cid = os.path.basename(
os.path.dirname(
os.path.dirname(tokens[4])))
os.path.dirname(os.path.dirname(tokens[4])))

if(cid == cl_id):
data_set = os.path.basename(
os.path.dirname(
os.path.dirname(tokens[2])))
os.path.dirname(os.path.dirname(tokens[2])))
data_set_list.append(data_set)

status_dirs[status] = data_set_list
Expand Down Expand Up @@ -849,7 +847,6 @@ def run_classify(self, cl_id, project_id):
settings_dict = self.get_classifier_settings(cl_id)
feature_ids = settings_dict['feature_names']


feature_cats = set()
for f in feature_ids:
fparts = f.split('_')
Expand Down Expand Up @@ -880,7 +877,7 @@ def run_classify(self, cl_id, project_id):
time.sleep(2)

# store path to feature matrix dir
fm_dir = self.fm_dir
fm_dir = self.fm_dir

# SWITCH BACK TO ORIGINAL PROJECT
self.set_project(prev_proj)
Expand All @@ -893,7 +890,7 @@ def run_classify(self, cl_id, project_id):
# output files
progress_f = os.path.join(out_d, 'progress.txt')
error_f = os.path.join(out_d, 'error.txt')

# create the list of options for the classification command
options = [
'-f %s' % (fm_dir),
Expand All @@ -908,4 +905,3 @@ def run_classify(self, cl_id, project_id):
fout.write('%s\n' % (cmd))
fout.write('%s\n' % (progress_f))
fout.write('%s\n' % (error_f))

0 comments on commit b3936a6

Please sign in to comment.