-
Notifications
You must be signed in to change notification settings - Fork 0
/
LookupTaxonDetails3.py
executable file
·122 lines (94 loc) · 3.75 KB
/
LookupTaxonDetails3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/python3
# Script from David Ryder (CEFAS, Weymouth, England)
# Needs ete3 being installed and folder .etetoolkit being present in the home directory
# If folder .etetoolkit is not present in home directory, you need to run ONLY
# the ete3 command once first, delete the output, and then set -e to the
# .etetoolit/taxa.sqlite folder that has now been generated in your folder.
import getopt,sys,sqlite3,os
# Retrieve file names from the command line
def retrieveArguments():
blastFileName=''
lineageFileName=''
outputFileName=''
etetoolkit=''
taxonColumn=13
myopts, args = getopt.getopt(sys.argv[1:],"b:l:o:t:e:")
helpMessage = "-b blast alignments -l taxonomic lineages -o output -t 13 -e etetoolkit"
if len(myopts) != 5:
print("Usage: {} {}".format(sys.argv[0],helpMessage))
exit();
for o, a in myopts:
if o == '-l':
lineageFileName = a
elif o == '-b':
blastFileName = a
elif o == '-o':
outputFileName = a
elif o == '-t':
taxonColumn = int(a)
elif o == '-e':
etetoolkit = a
else:
print("Usage: {} {}".format(sys.argv[0],helpMessage))
exit();
print("Lineage file: {}\nBlast alignment file: {}\nOutput file: {}\nColumn used to store taxonomic id: {}\netetoolkit/taxa.sqlite location: {}".format(lineageFileName,blastFileName,outputFileName,taxonColumn,etetoolkit))
return(lineageFileName,blastFileName,outputFileName,taxonColumn,etetoolkit)
lineageFileName,blastFileName,outputFileName,taxonColumn,etetoolkit = retrieveArguments()
# Read file containing results
lineageFile = open(lineageFileName,'r')
# Open file for writting out results
outputFile = open(outputFileName,'w')
# Create a connection to the database
conn = sqlite3.connect(etetoolkit.format(os.getlogin()))
c = conn.cursor()
c.execute("PRAGMA foreign_keys=ON")
taxonomicLineage = dict()
taxon2SpeciesName = dict()
for line in lineageFile:
line = line.strip('\n')
splitLine = line.split('\t');
taxonID = splitLine[0];
SpeciesName = splitLine[1];
LowestLevel = splitLine[2];
lineage = splitLine[3];
lineageSplit = lineage.split(',');
taxonLineage = splitLine[4];
taxonLineageSplit = taxonLineage.split(',');
taxonLineageRank = []
if taxonID not in taxon2SpeciesName:
taxon2SpeciesName[taxonID] = [LowestLevel,SpeciesName]
#print(line)
for taxon in taxonLineageSplit:
c.execute('SELECT rank FROM species WHERE taxid = ?',[taxon])
#print("\t{}".format(taxon))
result = c.fetchone()
if result:
taxonRank = result[0].strip('\t')
else:
taxonRank = 'No Result'
#print("TaxonID:{}\nTaxon Rank:{}\n".format(taxon,taxonRank))
taxonLineageRank.append(taxonRank)
for i in range(1,len(taxonLineageSplit)):
#outputFile.write("{}\t{}\t{}\t{}\n".format(taxonID,LowestLevel,lineageSplit[i],taxonLineageRank[i]))
if taxonID in taxonomicLineage:
taxonomicLineage[taxonID][taxonLineageRank[i]] = lineageSplit[i]
else:
taxonomicLineage[taxonID] = {taxonLineageRank[i]: lineageSplit[i]}
blastFile = open(blastFileName,'r')
taxonomicLevels = ['superkingdom','kingdom','phylum','subphylum','class','subclass','order','suborder','infraorder','family','genus']
for line in blastFile:
line = line.strip('\n')
splitLine = line.split('\t');
taxonID = splitLine[taxonColumn-1].split(';')[0]
outputFile.write(line)
if (taxonID in taxonomicLineage):
outputFile.write("\t{}\t{}".format(taxon2SpeciesName[taxonID][0],taxon2SpeciesName[taxonID][1]))
for level in taxonomicLevels:
if (level in taxonomicLineage[taxonID]):
outputFile.write("\t{}".format(taxonomicLineage[taxonID][level]))
else:
outputFile.write("\tNA")
else:
for i in range(0,len(taxonomicLevels) + 2):
outputFile.write("\tUnknown")
outputFile.write("\n")