Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of github.com:sh1992/OrangeWidgets
- Loading branch information
Showing
612 changed files
with
15,736,707 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
#bioinformatic file management functions to be implemented | ||
|
||
#goals | ||
#take multiple files and merge them into single data table | ||
#be able to transpose matrix (switching from samples to genes as entries) | ||
#be able to deal with FASTA formats (WHY DOESN'T ORANGE DO THIS?) | ||
|
||
""" | ||
IMPORTS | ||
""" | ||
import os | ||
import sys | ||
import csv | ||
|
||
""" | ||
GLOBALS | ||
""" | ||
EXAMPLEFILELIST=['breast-cancer-wisconsin.tab', 'breast-cancer-wisconsin-cont.tab'] | ||
|
||
|
||
""" | ||
SCRIPT | ||
""" | ||
|
||
#fasta reader | ||
def fastaReader(fastaFile):
    """
    Placeholder for a FASTA reader; nothing is implemented yet.

    fastaFile -- accepted but currently ignored.

    returns None.
    """
    return None
|
||
|
||
|
||
def transposeMatrix(textfile):
    """
    Transpose a whitespace-delimited text matrix in place.

    Reads every non-blank line of textfile, splits it on whitespace, and
    rewrites the same file so that the former columns become rows. Fields
    on each output line are joined by a single space. If rows have unequal
    lengths, the result is truncated to the shortest row (zip semantics).

    textfile -- path of the file to transpose; its contents are overwritten.

    returns the file object that was written. It is already closed, so the
    data is guaranteed to be on disk; attributes such as .name remain usable.
    """
    # Read everything BEFORE reopening for writing: mode 'w' truncates the
    # file on open, which would otherwise discard the input.
    with open(textfile, 'r') as source:
        rows = [line.split() for line in source if line.strip()]

    with open(textfile, 'w') as target:
        for column in zip(*rows):
            # One transposed row per output line.
            target.write(' '.join(column) + '\n')
    return target
|
||
|
||
def concatFiles(path):
    """
    Merge every data file in a directory into one nested dictionary.

    Each regular file in path is read as tab-separated text. The first
    line of each file is treated as a header and skipped. Every following
    non-blank line must have at least three fields: the first is used as
    the outer key (patient id), the second as the inner key (gene), and
    the third as the stored value (kept as a string).

    path -- directory containing the files to merge.

    returns a dict of the form {patient_id: {gene: value}}. When the same
    patient/gene pair occurs more than once, the last one read wins; files
    are visited in sorted name order so the result is deterministic.

    raises IndexError if a non-blank data line has fewer than three fields.
    """
    dataDict = {}

    for name in sorted(os.listdir(path)):
        fullPath = os.path.join(path, name)
        # Subdirectories cannot be opened as data files; skip them.
        if not os.path.isfile(fullPath):
            continue

        with open(fullPath, 'r') as fh:
            # Skip the header line.
            fh.readline()

            for raw in fh:
                # Blank lines (e.g. a trailing newline) carry no data.
                if not raw.strip():
                    continue
                fields = raw.rstrip("\n").split("\t")
                # setdefault keeps data already collected for this patient,
                # whether from an earlier line or an earlier file.
                dataDict.setdefault(fields[0], {})[fields[1]] = fields[2]

    return dataDict
|
||
|
||
def makeDataFile(newFileName,dataDict):
    """
    Write a nested patient/gene dictionary to a tab-delimited text file.

    The file layout is:
      line 1: "patientID", one column name per gene, then "value"
      line 2: "d" followed by one "c" per gene
      then one line per patient: the patient id followed by that
      patient's value for each gene.
    Every field on lines 2 and later is followed by a tab, so those lines
    end with a trailing tab before the newline.

    newFileName -- path of the file to create or overwrite.
    dataDict -- dict of the form {patient_id: {gene: value}}; ids, gene
                names, and values must all be strings. The gene columns
                are taken from the first patient. An empty dict produces
                a file with no gene columns and no patient lines.

    raises KeyError if a patient lacks one of the first patient's genes.
    """
    patients = list(dataDict)
    genes = list(dataDict[patients[0]]) if patients else []

    header = "patientID\t" + "".join(g + "\t" for g in genes) + "value\n"
    typeRow = "d\t" + "c\t" * len(genes) + "\n"

    # The with-block guarantees the file is flushed and closed.
    with open(newFileName, 'w') as fh:
        fh.write(header)
        fh.write(typeRow)
        for p in patients:
            values = "".join(dataDict[p][g] + "\t" for g in genes)
            fh.write(p + "\t" + values + "\n")
|
||
def makeCSVDataFile(newFileName,dataDict):
    """
    Write a nested patient/gene dictionary as tab-delimited rows via csv.

    The first row is "patientID", one column name per gene, then "value".
    Each following row holds a patient id and that patient's value for
    each gene, in the same gene order as the header.

    newFileName -- path of the file to create or overwrite.
    dataDict -- dict of the form {patient_id: {gene: value}}. The gene
                columns are taken from the first patient. An empty dict
                produces a file containing only the header row.

    raises KeyError if a patient lacks one of the first patient's genes.
    """
    patients = list(dataDict)
    genes = list(dataDict[patients[0]]) if patients else []

    # The with-block guarantees the buffered rows are flushed and the
    # handle is closed.
    with open(newFileName, 'w') as fh:
        writer = csv.writer(fh, delimiter="\t")
        writer.writerow(["patientID"] + genes + ["value"])
        for p in patients:
            writer.writerow([p] + [dataDict[p][g] for g in genes])
|
||
|
||
|
||
|
||
def concat(flist, column_overlap=0, transpose=False):
    """
    Concatenate tab-delimited files side by side into one data table.

    The rows of the first file become the rows of the table. Row i of
    every later file is appended to row i of the table, after dropping
    that row's first column_overlap fields. Blank lines are ignored in
    every file.

    flist -- sequence of file paths, read in order.
    column_overlap -- number of leading columns that later files repeat
                      from the first file; these are skipped when a later
                      file's rows are appended.
    transpose -- when true, the finished table is transposed (rows become
                 columns) before it is returned. If rows have unequal
                 lengths, the transposed table is truncated to the
                 shortest row.

    returns a list of rows, each row a list of strings.

    raises IndexError if a later file has more rows than the first file.
    A later file with fewer rows leaves the remaining rows unextended.
    """
    data = []  # Container for all file data desired

    for num, fil in enumerate(flist):
        with open(fil, 'r') as f:
            # csv.reader yields [] for a blank line; drop those rows.
            rows = [row for row in csv.reader(f, delimiter='\t') if row]

        if num == 0:
            # First file: its rows seed the table.
            data.extend(rows)
            continue

        if len(rows) > len(data):
            raise IndexError(
                "%s has %d rows but the first file has only %d"
                % (fil, len(rows), len(data)))

        # Later files: widen the matching row of the table.
        for target, row in zip(data, rows):
            target.extend(row[column_overlap:])

    if transpose:
        data = [list(column) for column in zip(*data)]

    return data
|
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#bioinformatic file widget to be implemented | ||
import BF #this is where the functions for dealing with stuff go | ||
import sys | ||
sys.path.append("C:\\Python27\\Lib\\site-packages\\Orange\\OrangeWidgets") | ||
from OWWidget import * | ||
import OWGUI | ||
import numpy as np | ||
|
||
class BioFile(OWWidget):
    """
    Orange widget shell for importing, organizing, and manipulating
    bioinformatic data files.

    At present it only builds an informational panel. It declares one
    output channel ("Sampled Data", carrying an ExampleTable) and no
    input channels.

    The class was originally spelled "Bio-file", which is not a valid
    Python identifier and made the module fail to parse.
    """

    def __init__(self, parent=None, signalManager=None):
        """
        Build the widget and its info box.

        parent -- forwarded to OWWidget.__init__.
        signalManager -- forwarded to OWWidget.__init__.
        """
        OWWidget.__init__(self, parent, signalManager, 'Bio-File')

        #self.inputs = [("Data", ExampleTable, self.data)]
        self.outputs = [("Sampled Data", ExampleTable)]

        # GUI
        box = OWGUI.widgetBox(self.controlArea, "Info")
        self.infoa = OWGUI.widgetLabel(box, 'File tool to help import, organize, and manipulate data of various file types from within orange.')
        # Second label starts empty; nothing in this class fills it yet.
        self.infob = OWGUI.widgetLabel(box, '')
        self.resize(100,50)
|
||
|
||
#goals | ||
#take multiple files and merge them into single data table | ||
#be able to transpose matrix (switching from samples to genes as entries) | ||
#be able to deal with FASTA formats (WHY DOESN'T ORANGE DO THIS?) | ||
|
||
|
||
|
Oops, something went wrong.