Skip to content

Commit

Permalink
Merge branch 'master' of github.com:sh1992/OrangeWidgets
Browse files Browse the repository at this point in the history
  • Loading branch information
sh1992 committed May 1, 2012
2 parents bfcd729 + 3361dfd commit a6fc2a8
Show file tree
Hide file tree
Showing 612 changed files with 15,736,707 additions and 0 deletions.
172 changes: 172 additions & 0 deletions BioinformaticFileWidget/BF.py
@@ -0,0 +1,172 @@
#bioinformatic file management functions to be implemented

#goals
#take multiple files and merge them into a single data table
#be able to transpose a matrix (switching from samples to genes as entries)
#be able to deal with FASTA formats (why doesn't Orange do this?)

"""
IMPORTS
"""
import os
import sys
import csv

"""
GLOBALS
"""
# Names of two sample tab-delimited data files. Nothing in the visible part
# of this module reads this list -- presumably kept for manual testing;
# TODO confirm before removing.
EXAMPLEFILELIST=['breast-cancer-wisconsin.tab', 'breast-cancer-wisconsin-cont.tab']


"""
SCRIPT
"""

#fasta reader
def fastaReader(fastaFile):
    """Placeholder for a FASTA reader; not implemented yet.

    The ``fastaFile`` argument is accepted but ignored, and the function
    always returns None.
    """
    return None



def transposeMatrix(textfile):
    """Transpose a whitespace-delimited text matrix, rewriting the file in place.

    Every non-blank line of ``textfile`` is split on whitespace into a row.
    The rows are transposed and written back to the same path, one
    transposed row per line with cells joined by a single space.

    Transposition uses ``zip``, so if the rows have different lengths the
    result is truncated to the length of the shortest row.

    Parameters:
        textfile: path of the file to transpose (overwritten).

    Returns:
        The file object that was used for writing. It is already closed
        when returned, so the data is guaranteed to be on disk.
    """
    # Read everything BEFORE reopening for writing: opening with 'w'
    # truncates the file, so reading afterwards would see no data at all.
    with open(textfile, 'r') as source:
        rows = [line.split() for line in source if line.strip()]

    with open(textfile, 'w') as target:
        for column in zip(*rows):
            # One output line per original column.
            target.write(' '.join(column) + '\n')

    return target


def concatFiles(path):
    """Merge every data file in a directory into one nested dictionary.

    Each regular file directly inside ``path`` is read as tab-delimited
    text: the first line is treated as a header and skipped, and every
    following non-blank line is expected to hold at least three fields,
    ``patientID<TAB>gene<TAB>value``.

    Parameters:
        path: directory containing the files to merge.

    Returns:
        dict mapping patient ID -> {gene: value}. All keys and values are
        the strings read from the files. If the same patient/gene pair
        occurs more than once, the last one read wins.

    Raises:
        IndexError: if a non-blank data line has fewer than three
            tab-separated fields.
    """
    dataDict = {}

    # sorted() makes the processing order (and therefore which duplicate
    # entry wins) independent of the order os.listdir happens to return.
    for name in sorted(os.listdir(path)):
        fullPath = os.path.join(path, name)
        if not os.path.isfile(fullPath):
            # Sub-directories cannot be opened as data files.
            continue

        with open(fullPath, 'r') as fh:
            fh.readline()  # skip the header line

            for rawLine in fh:
                if not rawLine.strip():
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                fields = rawLine.strip("\n").split("\t")
                # setdefault creates the per-patient dictionary on first
                # sight of ANY patient ID, not only the one on the first
                # data line, and never discards genes already collected
                # for that patient from an earlier file.
                dataDict.setdefault(fields[0], {})[fields[1]] = fields[2]

    return dataDict


def makeDataFile(newFileName, dataDict):
    """Write a patient-by-gene table as tab-delimited text.

    File layout (every field is followed by a tab):
        line 1: ``patientID``, one column name per gene, then ``value``
        line 2: a type row -- ``d`` for the ID column, ``c`` per gene
        rest:   one line per patient: the ID, then that patient's value
                for each gene
    Data lines end with a trailing tab, i.e. an empty final field under
    the ``value`` column.

    The gene columns and their order are taken from the first patient in
    ``dataDict``.

    Parameters:
        newFileName: path of the file to create or overwrite.
        dataDict: mapping patient ID -> {gene: value}, as produced by
            concatFiles. Keys and values are written with ``str()``.

    Returns:
        None.

    Raises:
        KeyError: if a patient lacks one of the first patient's genes.
    """
    patients = list(dataDict.keys())
    # With no patients there are no gene columns; only the two header
    # lines are written instead of failing on patients[0].
    genes = list(dataDict[patients[0]].keys()) if patients else []

    header = "patientID\t" + "".join(str(g) + "\t" for g in genes) + "value\n"
    typeRow = "d\t" + "c\t" * len(genes) + "\n"

    # The with-block guarantees the file is flushed and closed.
    with open(newFileName, 'w') as fh:
        fh.write(header)
        fh.write(typeRow)
        for p in patients:
            cells = "".join(str(dataDict[p][g]) + "\t" for g in genes)
            fh.write(str(p) + "\t" + cells + "\n")

def makeCSVDataFile(newFileName, dataDict):
    """Write a patient-by-gene table as tab-delimited text via the csv module.

    The header row is ``patientID``, one column per gene, then ``value``.
    Each following row holds a patient ID and that patient's value for
    each gene; data rows have no field under the ``value`` column.

    The gene columns and their order are taken from the first patient in
    ``dataDict``.

    Parameters:
        newFileName: path of the file to create or overwrite.
        dataDict: mapping patient ID -> {gene: value}, as produced by
            concatFiles.

    Returns:
        None.

    Raises:
        KeyError: if a patient lacks one of the first patient's genes.
    """
    patients = list(dataDict.keys())
    # With no patients there are no gene columns; only the header row is
    # written instead of failing on patients[0].
    genes = list(dataDict[patients[0]].keys()) if patients else []

    # The with-block guarantees the file is flushed and closed.
    # NOTE: csv.writer terminates rows with '\r\n' by default; the file is
    # opened in plain 'w' mode exactly as before, so line endings on disk
    # are unchanged by this rewrite.
    with open(newFileName, 'w') as fh:
        writer = csv.writer(fh, delimiter="\t")
        writer.writerow(["patientID"] + genes + ["value"])
        for p in patients:
            writer.writerow([p] + [dataDict[p][g] for g in genes])




def concat(flist, column_overlap=0, transpose=False):
    """Join several tab-delimited files side by side into one table.

    The rows of the first file become the rows of the result. For every
    later file, row ``i`` (minus its first ``column_overlap`` fields) is
    appended to row ``i`` of the result. Files are assumed to be aligned
    row for row.

    Reading of a file stops at its first blank row. A later file with
    fewer rows than the first simply leaves the remaining rows
    unextended.

    Parameters:
        flist: sequence of file paths, in the order they are joined.
        column_overlap: number of leading columns that repeat in every
            file after the first and are dropped from those files.
        transpose: if true, return the table transposed (columns become
            rows). Transposition uses ``zip``, so ragged rows are
            truncated to the shortest row.

    Returns:
        A list of rows, each a list of strings (not a numpy array).

    Raises:
        IndexError: if a later file has more rows than the first file.
    """
    data = []  # one list of fields per row of the first file

    for num, fil in enumerate(flist):
        # The with-block closes each file even if an error is raised.
        with open(fil, 'r') as f:
            reader = csv.reader(f, delimiter='\t')
            for i, row in enumerate(reader):
                if not row:
                    # A blank row ends this file's data.
                    break
                if num == 0:
                    data.append(row)
                elif i < len(data):
                    data[i].extend(row[column_overlap:])
                else:
                    raise IndexError(
                        "file %r has more rows than the first file (%d)"
                        % (fil, len(data)))

    if transpose:
        return [list(column) for column in zip(*data)]
    return data

Binary file added BioinformaticFileWidget/BF.pyc
Binary file not shown.
30 changes: 30 additions & 0 deletions BioinformaticFileWidget/BFW.py
@@ -0,0 +1,30 @@
#bioinformatic file widget to be implemented
import BF #this is where the functions for dealing with stuff go
import sys
sys.path.append("C:\\Python27\\Lib\\site-packages\\Orange\\OrangeWidgets")
from OWWidget import *
import OWGUI
import numpy as np

# NOTE(review): the original class name "Bio-file" contains a hyphen and is
# therefore not a valid Python identifier, so this module could not even be
# imported. It is renamed to BioFile. Orange may expect the widget class name
# to follow a specific convention (e.g. match the module name) -- confirm
# against the Orange widget registration rules.
class BioFile(OWWidget):
    """Orange widget shell for the bioinformatic file tool.

    As written, the widget only declares one output channel and shows a
    static description label; nothing visible here sends data to the
    output.
    """

    def __init__(self, parent=None, signalManager=None):
        """Set up the widget's channels and its static "Info" box.

        Parameters:
            parent: forwarded unchanged to OWWidget.__init__.
            signalManager: forwarded unchanged to OWWidget.__init__.
        """
        OWWidget.__init__(self, parent, signalManager, 'Bio-File')

        # No inputs are declared yet; the line below is kept for reference.
        #self.inputs = [("Data", ExampleTable, self.data)]
        self.outputs = [("Sampled Data", ExampleTable)]

        # GUI: one "Info" box with a description label and an empty label.
        box = OWGUI.widgetBox(self.controlArea, "Info")
        self.infoa = OWGUI.widgetLabel(box, 'File tool to help import, organize, and manipulate data of various file types from within orange.')
        self.infob = OWGUI.widgetLabel(box, '')
        self.resize(100,50)


#goals
#take multiple files and merge them into a single data table
#be able to transpose a matrix (switching from samples to genes as entries)
#be able to deal with FASTA formats (why doesn't Orange do this?)



0 comments on commit a6fc2a8

Please sign in to comment.