Skip to content

Commit

Permalink
The CEL file parser in Bio/Affy/celmodule.cc was replaced by a scanner/consumer
Browse files Browse the repository at this point in the history

in CelFile.py, using Biopython's parser framework. Compilation of the C++
extension celmodule.cc caused problems on some platforms in the past.
  • Loading branch information
mdehoon committed Feb 11, 2005
1 parent c6d0798 commit c686839
Show file tree
Hide file tree
Showing 2 changed files with 215 additions and 137 deletions.
344 changes: 211 additions & 133 deletions Bio/Affy/CelFile.py
Original file line number Diff line number Diff line change
@@ -1,133 +1,211 @@
# Copyright 2004 by Harry Zuzan. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""
No version number yet.
Classes for accessing the information in Affymetrix cel files.
class CelParser: parses cel files
class CelRecord: stores the information from a cel file
"""

import _cel

class CelRecord:
    """
    Container for the arrays read from a cel file.

    Holds private deep copies of the intensity, standard deviation and
    pixel-count arrays, plus the dimensions of the intensity array.
    Needs error handling.
    Needs to know the chip design.
    """

    def __init__(self, data_dict):
        """
        Build the record from a dictionary of data attributes.

        data_dict must provide the keys 'intensities', 'stdevs' and
        'npix'.  Each value is deep-copied, so later changes to the
        caller's objects do not affect the record.  The intensities
        object must expose a two-element ``shape`` attribute.
        """
        import copy

        self._intensities = copy.deepcopy(data_dict['intensities'])
        self._stdevs = copy.deepcopy(data_dict['stdevs'])
        self._npix = copy.deepcopy(data_dict['npix'])

        # The intensity array defines the dimensions of the record.
        shape = self._intensities.shape
        self._nrows, self._ncols = shape

    def intensities(self):
        """
        Return the two dimensional array of probe cell intensities.
        Dimension 1 -> rows
        Dimension 2 -> columns
        """
        return self._intensities

    def stdevs(self):
        """
        Return the two dimensional array of probe cell standard deviations.
        Dimension 1 -> rows
        Dimension 2 -> columns
        """
        return self._stdevs

    def npix(self):
        """
        Return the two dimensional array holding the number of pixels
        in each probe cell.
        Dimension 1 -> rows
        Dimension 2 -> columns
        """
        return self._npix

    def nrows(self):
        """
        Return the number of rows of probe cells in the array.
        """
        return self._nrows

    def ncols(self):
        """
        Return the number of columns of probe cells in the array.
        """
        return self._ncols

    def size(self):
        """
        Return the size of the probe cell array as a tuple (nrows, ncols).
        """
        return (self._nrows, self._ncols)



class CelParser:
    """
    Parser for an Affymetrix cel file supplied as a string.

    Calling the parser instance returns the parsed data as a CelRecord.
    This class needs error handling.
    """

    def __init__(self, data=None):
        """
        Create the parser, optionally parsing right away.

        data is the contents of the cel file (not the file name).  When
        it is omitted nothing is parsed until parse() is called.
        """
        self._intensities = self._stdevs = self._npix = None

        if data is None:
            return
        self.parse(data)

    def parse(self, data):
        """
        Parse the contents of a cel file given as a string and keep the
        resulting three arrays on the parser.

        There is more information in the cel file that could be retrieved
        and stored in CelRecord.  The chip type should be a priority.
        """
        # The extension module hands back exactly three arrays.
        arrays = _cel.parse(data)
        self._intensities, self._stdevs, self._npix = arrays

        shape = self._intensities.shape
        self._nrows = shape[0]
        self._ncols = shape[1]

    def __call__(self):
        """
        Return the parsed data wrapped in a CelRecord.
        """
        return CelRecord({
            'intensities': self._intensities,
            'stdevs': self._stdevs,
            'npix': self._npix,
        })

# Copyright 2004 by Harry Zuzan. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.

"""
No version number yet.
Classes for accessing the information in Affymetrix cel files.
class CelParser: parses cel files
class CelRecord: stores the information from a cel file
"""

# import _cel

from Bio.ParserSupport import AbstractConsumer
from Numeric import *

class CelScanner:
    """Scanner for Affymetrix CEL files.

    Methods:
    feed     Feed data into the scanner.

    The scanner generates (and calls the consumer) the following
    types of events:

    Rows           - the number of rows on the microarray
    Cols           - the number of columns on the microarray
    StartIntensity - generated when the section [INTENSITY] is found
    ReadIntensity  - one line in the section [INTENSITY]
    """

    def feed(self, handle, consumer):
        """scanner.feed(handle, consumer)

        Feed in a handle to a Cel file for scanning.  handle is a file-like
        object (any iterable of text lines) that contains the Cel file.
        consumer is a Consumer object that will receive events as the
        file is scanned.

        Only the [HEADER] and [INTENSITY] sections produce events; lines
        in any other section are ignored.  The values passed to
        consumer.Cols and consumer.Rows are the raw text after the first
        "=" on the line, including any trailing newline.
        """
        section = ""
        for line in handle:
            if line.strip() == "":
                continue
            if line[0] == "[":
                # A new section starts; sections we do not know are
                # skipped by leaving the section name empty.
                section = ""
                if line[:8] == "[HEADER]":
                    section = "HEADER"
                elif line[:11] == "[INTENSITY]":
                    section = "INTENSITY"
                    consumer.StartIntensity()
                continue
            if section == "HEADER":
                # A header line without "=" cannot be split into a
                # keyword/value pair; skip it instead of letting the
                # tuple unpacking raise ValueError and abort the scan.
                if "=" not in line:
                    continue
                keyword, value = line.split("=", 1)
                if keyword == "Cols":
                    consumer.Cols(value)
                elif keyword == "Rows":
                    consumer.Rows(value)
            elif section == "INTENSITY":
                # Lines containing "=" in this section (for example
                # "NumberCells=..." in the test data) are metadata,
                # not intensity rows.
                if "=" in line:
                    continue
                consumer.ReadIntensity(line)


class CelConsumer(AbstractConsumer):
    """Consumer that turns the scanner's events into three arrays.

    After the events have been delivered, the results are available as
    the attributes _mean, _stdev and _npix, each of shape (rows, cols).
    """

    def __init__(self):
        # The arrays stay None until StartIntensity allocates them.
        self._mean = self._stdev = self._npix = None

    def Cols(self, value):
        """Record the number of columns, given as text."""
        self._cols = int(value)

    def Rows(self, value):
        """Record the number of rows, given as text."""
        self._rows = int(value)

    def StartIntensity(self):
        """Allocate the zero-filled result arrays.

        Rows and Cols must have been received first, since they supply
        the array dimensions.
        """
        shape = (self._rows, self._cols)
        self._mean = zeros(shape, Float)
        self._stdev = zeros(shape, Float)
        self._npix = zeros(shape, Int)

    def ReadIntensity(self, line):
        """Store one whitespace-separated line of five numeric fields.

        The second field selects the index along the first (rows)
        dimension and the first field the index along the second (cols)
        dimension; the remaining fields are the mean, the standard
        deviation and the pixel count of that cell.
        """
        first, second, mean, stdev, npix = [float(tok) for tok in line.split()]
        cell = (int(second), int(first))
        self._mean[cell] = mean
        self._stdev[cell] = stdev
        self._npix[cell] = int(npix)

class CelRecord:
    """
    Container for the arrays read from a cel file.

    Holds private deep copies of the intensity, standard deviation and
    pixel-count arrays, plus the dimensions of the intensity array.
    Needs error handling.
    Needs to know the chip design.
    """

    def __init__(self, data_dict):
        """
        Build the record from a dictionary of data attributes.

        data_dict must provide the keys 'intensities', 'stdevs' and
        'npix'.  Each value is deep-copied, so later changes to the
        caller's objects do not affect the record.  The intensities
        object must expose a two-element ``shape`` attribute.
        """
        import copy

        self._intensities = copy.deepcopy(data_dict['intensities'])
        self._stdevs = copy.deepcopy(data_dict['stdevs'])
        self._npix = copy.deepcopy(data_dict['npix'])

        # The intensity array defines the dimensions of the record.
        shape = self._intensities.shape
        self._nrows, self._ncols = shape

    def intensities(self):
        """
        Return the two dimensional array of probe cell intensities.
        Dimension 1 -> rows
        Dimension 2 -> columns
        """
        return self._intensities

    def stdevs(self):
        """
        Return the two dimensional array of probe cell standard deviations.
        Dimension 1 -> rows
        Dimension 2 -> columns
        """
        return self._stdevs

    def npix(self):
        """
        Return the two dimensional array holding the number of pixels
        in each probe cell.
        Dimension 1 -> rows
        Dimension 2 -> columns
        """
        return self._npix

    def nrows(self):
        """
        Return the number of rows of probe cells in the array.
        """
        return self._nrows

    def ncols(self):
        """
        Return the number of columns of probe cells in the array.
        """
        return self._ncols

    def size(self):
        """
        Return the size of the probe cell array as a tuple (nrows, ncols).
        """
        return (self._nrows, self._ncols)



class CelParser:
    """
    Parser that reads an Affymetrix cel file from a handle.

    Calling the parser instance returns the parsed data as a CelRecord.
    This class needs error handling.
    """

    def __init__(self, handle=None):
        """
        Create the parser, optionally parsing right away.

        handle is a handle to the cel file (not the file name).  When it
        is omitted nothing is parsed until parse() is called.
        """
        self._intensities = self._stdevs = self._npix = None

        if handle is None:
            return
        self.parse(handle)

    def parse(self, handle):
        """
        Parse the cel file behind the handle and keep the resulting
        three arrays on the parser.

        There is more information in the cel file that could be retrieved
        and stored in CelRecord.  The chip type should be a priority.
        """
        # The scanner drives the consumer, which accumulates the arrays.
        consumer = CelConsumer()
        CelScanner().feed(handle, consumer)

        self._intensities = consumer._mean
        self._stdevs = consumer._stdev
        self._npix = consumer._npix

        shape = self._intensities.shape
        self._nrows = shape[0]
        self._ncols = shape[1]

    def __call__(self):
        """
        Return the parsed data wrapped in a CelRecord.
        """
        return CelRecord({
            'intensities': self._intensities,
            'stdevs': self._stdevs,
            'npix': self._npix,
        })

8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -453,10 +453,10 @@ def is_reportlab_installed():
# libraries=["stdc++"],
# language="c++"
# ),
CplusplusExtension('Bio.Affy._cel',
['Bio/Affy/celmodule.cc'],
language="c++"
),
# CplusplusExtension('Bio.Affy._cel', # The file parser in celmodule.cc was
# ['Bio/Affy/celmodule.cc'], # replaced by a scanner/consumer in
# language="c++" # CelFile.py, using Biopython's
# ), # parser framework
]

DATA_FILES=[
Expand Down

0 comments on commit c686839

Please sign in to comment.