```
Author: Florian Wagner
Email: florian.wagner@duke.edu
```

### Programs and third-party Python packages used

In [1]:
from pkg_resources import require

print 'Curl:'
!curl --version | head -n 1
print

print 'Python:'
!python -V
print

print 'Third-party Python packages:'
print str(require('numpy')[0])
print str(require('scipy')[0])
print str(require('cython')[0])
print str(require('genometools')[0])
print str(require('pyaffy')[0])

Curl:
curl 7.42.1 (x86_64-unknown-linux-gnu) libcurl/7.42.1 OpenSSL/1.0.2a zlib/1.2.8

Python:
Python 2.7.9

Third-party Python packages:
numpy 1.10.4
scipy 0.15.1
Cython 0.23.4
genometools 1.2.2
pyaffy 0.3rc1


### Configuration

In [3]:
import os

# Directory layout used throughout the notebook:
#   data_dir   -- where downloaded data is stored
#   output_dir -- where data generated by the notebook is stored
data_dir = './data/'
output_dir = './output/'

# Create whichever of the two directories does not exist yet.
for directory in (data_dir, output_dir):
    if not os.path.isdir(directory):
        os.mkdir(directory)

### Download data

In [4]:
# Download CDF (annotation) file and CEL (data) files
dropbox_folder_url = 'https://www.dropbox.com/sh/z8zafx9oogodky1/'
    
cdf_url = dropbox_folder_url + 'AACcuI150VSFWl4ji4_opG7Ba/HGU133Plus2_Hs_20_ENTREZG.cdf?dl=1'
cdf_file = data_dir + 'HGU133Plus2_Hs_20_ENTREZG.cdf'
!curl -L -o "$cdf_file" "$cdf_url"

cel_url1 = dropbox_folder_url + 'AADBnMN8wFR-nao1Ze695Cmaa/AFX_2_A1.CEL.gz?dl=1'
cel_file1 = data_dir + 'AFX_2_A1.CEL.gz'
!curl -L -o "$cel_file1" "$cel_url1"

cel_url2 = dropbox_folder_url + 'AADbbAoedfLqSvsgWnJZufLHa/AFX_2_A2.CEL.gz?dl=1'
cel_file2 = data_dir + 'AFX_2_A2.CEL.gz'
!curl -L -o "$cel_file2" "$cel_url2"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   506    0   506    0     0    882      0 --:--:-- --:--:-- --:--:--  1032
100 48.3M  100 48.3M    0     0  3589k      0  0:00:13  0:00:13 --:--:-- 4021k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   506    0   506    0     0    950      0 --:--:-- --:--:-- --:--:--   961
100 5033k  100 5033k    0     0  2409k      0  0:00:02  0:00:02 --:--:-- 6395k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   506    0   506    0     0   1116      0 --:--:-- --:--:-- --:--:--  1124
100 4903k  100 4903k    0     0  2174k      0  0:00:02  0:00:02 --:--:-- 4309k


### Run pyAffy

In [6]:
from collections import OrderedDict
from genometools import misc
from pyaffy import rma

misc.get_logger(verbose = False)

sample_cel_files = OrderedDict([
    ('Sample 1', cel_file1),
    ('Sample 2', cel_file2)
])

genes, samples, X = rma(cdf_file, sample_cel_files)
print ', '.join(genes[:5])
print ', '.join(samples)
print X[:10, :]

[2016-03-01 15:25:29] INFO: Parsing CDF file.
[2016-03-01 15:25:30] INFO: CDF file parsing time: 1.02 s
[2016-03-01 15:25:30] INFO: CDF array design name: HG-U133_Plus_2
[2016-03-01 15:25:30] INFO: CDF rows / columns: 1164 x 1164
[2016-03-01 15:25:30] INFO: Parsing CEL files...
[2016-03-01 15:25:30] INFO: CEL files parsing time: 0.3 s.
[2016-03-01 15:25:30] INFO: Performing background correction...
[2016-03-01 15:25:31] INFO: Background correction time: 0.3 s.
[2016-03-01 15:25:31] INFO: Performing quantile normalization...
[2016-03-01 15:25:31] INFO: Quantile normalization time: 0.2 s.
[2016-03-01 15:25:31] INFO: Summarize probeset intensities (with medianpolish)...
[2016-03-01 15:25:45] INFO: Probeset summarization time: 13.86 s.
[2016-03-01 15:25:45] INFO: Total RMA time: 15.7 s.
100009676_at, 10000_at, 10001_at, 10002_at, 100038246_at
Sample 1, Sample 2
[[ 5.27478027  5.27498913]
 [ 5.97499847  5.96692753]
 [ 8.76105022  8.8299284 ]
 [ 4.63253212  4.52224636]
 [ 5.53205776  5.42527

## Copyright and License

Copyright (c) 2016 Florian Wagner.

This work is licensed under a [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-nc-sa/4.0/).