# TEST pycl package
**This notebook contains tests for all functions contained in the pycl package**

---
# PREDICATES

In [5]:
from pycl import is_readable_file, is_gziped

## Test is_readable_file
    def is_readable_file (fp):
    Verify the readability of a file or list of files

In [18]:
# Path that does not exist: the OSError raised by is_readable_file is caught
# and its message is printed instead of "OK".
try:
    is_readable_file("./test_data/KJHYTGYUJ")
except OSError as err:
    print(err)
else:
    print ("OK")

./test_data/KJHYTGYUJ is not a valid file


In [17]:
# Existing, readable file: no exception is raised, so "OK" is printed.
try:
    is_readable_file("./test_data/RADAR_Secondary.txt")
except OSError as err:
    print(err)
else:
    print ("OK")

OK


## Test is_gziped
    def is_gziped (fp):
    Return True if the file is Gziped else False

In [19]:
is_gziped("./test_data/RADAR_Secondary.txt")

False

In [21]:
is_gziped("./test_data/RADAR_Secondary.txt.gz")

True

---
# PATH MANIPULATION

In [8]:
from pycl import file_basename, file_extension, file_name, dir_name

## Test file_basename
    def file_basename (path):
    Return the basename of a file without folder location and extension

In [22]:
file_basename("./test_data/RADAR_Secondary.txt.gz")

'RADAR_Secondary'

## Test file_extension
    def file_extension (path):
    Return The extension of a file in lowercase

In [23]:
file_extension("./test_data/RADAR_Secondary.txt.gz")

'gz'

## Test file_name
    def file_name (path):
    Return The complete name of a file with the extension but without folder location

In [9]:
file_name("./test_data/test/RADAR_Secondary.txt.gz")

'RADAR_Secondary.txt.gz'

## Test dir_name
    def dir_name (path):
    Return the complete path where is located the file without the file name

In [10]:
dir_name("./test_data/test/RADAR_Secondary.txt.gz")

'test'

---
# STRING FORMATTING

In [26]:
from pycl import supersplit, rm_blank

## Test supersplit
    def supersplit (string, separator=""):
	Like split, but can take a list of separators instead of a single separator

In [16]:
# Sample tab-delimited record whose 4th field is itself "|"-delimited
a = "chr7\t74138\t774138\tA>I|LOC100129917|LUNG:LYMPHOBLASTOID_CELL_LINE|15342557:15258596:22327324\t0"

# List of separators: the string is split on both "\t" and "|"
print(supersplit(a, ["\t","|"]))

# No separator given: in the output below only the tabs act as separators
print(supersplit(a))

# Single separator string: split on "|" only, the tabs stay inside the fields
print(supersplit(a, "|"))

['chr7', '74138', '774138', 'A>I', 'LOC100129917', 'LUNG:LYMPHOBLASTOID_CELL_LINE', '15342557:15258596:22327324', '0']
['chr7', '74138', '774138', 'A>I|LOC100129917|LUNG:LYMPHOBLASTOID_CELL_LINE|15342557:15258596:22327324', '0']
['chr7\t74138\t774138\tA>I', 'LOC100129917', 'LUNG:LYMPHOBLASTOID_CELL_LINE', '15342557:15258596:22327324\t0']


## Test rm_blank
    def rm_blank (name, replace=""):
    Replace blank spaces in a name by a given character (default = remove)
    Blanks at extremities are always removed and not replaced

In [31]:
# Sample string containing runs of tabs and runs of spaces of various lengths
a = "chr\t\t17|LU NG:LYMPHOBLAST    OID_CELL_LINE|15342557:152585     96:22327324\t0"

# Default replace="": every run of blanks is removed
print(rm_blank(a))

# replace="*": in the output below each run of blanks becomes a single "*"
print(rm_blank(a, replace="*"))

chr17|LUNG:LYMPHOBLASTOID_CELL_LINE|15342557:15258596:223273240
chr*17|LU*NG:LYMPHOBLAST*OID_CELL_LINE|15342557:152585*96:22327324*0


---
# FILE MANIPULATION

In [1]:
from pycl import copyFile, gzip_file, gunzip_file

## Test copyFile
    def copyFile(src, dest):
    Copy a single file to a destination file or folder (with error handling/reporting)
    @param src Source file path
    @param dest Path of the folder where to copy the source file

In [2]:
copyFile(src="./test_data/RADAR_Secondary.txt", dest="./test_data/")

Error: './test_data/RADAR_Secondary.txt' and './test_data/RADAR_Secondary.txt' are the same file


In [3]:
copyFile(src="./test_data/RADAR_Secondary.txt", dest="./test_data/test_dir/")

## Test gzip_file
    def gzip_file (in_path, out_path=None):
    @param in_path Path of the input uncompressed file
    @param out_path Path of the output compressed file (optional)
    @exception  OSError Can be raised by open

In [4]:
gzip_file("./test_data/RADAR_Secondary.txt")

Compressing ./test_data/RADAR_Secondary.txt


'/home/aleg/Programming/Python3/pycl/test_data/RADAR_Secondary.txt.gz'

## Test gunzip_file
    def gunzip_file (in_path, out_path=None):
    @param in_path Path of the input compressed file
    @param out_path Path of the output uncompressed file (optional)
    @exception  OSError Can be raised by open

In [5]:
gunzip_file("./test_data/RADAR_Secondary.txt.gz")

Uncompressing ./test_data/RADAR_Secondary.txt.gz


'/home/aleg/Programming/Python3/pycl/test_data/RADAR_Secondary.txt'

---
# FILE INFORMATION

In [1]:
from pycl import head, linerange, colsum, fastcount, simplecount

## Test head
    def head (file, n=10, ignore_hashtag_line=False):
    Emulate linux head cmd

In [2]:
head("./test_data/RADAR_Main.txt", n= 3)

#chromosome	position	gene	strand	annot1	annot2	alu?	non_alu_repetitive?	conservation_chimp	conservation_rhesus	conservation_mouse
chr1	206256301	C1orf186	-	intronic	intronic	no	no	N	N	N
chr6	116991832	intergenic	-	intergenic	intergenic	no	no	N	N	N


In [3]:
head("./test_data/RADAR_Main.txt", n=3, ignore_hashtag_line=True)

chr1	206256301	C1orf186	-	intronic	intronic	no	no	N	N	N
chr6	116991832	intergenic	-	intergenic	intergenic	no	no	N	N	N
chr7	30504355	NOD1	-	intronic	intronic	no	no	N	N	N


In [4]:
head("./test_data/RADAR_Secondary.txt.gz", n=3, ignore_hashtag_line=True)

chr1:1037916	Peng et al 2012	Lymphoblastoid cell line	9	66.67
chr1:1156882	Peng et al 2012	Lymphoblastoid cell line	42	36.59
chr1:1157460	Peng et al 2012	Lymphoblastoid cell line	66	22.73


## Test linerange
    def linerange (file, range_list=[[0,10]]):
    Print a range of lines in a file according to a list of start end lists

In [17]:
file = "./test_data/RADAR_Secondary.txt"
linerange (file, [[0,5],[10,11],[75,80],[98,105]])

0	#location	reference	tissue	coverage	editing_level(%)
1	chr1:1037916	Peng et al 2012	Lymphoblastoid cell line	9	66.67
2	chr1:1156882	Peng et al 2012	Lymphoblastoid cell line	42	36.59
3	chr1:1157460	Peng et al 2012	Lymphoblastoid cell line	66	22.73
4	chr1:1252441	Peng et al 2012	Lymphoblastoid cell line	11	72.73
5	chr1:1252443	Peng et al 2012	Lymphoblastoid cell line	11	45.45

10	chr1:1594977	Peng et al 2012	Lymphoblastoid cell line	227	24.67
11	chr1:1594978	Peng et al 2012	Lymphoblastoid cell line	228	4.82

75	chr1:6710585	Peng et al 2012	Lymphoblastoid cell line	30	65.52
76	chr1:6710595	Peng et al 2012	Lymphoblastoid cell line	28	50.00
77	chr1:6941764	Peng et al 2012	Lymphoblastoid cell line	7	57.14
78	chr1:7908257	Peng et al 2012	Lymphoblastoid cell line	10	40.00
79	chr1:7980494	Peng et al 2012	Lymphoblastoid cell line	38	24.32
80	chr1:7980525	Peng et al 2012	Lymphoblastoid cell line	24	37.50

98	chr1:10602697	Peng et al 2012	Lymphoblastoid cell line	5	60.00
99	chr1:11138237	Peng et

## Test colsum
    def colsum (file, colrange=None, separator="", header=False, ignore_hashtag_line=False, max_items=10, ret_type="md"):
	Create a summary of selected columns of a file
    Possible return types: md = markdown formatted table, dict = raw parsing dict, report = Indented_text_report

In [16]:
print(colsum("./test_data/RADAR_Main.txt", header=True, colrange=[0,2,6], max_items=15))

|#chromosome|chr1|chr17|chr9|chr15|chr6|chr14|chr18|chr2|chrY|chr4|chr7|
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
|Count|4|3|2|2|2|1|1|1|1|1|1|

|gene|RABEP1|NUP133|JUB|GREB1L|SPHKAP|NLGN4Y|CELSR2|RBPJ|TLE4|SOCS7|ADPGK|UBE2O|TSC1|GRIK2|MEF2A|...|
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
|Count|1|1|1|1|1|1|1|1|1|1|1|1|1|1|1|...|

|alu?|no|
|:---|:---|
|Count|19|




In [14]:
colsum("./test_data/RADAR_Main.txt", header=True, ret_type="dict", colrange=[0,3])

OrderedDict([(0,
              OrderedDict([('chr1', 4),
                           ('chr6', 2),
                           ('chr7', 1),
                           ('chr15', 2),
                           ('chr9', 2),
                           ('chr17', 3),
                           ('chr4', 1),
                           ('chrY', 1),
                           ('chr2', 1),
                           ('chr18', 1),
                           ('chr14', 1)])),
             (3, OrderedDict([('-', 10), ('+', 9)]))])

In [18]:
print(colsum(
        "./test_data/RADAR_clean.txt",
        header=True,
        ignore_hashtag_line=True,
        ret_type="report",
        separator=["\t","|"],
        max_items=5))

0
	chr1	997
1
	225974746	1
	225974735	1
	225974581	1
	224599486	1
	224584888	1
	...	...
2
	225974746	1
	225974735	1
	225974581	1
	224599486	1
	224584888	1
	...	...
3
	A>I	997
4
	FDPS	34
	MDM4	31
	CTSS	28
	DNAJC11	25
	S100PBP	24
	...	...
5
	YH	997
6
	22327324	997
7
	33.33	31
	66.67	31
	50.00	23
	57.14	22
	60.00	22
	...	...
8
	-	527
	+	470



## Test fastcount
    def fastcount(file):
	Efficient way to count the number of lines in a file

In [19]:
fastcount("./test_data/Small_m5C_Squires_hg38.bed")

200

## Test simplecount
    def simplecount(filename, ignore_hashtag_line=False):
    Simple way to count the number of lines in a file with more options

In [21]:
simplecount("./test_data/Small_m5C_Squires_hg38.bed", ignore_hashtag_line=True)

194

---
# DIRECTORY MANIPULATION

In [1]:
from pycl import mkdir

## Test mkdir
    def mkdir(fp, level=1):
    Reproduce the ability of the UNIX "mkdir -p" command
    (i.e. if the path already exists no exception will be raised).
    Can create nested directories recursively
    @param  fp path name where the folder should be created
    @param  level   level in the path where to start creating the directories.
                    Used by the program for the recursive creation of directories
    @exception  OSError or PermissionError can be raised by os.mkdir

In [2]:
mkdir("./test_data/test_dir")

In [5]:
mkdir ("./test/test/test")
!rm -rf ./test

Creating /home/aleg/Programming/Python3/pycl/test
Creating /home/aleg/Programming/Python3/pycl/test/test
Creating /home/aleg/Programming/Python3/pycl/test/test/test


---
# SHELL MANIPULATION

In [2]:
from pycl import bash_basic, bash, make_cmd_str

## Test make_cmd_str
    def make_cmd_str(prog_name, opt_dict={}, opt_list=[]):
    Create a Unix-like command line string from a program name, a dictionary of options and a list of arguments
    @param prog_name Name (if added to the system path) or path of the program
    @param opt_dict Dictionary of option arguments such as "-t 5". The option flag has to
    be the key (the example below passes it with its leading "-") and the option value the dictionary value.
    If no value is requested after the option flag, "None" has to be assigned to the value field.
    @param opt_list List of simple command line arguments

In [9]:
make_cmd_str("bwa", {"-b":None, "-t":6, "-i":"../idx/seq.fa"}, ["../read1", "../read2"])

'bwa -t 6 -i ../idx/seq.fa -b ../read1 ../read2 '

## Test bash_basic
    def bash_basic(cmd):
	Send a basic bash command

In [3]:
# In the output below the command output shows up before each "None": bash_basic
# appears to print the result itself and return None, which print() then displays.
print(bash_basic("ls -l"))
print(bash_basic("echo TTTT"))
# NOTE(review): grep receives a single argument here, which it takes as the pattern
# (no file operand) - presumably a search pattern was meant to precede the path; confirm.
print(bash_basic("grep ./test_data/RADAR_Secondary.txt"))

total 116
-rw-rw-r-- 1 aleg aleg 35141 Jun  6 10:20 LICENSE
drwxrwxr-x 2 aleg aleg  4096 Jun 21 12:13 __pycache__
-rwxrwxrwx 1 aleg aleg 22063 Jun 21 12:13 pycl.py
-rwxrwxrwx 1 aleg aleg   143 Jun  5 17:57 README.md
drwxrwxr-x 3 aleg aleg  4096 Jun 10 15:37 test_data
-rw-rw-r-- 1 aleg aleg 43512 Jun 21 12:11 test_pycl.ipynb


None
TTTT


None


None


## Test bash
    def bash(cmd, stdin=None, ret_stderr=False, ret_stdout=True, str_output=True):
    Run a command line in the default shell and return the standard output
    @param  cmd A command line formatted as a string
    @param  stdin       Optional parameter to redirect an object to the standard input
    @param  ret_stderr  If True the standard error output will be returned
    @param  ret_stdout  If True the standard output will be returned
    @param  str_output  Transform the std output in a string instead of the bytes-like object
    @note If ret_stderr and ret_stdout are True a tuple will be returned and if both are False
    None will be returned
    @return If no standard error return the standard output as a string
    @exception  OSError Raised if a message is returned on the standard error output
    @exception  (ValueError,OSError) May be raised by Popen

In [4]:
print(bash("ls"))

LICENSE
__pycache__
pycl.py
README.md
test_data
test_pycl.ipynb



In [5]:
bash("head -n 2", stdin=bash("ls", str_output=False))

'LICENSE\n__pycache__\n'

In [3]:
bash("hed -n 2", ret_stderr=True)

('', '/bin/sh: 1: hed: not found\n')

---
# DICTIONARY FORMATTING

In [27]:
from pycl import dict_to_md, dict_to_report

## Test dict_to_md
    def dict_to_md (
    d,
    key_label="",
    value_label="",
    transpose=False,
    sort_by_key=False,
    sort_by_val=True,
    max_items=None):
 	Transform a dict into a markdown formatted table

In [28]:
d = {"a":12,"b":14,"c":8,"d":56,"e":76}
print (dict_to_md(d, "Letter", "Number", sort_by_val=True))
print (dict_to_md(d, "Letter", "Number", transpose=True, max_items=2))

|Letter|Number|
|:---|:---|
|e|76|
|d|56|
|b|14|
|a|12|
|c|8|

|Letter|e|d|...|
|:---|:---|:---|:---|
|Number|76|56|...|



## Test dict_to_report
    def dict_to_report (
    d,
    tab="\t",
    ntab=0,
    sep=":",
    sort_dict=True,
    max_items=None):
    Recursive function to return a text report from nested dict or OrderedDict objects

In [32]:
# Nested dict rendered with a custom indentation marker
d = {
    "a": 12,
    "b": 14,
    "c": {
        "c1": 12,
        "c2": {"c2.1": 33221, "c2.2": 765},
        "c3": 32,
        "c4": 443,
    },
    "d": 56,
    "e": 76,
}
print(dict_to_report(d, tab=" | "))

# Same structure with a larger innermost dict, to show truncation by max_items
d = {
    "a": 12,
    "b": 14,
    "c": {
        "c1": 12,
        "c2": {
            "c2.1": 33221,
            "c2.2": 765,
            "c2.3": 7533,
            "c2.4": 76433,
            "c2.5": 876543,
            "c2.6": 89765,
            "c2.7": 8654,
        },
        "c3": 32,
        "c4": 443,
    },
    "d": 56,
    "e": 76,
}
print(dict_to_report(d, tab="--", max_items=4, sort_dict=True))

a:12
b:14
c
 | c1:12
 | c2
 |  | c2.1:33221
 |  | c2.2:765
 | c3:32
 | c4:443
d:56
e:76

a:12
b:14
c
--c1:12
--c2
----c2.5:876543
----c2.6:89765
----c2.4:76433
----c2.1:33221
----...:...
--c3:32
--c4:443
d:56
e:76



---
# TABLE FORMATTING

In [2]:
from pycl import reformat_table, _decompose_line, _clean_values, _reformat_line

## Test _decompose_line
    def _decompose_line(line, template):
    Helper function for reformat_table. Decompose a line and extract the values given a template list

In [3]:
print(_decompose_line(
    line = "chr1	631539	631540	  Squires|id1	0	+\n",
    template = [0,"\t",1,"\t",2,"\t",3,"|",4,"\t",5,"\t",6]
    ))

print(_decompose_line(
    line = "chr1	6315es|id1		+\n",
    template = [0,"\t",1,"\t",2,"\t",3,"|",4,"\t",5,"\t",6]
    ))

['chr1', '631539', '631540', '  Squires', 'id1', '0', '+\n']
['chr1', '6315es|id1', '', '+\n', '', '', '']


## Test _clean_values
    def _clean_values (
    val_list,
    replace_internal_space=None,
    replace_null_val="*",
    subst_dict={},
    filter_dict={},
    predicate=None):
    
    Helper function for reformat_table. Clean the extracted values

In [4]:
_clean_values(
    val_list=['  chr1', '6315 es|id1', '', '+ \n', '', '', ''],
    replace_internal_space='_',
    replace_null_val="*")

['chr1', '6315_es|id1', '*', '+', '*', '*', '*']

In [5]:
# When a value is found in the substitution dictionary, the function returns the line with that value changed accordingly

subst_dict = { 0:{"chr1":"1", "chr4":"4"}, 3:{"Squires":"5376774764"}}

_clean_values(
    val_list=_decompose_line(line = "  chr1	63 1539  	631540	  Squires|id1	0	+\n", template = [0,"\t",1,"\t",2,"\t",3,"|",4,"\t",5,"\t",6]),
    replace_internal_space = '_',
    replace_null_val = "*",
    subst_dict = subst_dict
    )

['1', '63_1539', '631540', '5376774764', 'id1', '0', '+']

In [6]:
# When a value is found in the filter dictionary, the function returns None

filter_dict = { 0:["chr1","chr2"]}

_clean_values(
    val_list=['chr1', '6315 es|id1', '', '+ \n', '', '', ''],
    replace_internal_space='_',
    replace_null_val="*",
    filter_dict=filter_dict
)

In [13]:
# Using a lambda function as a predicate allows complex filtering based on the values of several fields

# Accept a line only if fields 1 and 2, read as integers, differ by at most 2000
predicate = lambda val_list: abs(int(val_list[1])-int(val_list[2])) <= 2000

# |6315 - 7675| = 1360 <= 2000: the predicate holds and the values are returned
val_list=['chr1', '6315', '7675', "Squires"]
print("With valid line")
print(_clean_values(val_list=val_list, predicate=predicate))

# |6315 - 8675| = 2360 > 2000: the predicate fails and None is returned
val_list=['chr1', '6315', '8675', "Squires"]
print("With invalid line")
print(_clean_values(val_list=val_list, predicate=predicate))

With valid line
['chr1', '6315', '7675', 'Squires']
With invalid line
None


## Test _reformat_line
    def _reformat_line (val_list, template):
	Helper function for reformat_table. Reassemble a line from a list of values and a template list

In [39]:
line = _reformat_line(
    val_list=['chr1', '631539', '631540', 'Squires', 'id1', '0', '+'],
    template=[0,"\t",1,"\t",2,"\tm5C|-|HeLa|22344696\t-\t",6]
    )

print(line)

chr1	631539	631540	m5C|-|HeLa|22344696	-	+



In [41]:
line = _reformat_line(
    val_list=_clean_values(
        val_list=_decompose_line(
            line = "chr1	631539	631540	  Squires|id1	0	+\n",
            template = [0,"\t",1,"\t",2,"\t",3,"|",4,"\t",5,"\t",6]
            ),
        replace_internal_space='_',
        replace_null_val="*"
        ),
    template=[0,"\t",1,"\t",2,"\tm5C|-|HeLa|22344696\t-\t",6]
    )

print(line)

chr1	631539	631540	m5C|-|HeLa|22344696	-	+



## Test reformat_table
    def reformat_table(
    input_file,
    output_file,
    init_template,
    final_template,
    header = '',
    keep_original_header = True,
    replace_internal_space='_',
    replace_null_val="*",
    subst_dict={},
    filter_dict=[]):
    
	Reformat a table given an initial and a final line template, each indicated as a list where numbers
	indicate the data column and strings the formatting characters
	Example initial line = "chr1    631539    631540    Squires|id1    0    +"
	Initial template = [0,"\t",1,"\t",2,"\t",3,"|",4,"\t",5,"\t",6]
	Example final line = "chr1    631539    631540    m5C|-|HeLa|22344696    -    +"
	Final template = [0,"\t",1,"\t",2,"\tm5C|-|HeLa|22344696\t-\t",6]
	A nested dictionary of substitutions per position can also be provided to replace
	specific values by others :
	subst_dict = { 0:{"chr1":"1","chr2":"2"}, 3:{"Squires":"5376774764","Li":"27664684"}}
	In addition, a dictionary of lists per position can be provided to filter out lines
	with specific values :
	filter_dict =  { 0:["chr2", "chr4"], 1:["46767", "87765"], 5:["76559", "77543"]}
	A predicate function taking the list of values can also be passed through the "predicate"
	argument to filter lines (see the last example of this section)

In [42]:
reformat_table(
    input_file="./test_data/Small_m5C_Squires_hg38.bed",
    output_file="./test_data/Small_m5C_Squires_hg38_reformat.bed",
    init_template=[0,"\t",1,"\t",2,"\t",3,"|",4,"\t",5,"\t",6],
    final_template=[0,"\t",1,"\t",2,"\tm5C|*|HeLa|22344696\t-\t",6],
    replace_internal_space='_',
    replace_null_val="*",
    keep_original_header=False,
    header="# New header\n"
    )

head ("./test_data/Small_m5C_Squires_hg38_reformat.bed")

194 Lines processed	194 Lines pass	0 Lines filtered out	0 Lines fail

# New header
chr1	631539	631540	m5C|*|HeLa|22344696	-	+
chr1	631540	631541	m5C|*|HeLa|22344696	-	+
chr1	632285	632286	m5C|*|HeLa|22344696	-	+
chr1	632286	632287	m5C|*|HeLa|22344696	-	+
chr1	633058	633059	m5C|*|HeLa|22344696	-	+
chr1	633062	633063	m5C|*|HeLa|22344696	-	+
chr1	634423	634424	m5C|*|HeLa|22344696	-	+
chr1	634424	634425	m5C|*|HeLa|22344696	-	+
chr1	634653	634654	m5C|*|HeLa|22344696	-	+


In [31]:
# Substitutions per field index: field 0 "chr1"/"chr2" -> "1"/"2", field 3 "Peng" -> "22344696"
subst_dict = {0:{"chr1":"1", "chr2":"2"}, 3:{"Peng":"22344696"}}
# Lines whose field 18 equals "intron" are filtered out
filter_dict = {18:["intron"]}

reformat_table(
    input_file="./test_data/Small_editing_Peng_hg38.bed",
    output_file="./test_data/Small_editing_Peng_hg38_reformat.bed",
    init_template=[0,"\t",1,"\t",2,"\t",3,"|",4,"|",5,"|",6,"|",7,"|",8,"|",9,"->",10,"|",11,"%|",12,"|",13,"|",14,"|",15,"|",16,"|",17,"|",18,"|",19,"\t",20,"\t",21],
    final_template=[0,"\t",1,"\t",2,"\t",9,">",10,"|",3,"|HeLa|",19,"\t",11,"\t",21],
    replace_internal_space='_',
    replace_null_val="*",
    subst_dict = subst_dict,
    filter_dict = filter_dict
    )

# Show lines 0 to 9 and the last four lines of the reformatted file
n_lines = fastcount("./test_data/Small_editing_Peng_hg38_reformat.bed")
linerange ("./test_data/Small_editing_Peng_hg38_reformat.bed", [[0,9], [n_lines-4, n_lines-1]])

194 Lines processed	139 Lines pass	55 Lines filtered out	0 Lines fail

0	# Transcriptome-wide map of editing sites [hg38 coordinates]
1	# Reference: Peng et al., Nat. Biotechnol. 30, 253 (2012) [PMID 22327324, DOI 10.1038/nbt.2122]
2	#
3	# Data cleaned and converted to BED6, coordinate conversion to hg38 using liftOver.
4	# Maintainer: Maurits Evers (maurits.evers@anu.edu.au)
5	#
6	1	1251840	1251841	A>G|22344696|HeLa|-	56.25	-
7	1	1252243	1252244	A>G|22344696|HeLa|-	19.44	-
8	1	1663537	1663538	A>G|22344696|HeLa|CDK11B;SLC35E2	24.67	-
9	1	1663538	1663539	A>G|22344696|HeLa|CDK11B;SLC35E2	4.82	-

141	1	9173262	9173263	A>G|22344696|HeLa|-	43.24	-
142	1	9173454	9173455	A>G|22344696|HeLa|-	35.14	-
143	1	9173533	9173534	A>G|22344696|HeLa|-	24.10	-
144	1	9173535	9173536	A>G|22344696|HeLa|-	66.15	-



In [32]:
# Keep only the lines whose field 11 (the number captured before "%|" in init_template) is >= 70
predicate = lambda val_list: float(val_list[11]) >= 70

reformat_table(
    input_file="./test_data/Small_editing_Peng_hg38.bed",
    output_file="./test_data/Small_editing_Peng_hg38_reformat.bed",
    init_template=[0,"\t",1,"\t",2,"\t",3,"|",4,"|",5,"|",6,"|",7,"|",8,"|",9,"->",10,"|",11,"%|",12,"|",13,"|",14,"|",15,"|",16,"|",17,"|",18,"|",19,"\t",20,"\t",21],
    final_template=[0,"\t",1,"\t",2,"\t",9,">",10,"|",3,"|HeLa|",19,"\t",11,"\t",21],
    replace_internal_space='_',
    replace_null_val="*",
    predicate=predicate
    )

# Show lines 0 to 9 and the last four lines of the reformatted file
# (the original second range read "n_lines-," which is a syntax error)
n_lines = fastcount("./test_data/Small_editing_Peng_hg38_reformat.bed")
linerange ("./test_data/Small_editing_Peng_hg38_reformat.bed", [[0,9], [n_lines-4, n_lines-1]])

194 Lines processed	9 Lines pass	185 Lines filtered out	0 Lines fail

0	# Transcriptome-wide map of editing sites [hg38 coordinates]
1	# Reference: Peng et al., Nat. Biotechnol. 30, 253 (2012) [PMID 22327324, DOI 10.1038/nbt.2122]
2	#
3	# Data cleaned and converted to BED6, coordinate conversion to hg38 using liftOver.
4	# Maintainer: Maurits Evers (maurits.evers@anu.edu.au)
5	#
6	chr1	1317060	1317061	A>G|Peng|HeLa|CPSF3L	72.73	-
7	chr1	1732994	1732995	A>G|Peng|HeLa|SLC35E2	77.78	-
8	chr1	1733057	1733058	A>G|Peng|HeLa|SLC35E2	70.00	-
9	chr1	1734418	1734419	A>G|Peng|HeLa|SLC35E2	80.00	-

11	chr1	6649427	6649428	A>G|Peng|HeLa|DNAJC11	70.59	-
12	chr1	7894812	7894813	A>G|Peng|HeLa|-	71.43	-
13	chr1	7909848	7909849	A>G|Peng|HeLa|-	81.82	-
14	chr1	9170342	9170343	A>G|Peng|HeLa|-	70.75	-



---
# WEB TOOLS

In [1]:
from pycl import url_exist, wget
from os import remove

## Test url_exist
    def url_exist (url):
    Predicate verifying if a URL exists without downloading the whole linked content

In [2]:
url_exist("http://www.google.com") # When this one will be False it will probably be the end of the world

True

In [3]:
url_exist("http://www.JUYGKUYHGJHFJ.com")

False

## Test wget
    def wget(url, out_name="", progress_block=100000000):

    Download a file from an URL to a local storage.
    @param  url             An internet URL pointing to the file to download
    @param  out_name        Name of the output file (optional)
    @param  progress_block  size of the byte block for the progression of the download

In [4]:
outfile = wget("")
if outfile:
    print(outfile)
    remove(outfile)

unknown url type: ''


In [5]:
outfile = wget("https://github.com/a-slidaster/test_data/RADAR_Secondary.txt.gz")
if outfile:
    print(outfile)
    remove(outfile)

HTTP Error 404: Not Found


In [6]:
outfile = wget("https://github.com/a-slide/pycl/blob/master/test_data/RADAR_Secondary.txt.gz", progress_block=10000)
if outfile:
    print(outfile)
    remove(outfile)

Downloading: https://github.com/a-slide/pycl/blob/master/test_data/RADAR_Secondary.txt.gz	Size unknown
16.4 kB Downloaded
24.6 kB Downloaded
30.3 kB Downloaded
30.3 kB Downloaded
RADAR_Secondary.txt.gz


In [6]:
outfile = wget("https://www.encodeproject.org/files/ENCFF000HJC/@@download/ENCFF000HJC.bigWig", "test.bigWig", 50000000)
if outfile:
    print(outfile)
    remove(outfile)

Downloading: https://www.encodeproject.org/files/ENCFF000HJC/@@download/ENCFF000HJC.bigWig	Bytes: 258930225
50.0 MB Downloaded	[19.31 %]
100.0 MB Downloaded	[38.62 %]
150.0 MB Downloaded	[57.93 %]
200.0 MB Downloaded	[77.24 %]
250.0 MB Downloaded	[96.55 %]
258.9 MB Downloaded	[100 %]
test.bigWig


2