In [1]:
import io
import pathlib
import tarfile

import requests
import xswap

### Download the graph from KONECT

In [2]:
# Fetch the bz2-compressed KONECT archive and save the edge list it contains
# to ../data/unprocessed_ppi.txt.
ppi_url = 'http://konect.cc/files/download.tsv.maayan-vidal.tar.bz2'
# A timeout keeps "Restart & Run All" from hanging forever on a stalled
# connection; raise_for_status() surfaces HTTP errors here instead of as an
# opaque decompression failure when an error page is handed to tarfile.
response = requests.get(ppi_url, timeout=60)
response.raise_for_status()

unprocessed_filename = pathlib.Path('../data/unprocessed_ppi.txt')
unprocessed_filename.parent.mkdir(parents=True, exist_ok=True)

# The archive is held in memory (response.content); the context managers make
# sure both the tar handle and the extracted member are closed afterwards.
with tarfile.open(fileobj=io.BytesIO(response.content), mode='r:bz2') as tar:
    graph_bytes = tar.extractfile(tar.getmember('maayan-vidal/out.maayan-vidal'))
    # extractfile() returns None when the member is not a regular file or link.
    if graph_bytes is None:
        raise ValueError(
            "'maayan-vidal/out.maayan-vidal' is not a regular file in the archive"
        )
    with graph_bytes, open(unprocessed_filename, 'wb') as f:
        f.write(graph_bytes.read())

### Process the graph using `xswap.preprocessing`

In [3]:
# Read the raw edge list saved by the download step, using a tab as the node delimiter.
str_edges = xswap.preprocessing.load_str_edges(
    unprocessed_filename,
    node_delim='\t',
)

# Map the string edges with xswap; `target_map` is returned as well but is
# not used in this cell.
# NOTE(review): presumably bipartite=False makes sources and targets share a
# single mapping, which would be why only `source_map` is saved — confirm
# against the xswap documentation.
mapped_edges, source_map, target_map = xswap.preprocessing.map_str_edges(
    str_edges,
    bipartite=False,
)

# Put every edge's endpoints in ascending order, as a tuple.
ordered_edges = [tuple(sorted(edge)) for edge in mapped_edges]

# Output locations for the node mapping and the processed edge list.
mapping_file = pathlib.Path('../data/ppi_mapping.csv')
processed_edges_filename = pathlib.Path('../data/ppi_edges.csv')

xswap.preprocessing.write_mapping(mapping_file, source_map)
xswap.preprocessing.write_edges(processed_edges_filename, ordered_edges)