In [10]:
import requests
import tempfile
import json
import os
from collections import defaultdict
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

def load_go_terms_with_parents(json_file):
    """Load GO terms with parents and p-values from JSON, sorting by p-value."""
    with open(json_file) as f:
        data = json.load(f)
        sorted_data = sorted(data, key=lambda x: x['p_value'])
        terms_with_parents = [
            {
                'native': entry['native'],
                'name': entry['name'],
                'p_value': entry['p_value'],
                'parents': entry.get('parents', []),
                'source': 'JSON'
            } for entry in sorted_data if 'parents' in entry
        ]
        return terms_with_parents

def build_layers(terms_with_parents):
    """Build layers of GO terms based on parent-child relationships."""
    layers = defaultdict(list)
    term_dict = {term['native']: term for term in terms_with_parents}

    # Initialize layers for all terms
    for term in terms_with_parents:
        term['layer'] = None

    def set_layer(term):
        if term['layer'] is not None:
            return term['layer']
        if not term['parents']:
            term['layer'] = 0
        else:
            parent_layers = [set_layer(term_dict[parent]) for parent in term['parents'] if parent in term_dict]
            term['layer'] = max(parent_layers) + 1 if parent_layers else 0
        layers[term['layer']].append(term)
        return term['layer']

    for term in terms_with_parents:
        set_layer(term)

    return layers

def assign_layer_colors(layers):
    """Assign a broad range of colors to each layer."""
    color_palette = [
        '#FF1493', '#00BFFF', '#32CD32',
        '#8A2BE2', '#A52A2A', '#DEB887', '#5F9EA0', '#7FFF00',
        '#D2691E', '#FF7F50', '#6495ED', '#FFF8DC', '#DC143C'
    ]
    
    for layer_index, terms in layers.items():
        color = color_palette[layer_index % len(color_palette)]
        for term in terms:
            term['color'] = color
    return layers

def load_obo_go_terms(obo_file_path):
    """Load GO terms from OBO file."""
    go_dag = GODag(obo_file_path)
    return {go_term.id: go_term for go_term in go_dag.values()}

def generate_dag_plots(json_file, temp_obo_path):
    terms_with_parents = load_go_terms_with_parents(json_file)
    if temp_obo_path:
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")
        
        # Load OBO GO terms
        obo_go_terms = load_obo_go_terms(temp_obo_path)
        
        # Mark source of each term
        for term in terms_with_parents:
            if term['native'] in obo_go_terms:
                term['source'] = 'OBO'
                
        # Build layers and assign colors
        layers = build_layers(terms_with_parents)
        colored_layers = assign_layer_colors(layers)
        
        # Prepare colors for plotting
        colors = {term['native']: term['color'] for terms in colored_layers.values() for term in terms}
        
        # Generate plot for DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Plotting the GO DAG with specified colors for GO terms
        plot_gos(output_file, set(term['native'] for terms in colored_layers.values() for term in terms), go_dag, title=title, mark_alt_id=True, parentcnt=True, childcnt=True, go_color=colors)
        print(f"DAG plot generated at {output_file}")

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
temp_obo_path = stream_obo_to_temp_file(obo_url)

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

# Clean up the temporary OBO file
if temp_obo_path:
    os.remove(temp_obo_path)


/tmp/tmpae4829ks.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpae4829ks.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   30 usr  92 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
/tmp/tmpae4829ks.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpae4829ks.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   84 usr 163 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
/tmp/tmpae4829ks.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpae4829ks.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
    1 usr   3 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
/tmp/tmpae4829ks.obo: fmt(1.2) rel(2

In [3]:
import requests
import tempfile
import json
import os
from collections import defaultdict
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

def load_go_terms_with_parents(json_file):
    """Load GO terms with parents and p-values from JSON, sorting by p-value."""
    with open(json_file) as f:
        data = json.load(f)
        sorted_data = sorted(data, key=lambda x: x['p_value'])
        terms_with_parents = [
            {
                'native': entry['native'],
                'name': entry['name'],
                'p_value': entry['p_value'],
                'parents': entry.get('parents', []),
                'source': 'JSON'
            } for entry in sorted_data if 'parents' in entry
        ]
        return terms_with_parents

def build_layers(terms_with_parents):
    """Build layers of GO terms based on parent-child relationships."""
    layers = defaultdict(list)
    term_dict = {term['native']: term for term in terms_with_parents}

    # Initialize layers for all terms
    for term in terms_with_parents:
        term['layer'] = None

    def set_layer(term):
        if term['layer'] is not None:
            return term['layer']
        if not term['parents']:
            term['layer'] = 0
        else:
            parent_layers = [set_layer(term_dict[parent]) for parent in term['parents'] if parent in term_dict]
            term['layer'] = max(parent_layers) + 1 if parent_layers else 0
        layers[term['layer']].append(term)
        return term['layer']

    for term in terms_with_parents:
        set_layer(term)

    return layers

def assign_layer_colors(layers):
    """Assign a broad range of colors to each layer."""
    color_palette = [
        '#9400D3', '#4B0082', '#0000FF', '#00FF00', '#FFFF00', 
        '#FF7F00', '#FF0000', '#FF1493', '#00BFFF', '#32CD32',
        '#8A2BE2', '#A52A2A', '#DEB887', '#5F9EA0', '#7FFF00',
        '#D2691E', '#FF7F50', '#6495ED', '#FFF8DC', '#DC143C'
    ]
    
    for layer_index, terms in layers.items():
        color = color_palette[layer_index % len(color_palette)]
        for term in terms:
            term['color'] = color
    return layers

def load_obo_go_terms(obo_file_path):
    """Load GO terms from OBO file."""
    go_dag = GODag(obo_file_path)
    return {go_term.id: go_term for go_term in go_dag.values()}

def generate_dag_plots(json_file, temp_obo_path):
    terms_with_parents = load_go_terms_with_parents(json_file)
    if temp_obo_path:
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")
        
        # Load OBO GO terms
        obo_go_terms = load_obo_go_terms(temp_obo_path)
        
        # Mark source of each term
        for term in terms_with_parents:
            if term['native'] in obo_go_terms:
                term['source'] = 'OBO'
                
        # Build layers and assign colors
        layers = build_layers(terms_with_parents)
        colored_layers = assign_layer_colors(layers)
        
        # Prepare colors for plotting
        colors = {term['native']: term['color'] for terms in colored_layers.values() for term in terms}
        
        # Generate plot for DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Plotting the GO DAG with specified colors for GO terms
        plot_gos(output_file, set(term['native'] for terms in colored_layers.values() for term in terms), go_dag, title=title, mark_alt_id=True, parentcnt=True, childcnt=True, go_color=colors)
        print(f"DAG plot generated at {output_file}")

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
temp_obo_path = stream_obo_to_temp_file(obo_url)



for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

# Clean up the temporary OBO file
if temp_obo_path:
    os.remove(temp_obo_path)


/tmp/tmp08f7i1ql.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmp08f7i1ql.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   30 usr  92 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
/tmp/tmp08f7i1ql.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmp08f7i1ql.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   84 usr 163 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
/tmp/tmp08f7i1ql.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmp08f7i1ql.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
    1 usr   3 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
/tmp/tmp08f7i1ql.obo: fmt(1.2) rel(2

In [4]:
import requests
import tempfile
import json
import os
from collections import defaultdict
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

def load_go_terms_with_parents(json_file):
    """Load GO terms with parents and p-values from JSON, sorting by p-value."""
    with open(json_file) as f:
        data = json.load(f)
        sorted_data = sorted(data, key=lambda x: x['p_value'])
        terms_with_parents = [
            {
                'native': entry['native'],
                'name': entry['name'],
                'p_value': entry['p_value'],
                'parents': entry.get('parents', []),
                'source': 'JSON'
            } for entry in sorted_data if 'parents' in entry
        ]
        return terms_with_parents

def build_layers(terms_with_parents):
    """Build layers of GO terms based on parent-child relationships."""
    layers = defaultdict(list)
    term_dict = {term['native']: term for term in terms_with_parents}

    # Initialize layers for all terms
    for term in terms_with_parents:
        term['layer'] = None

    def set_layer(term):
        if term['layer'] is not None:
            return term['layer']
        if not term['parents']:
            term['layer'] = 0
        else:
            parent_layers = [set_layer(term_dict[parent]) for parent in term['parents'] if parent in term_dict]
            term['layer'] = max(parent_layers) + 1 if parent_layers else 0
        layers[term['layer']].append(term)
        return term['layer']

    for term in terms_with_parents:
        set_layer(term)

    return layers

def assign_layer_colors(layers):
    """Assign a broad range of colors to each layer."""
    color_palette = [
        '#9400D3', '#4B0082', '#0000FF', '#00FF00', '#FFFF00', 
        '#FF7F00', '#FF0000', '#FF1493', '#00BFFF', '#32CD32',
        '#8A2BE2', '#A52A2A', '#DEB887', '#5F9EA0', '#7FFF00',
        '#D2691E', '#FF7F50', '#6495ED', '#FFF8DC', '#DC143C'
    ]
    
    for layer_index, terms in layers.items():
        color = color_palette[layer_index % len(color_palette)]
        for term in terms:
            term['color'] = color
    return layers

def load_obo_go_terms(obo_file_path):
    """Load GO terms from OBO file."""
    go_dag = GODag(obo_file_path)
    return {go_term.id: go_term for go_term in go_dag.values()}

def generate_dag_plots(json_file, temp_obo_path):
    terms_with_parents = load_go_terms_with_parents(json_file)
    if temp_obo_path:
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")
        
        # Load OBO GO terms
        obo_go_terms = load_obo_go_terms(temp_obo_path)
        
        # Mark source of each term
        for term in terms_with_parents:
            if term['native'] in obo_go_terms:
                term['source'] = 'OBO'
                
        # Build layers and assign colors
        layers = build_layers(terms_with_parents)
        colored_layers = assign_layer_colors(layers)
        
        # Prepare colors for plotting
        colors = {term['native']: term['color'] for terms in colored_layers.values() for term in terms}
        
        # Generate plot for DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Plotting the GO DAG with specified colors for GO terms
        plot_gos(output_file, set(term['native'] for terms in colored_layers.values() for term in terms), go_dag, title=title, mark_alt_id=True, parentcnt=True, childcnt=True, go_color=colors)
        print(f"DAG plot generated at {output_file}")

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
temp_obo_path = stream_obo_to_temp_file(obo_url)


for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

# Clean up the temporary OBO file
if temp_obo_path:
    os.remove(temp_obo_path)


/tmp/tmp_5dq38x8.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmp_5dq38x8.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   30 usr  92 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
/tmp/tmp_5dq38x8.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmp_5dq38x8.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   84 usr 163 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
/tmp/tmp_5dq38x8.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmp_5dq38x8.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
    1 usr   3 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
/tmp/tmp_5dq38x8.obo: fmt(1.2) rel(2

In [7]:
import requests
import tempfile
import json
import os
import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

def load_go_terms_with_parents(json_file):
    """Load GO terms with parents and p-values from JSON, sorting by p-value."""
    with open(json_file) as f:
        data = json.load(f)
        sorted_data = sorted(data, key=lambda x: x['p_value'])
        terms_with_parents = [
            {
                'native': entry['native'],
                'name': entry['name'],
                'p_value': entry['p_value'],
                'parents': entry.get('parents', []),
                'source': 'JSON'
            } for entry in sorted_data if 'parents' in entry
        ]
        return terms_with_parents

def build_layers(terms_with_parents):
    """Build layers of GO terms based on parent-child relationships."""
    layers = defaultdict(list)
    term_dict = {term['native']: term for term in terms_with_parents}

    # Initialize layers for all terms
    for term in terms_with_parents:
        term['layer'] = None

    def set_layer(term):
        if term['layer'] is not None:
            return term['layer']
        if not term['parents']:
            term['layer'] = 0
        else:
            parent_layers = [set_layer(term_dict[parent]) for parent in term['parents'] if parent in term_dict]
            term['layer'] = max(parent_layers) + 1 if parent_layers else 0
        layers[term['layer']].append(term)
        return term['layer']

    for term in terms_with_parents:
        set_layer(term)

    return layers

def generate_colors(num_colors):
    """Generate a list of distinct colors using a colormap."""
    cmap = plt.get_cmap('hsv')
    colors = [cmap(i) for i in np.linspace(0, 1, num_colors)]
    return colors

def assign_layer_colors(layers):
    """Assign distinct colors to each layer."""
    num_layers = len(layers)
    colors = generate_colors(num_layers)
    
    for layer_index, terms in layers.items():
        color = colors[layer_index % num_layers]
        hex_color = matplotlib.colors.rgb2hex(color[:3])  # Convert RGBA to hex
        for term in terms:
            term['color'] = hex_color
    return layers

def load_obo_go_terms(obo_file_path):
    """Load GO terms from OBO file."""
    go_dag = GODag(obo_file_path)
    return {go_term.id: go_term for go_term in go_dag.values()}

def generate_dag_plots(json_file, temp_obo_path):
    terms_with_parents = load_go_terms_with_parents(json_file)
    if temp_obo_path:
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")
        
        # Load OBO GO terms
        obo_go_terms = load_obo_go_terms(temp_obo_path)
        
        # Mark source of each term
        for term in terms_with_parents:
            if term['native'] in obo_go_terms:
                term['source'] = 'OBO'
                
        # Build layers and assign colors
        layers = build_layers(terms_with_parents)
        colored_layers = assign_layer_colors(layers)
        
        # Prepare colors for plotting
        colors = {term['native']: term['color'] for terms in colored_layers.values() for term in terms}
        
        # Generate plot for DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Plotting the GO DAG with specified colors for GO terms
        plot_gos(output_file, set(term['native'] for terms in colored_layers.values() for term in terms), go_dag, title=title, mark_alt_id=True, parentcnt=True, childcnt=True, go_color=colors)
        print(f"DAG plot generated at {output_file}")

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
temp_obo_path = stream_obo_to_temp_file(obo_url)



for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

# Clean up the temporary OBO file
if temp_obo_path:
    os.remove(temp_obo_path)


/tmp/tmpa9emrczf.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpa9emrczf.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms


NameError: name 'matplotlib' is not defined

In [11]:
import requests
import tempfile
import json
import os
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
from collections import defaultdict
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

def load_go_terms_with_parents(json_file):
    """Load GO terms with parents and p-values from JSON, sorting by p-value."""
    with open(json_file) as f:
        data = json.load(f)
        sorted_data = sorted(data, key=lambda x: x['p_value'])
        terms_with_parents = [
            {
                'native': entry['native'],
                'name': entry['name'],
                'p_value': entry['p_value'],
                'parents': entry.get('parents', []),
                'source': 'JSON'
            } for entry in sorted_data if 'parents' in entry
        ]
        return terms_with_parents

def build_layers(terms_with_parents):
    """Build layers of GO terms based on parent-child relationships."""
    layers = defaultdict(list)
    term_dict = {term['native']: term for term in terms_with_parents}

    # Initialize layers for all terms
    for term in terms_with_parents:
        term['layer'] = None

    def set_layer(term):
        if term['layer'] is not None:
            return term['layer']
        if not term['parents']:
            term['layer'] = 0
        else:
            parent_layers = [set_layer(term_dict[parent]) for parent in term['parents'] if parent in term_dict]
            term['layer'] = max(parent_layers) + 1 if parent_layers else 0
        layers[term['layer']].append(term)
        return term['layer']

    for term in terms_with_parents:
        set_layer(term)

    return layers

def generate_colors(num_colors):
    """Generate a list of distinct colors using a colormap."""
    cmap = plt.get_cmap('hsv')
    colors = [cmap(i) for i in np.linspace(0, 1, num_colors)]
    return colors

def assign_layer_colors(layers):
    """Assign distinct colors to each layer."""
    num_layers = len(layers)
    colors = generate_colors(num_layers)
    
    for layer_index, terms in layers.items():
        color = colors[layer_index % num_layers]
        hex_color = mcolors.rgb2hex(color[:3])  # Convert RGBA to hex
        for term in terms:
            term['color'] = hex_color
    return layers

def load_obo_go_terms(obo_file_path):
    """Load GO terms from OBO file."""
    go_dag = GODag(obo_file_path)
    return {go_term.id: go_term for go_term in go_dag.values()}

def generate_dag_plots(json_file, temp_obo_path):
    terms_with_parents = load_go_terms_with_parents(json_file)
    if temp_obo_path:
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")
        
        # Load OBO GO terms
        obo_go_terms = load_obo_go_terms(temp_obo_path)
        
        # Mark source of each term
        for term in terms_with_parents:
            if term['native'] in obo_go_terms:
                term['source'] = 'OBO'
                
        # Build layers and assign colors
        layers = build_layers(terms_with_parents)
        colored_layers = assign_layer_colors(layers)
        
        # Prepare colors for plotting
        colors = {term['native']: term['color'] for terms in colored_layers.values() for term in terms}
        
        # Generate plot for DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Plotting the GO DAG with specified colors for GO terms
        plot_gos(output_file, set(term['native'] for terms in colored_layers.values() for term in terms), go_dag, title=title, mark_alt_id=True, parentcnt=True, childcnt=True, go_color=colors)
        print(f"DAG plot generated at {output_file}")

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
temp_obo_path = stream_obo_to_temp_file(obo_url)


for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

# Clean up the temporary OBO file
if temp_obo_path:
    os.remove(temp_obo_path)


/tmp/tmpawygovas.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpawygovas.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   30 usr  92 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
/tmp/tmpawygovas.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpawygovas.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   84 usr 163 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
/tmp/tmpawygovas.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpawygovas.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
    1 usr   3 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
/tmp/tmpawygovas.obo: fmt(1.2) rel(2

In [16]:
import requests
import tempfile
import json
import os
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
from collections import defaultdict
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

def load_go_terms_with_parents(json_file):
    """Load GO terms with parents and p-values from JSON, sorting by p-value."""
    with open(json_file) as f:
        data = json.load(f)
        sorted_data = sorted(data, key=lambda x: x['p_value'])
        terms_with_parents = [
            {
                'native': entry['native'],
                'name': entry['name'],
                'p_value': entry['p_value'],
                'parents': entry.get('parents', []),
                'source': 'JSON'
            } for entry in sorted_data if 'parents' in entry
        ]
        return terms_with_parents

def build_layers(terms_with_parents):
    """Build layers of GO terms based on parent-child relationships."""
    layers = defaultdict(list)
    term_dict = {term['native']: term for term in terms_with_parents}

    # Initialize layers for all terms
    for term in terms_with_parents:
        term['layer'] = None

    def set_layer(term):
        if term['layer'] is not None:
            return term['layer']
        if not term['parents']:
            term['layer'] = 0
        else:
            parent_layers = [set_layer(term_dict[parent]) for parent in term['parents'] if parent in term_dict]
            term['layer'] = max(parent_layers) + 1 if parent_layers else 0
        layers[term['layer']].append(term)
        return term['layer']

    for term in terms_with_parents:
        set_layer(term)

    return layers

def generate_colors(num_colors):
    """Generate a list of distinct colors using a colormap."""
    cmap = plt.get_cmap('hsv')
    colors = [cmap(i) for i in np.linspace(0, 1, num_colors)]
    return colors

def assign_layer_colors(layers):
    """Assign specific colors to the first two layers and distinct colors to each subsequent layer."""
    num_layers = len(layers)
    colors = generate_colors(num_layers)
    
    custom_colors = ['#0000FF', '#FFFF00']  # Blue for the top layer, yellow for the second layer
    
    for layer_index, terms in layers.items():
        if layer_index < len(custom_colors):
            color = mcolors.hex2color(custom_colors[layer_index])
        else:
            color = colors[layer_index % num_layers]
        hex_color = mcolors.rgb2hex(color[:3])  # Convert RGBA to hex
        for term in terms:
            term['color'] = hex_color
    return layers

def load_obo_go_terms(obo_file_path):
    """Load GO terms from OBO file."""
    go_dag = GODag(obo_file_path)
    return {go_term.id: go_term for go_term in go_dag.values()}

def generate_dag_plots(json_file, temp_obo_path):
    terms_with_parents = load_go_terms_with_parents(json_file)
    if temp_obo_path:
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")
        
        # Load OBO GO terms
        obo_go_terms = load_obo_go_terms(temp_obo_path)
        
        # Mark source of each term
        for term in terms_with_parents:
            if term['native'] in obo_go_terms:
                term['source'] = 'OBO'
                
        # Build layers and assign colors
        layers = build_layers(terms_with_parents)
        colored_layers = assign_layer_colors(layers)
        
        # Prepare colors for plotting
        colors = {term['native']: term['color'] for terms in colored_layers.values() for term in terms}
        
        # Generate plot for DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Plotting the GO DAG with specified colors for GO terms
        plot_gos(output_file, set(term['native'] for terms in colored_layers.values() for term in terms), go_dag, title=title, mark_alt_id=True, parentcnt=True, childcnt=True, go_color=colors)
        print(f"DAG plot generated at {output_file}")

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
temp_obo_path = stream_obo_to_temp_file(obo_url)

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

# Clean up the temporary OBO file
if temp_obo_path:
    os.remove(temp_obo_path)


/tmp/tmpks4136g3.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpks4136g3.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   30 usr  92 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
/tmp/tmpks4136g3.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpks4136g3.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   84 usr 163 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
/tmp/tmpks4136g3.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpks4136g3.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
    1 usr   3 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
/tmp/tmpks4136g3.obo: fmt(1.2) rel(2

In [14]:
import requests
import tempfile
import json
import os
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
from collections import defaultdict
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

def load_go_terms_with_parents(json_file):
    """Load GO terms with parents and p-values from JSON, sorting by p-value."""
    with open(json_file) as f:
        data = json.load(f)
        sorted_data = sorted(data, key=lambda x: x['p_value'])
        terms_with_parents = [
            {
                'native': entry['native'],
                'name': entry['name'],
                'p_value': entry['p_value'],
                'parents': entry.get('parents', []),
                'source': 'JSON'
            } for entry in sorted_data if 'parents' in entry
        ]
        return terms_with_parents

def build_layers(terms_with_parents):
    """Build layers of GO terms based on parent-child relationships."""
    layers = defaultdict(list)
    term_dict = {term['native']: term for term in terms_with_parents}

    # Initialize layers for all terms
    for term in terms_with_parents:
        term['layer'] = None

    def set_layer(term):
        if term['layer'] is not None:
            return term['layer']
        if not term['parents']:
            term['layer'] = 0
        else:
            parent_layers = [set_layer(term_dict[parent]) for parent in term['parents'] if parent in term_dict]
            term['layer'] = max(parent_layers) + 1 if parent_layers else 0
        layers[term['layer']].append(term)
        return term['layer']

    for term in terms_with_parents:
        set_layer(term)

    return layers

def generate_colors(num_colors):
    """Generate a list of distinct colors using a colormap."""
    cmap = plt.get_cmap('hsv')
    colors = [cmap(i) for i in np.linspace(0, 1, num_colors)]
    return colors

def assign_layer_colors(layers):
    """Assign specific colors to the first two layers and distinct colors to each subsequent layer."""
    num_layers = len(layers)
    colors = generate_colors(num_layers)
    
    custom_colors = ['#0000FF', '#FFFF00']  # Blue for the top layer, yellow for the second layer
    
    for layer_index, terms in layers.items():
        if layer_index < len(custom_colors):
            color = mcolors.hex2color(custom_colors[layer_index])
        else:
            color = colors[layer_index % num_layers]
        hex_color = mcolors.rgb2hex(color[:3])  # Convert RGBA to hex
        for term in terms:
            term['color'] = hex_color
    return layers

def load_obo_go_terms(obo_file_path):
    """Load GO terms from OBO file."""
    go_dag = GODag(obo_file_path)
    return {go_term.id: go_term for go_term in go_dag.values()}

def generate_dag_plots(json_file, temp_obo_path):
    terms_with_parents = load_go_terms_with_parents(json_file)
    if temp_obo_path:
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")
        
        # Load OBO GO terms
        obo_go_terms = load_obo_go_terms(temp_obo_path)
        
        # Mark source of each term
        for term in terms_with_parents:
            if term['native'] in obo_go_terms:
                term['source'] = 'OBO'
                
        # Build layers and assign colors
        layers = build_layers(terms_with_parents)
        colored_layers = assign_layer_colors(layers)
        
        # Prepare colors for plotting
        colors = {term['native']: term['color'] for terms in colored_layers.values() for term in terms}
        
        # Generate plot for DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Plotting the GO DAG with specified colors for GO terms
        plot_gos(output_file, set(term['native'] for terms in colored_layers.values() for term in terms), go_dag, title=title, mark_alt_id=True, parentcnt=True, childcnt=True, go_color=colors)
        print(f"DAG plot generated at {output_file}")

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
temp_obo_path = stream_obo_to_temp_file(obo_url)

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

# Clean up the temporary OBO file
if temp_obo_path:
    os.remove(temp_obo_path)


/tmp/tmpu1lh2vqx.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpu1lh2vqx.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   30 usr  92 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
/tmp/tmpu1lh2vqx.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpu1lh2vqx.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
   84 usr 163 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
/tmp/tmpu1lh2vqx.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
/tmp/tmpu1lh2vqx.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
    1 usr   3 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_TissueXSex_down_go:bp.png
/tmp/tmpu1lh2vqx.obo: fmt(1.2) rel(2

In [17]:

import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""
        
        plot_gos(output_file, sorted_go_ids, go_dag, title=title, mark_alt_id=True, parentcnt=True, childcnt=True)
        print(f"Top 2 GO DAG plot generated at {output_file}")


# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)

OBO file temporarily saved to /tmp/tmp3x9s552m.obo
/tmp/tmp3x9s552m.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
    2 usr  19 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 2 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png


In [19]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {go_id: 'red' for go_id in sorted_go_ids}

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        plot_gos(output_file, sorted_go_ids, go_dag, title=title, go2color=go2color, mark_alt_id=True, parentcnt=True, childcnt=True)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpe09n_67x.obo
/tmp/tmpe09n_67x.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
    2 usr  19 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 2 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png


In [20]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {go_id: 'red' for go_id in sorted_go_ids}

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Debug: Verify function call parameters
        print(f"Calling plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        plot_gos(output_file, sorted_go_ids, go_dag, title=title, go2color=go2color, mark_alt_id=True, parentcnt=True, childcnt=True)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpdnk8fzql.obo
/tmp/tmpdnk8fzql.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'red', 'GO:0034728': 'red'}
Calling plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0061644', 'GO:0034728'], go2color: {'GO:0061644': 'red', 'GO:0034728': 'red'}
    2 usr  19 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 2 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png


In [21]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {go_id: 'red' for go_id in sorted_go_ids}

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Debug: Verify function call parameters
        print(f"Calling plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        plot_gos(output_file, sorted_go_ids, go_dag, title=title, go2color=go2color, mark_alt_id=True, parentcnt=True, childcnt=True)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpl0sd357w.obo
/tmp/tmpl0sd357w.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'red', 'GO:0034728': 'red'}
Calling plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0061644', 'GO:0034728'], go2color: {'GO:0061644': 'red', 'GO:0034728': 'red'}
    2 usr  19 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 2 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png


In [23]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.godag_plot import plot_gos

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None, **kwargs):
    """Custom function to plot GO terms with colors."""
    from goatools.godag_plot import plot_gos as original_plot_gos
    from goatools.gosubdag.gosubdag import GoSubDag
    from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, **kwargs)
    
    if go2color:
        for go_id, color in go2color.items():
            if go_id in objplt.go2txtcolor:
                objplt.go2txtcolor[go_id] = color
            if go_id in objplt.go2color:
                objplt.go2color[go_id] = color
    
    objplt.plt_dag(output_file, **kwargs)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {go_id: 'red' for go_id in sorted_go_ids}

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, title=title, go2color=go2color, mark_alt_id=True, parentcnt=True, childcnt=True)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmphm17w0zc.obo
/tmp/tmphm17w0zc.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'red', 'GO:0034728': 'red'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0061644', 'GO:0034728'], go2color: {'GO:0061644': 'red', 'GO:0034728': 'red'}


AttributeError: 'GoSubDagPlot' object has no attribute 'go2txtcolor'

In [24]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot
import matplotlib.pyplot as plt

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None, **kwargs):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, **kwargs)

    if go2color:
        for go_id, color in go2color.items():
            if go_id in objplt.gosubdag.go2obj:
                objplt.gosubdag.go2obj[go_id].color = color

    objplt.plt_dag(output_file, **kwargs)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {go_id: 'red' for go_id in sorted_go_ids}

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, title=title, go2color=go2color, mark_alt_id=True, parentcnt=True, childcnt=True)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpelp_jj5m.obo
/tmp/tmpelp_jj5m.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'red', 'GO:0034728': 'red'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0061644', 'GO:0034728'], go2color: {'GO:0061644': 'red', 'GO:0034728': 'red'}


TypeError: GoSubDagPlot.plt_dag() got an unexpected keyword argument 'title'

In [25]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot
import matplotlib.pyplot as plt

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None, **kwargs):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color, **kwargs)

    # Plot DAG using the specified engine (default is 'pydot')
    objplt.plt(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {go_id: 'red' for go_id in sorted_go_ids}

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"
        title = f""

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, title=title, go2color=go2color, mark_alt_id=True, parentcnt=True, childcnt=True)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpew6w_ldb.obo
/tmp/tmpew6w_ldb.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'red', 'GO:0034728': 'red'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0061644', 'GO:0034728'], go2color: {'GO:0061644': 'red', 'GO:0034728': 'red'}


AttributeError: 'GoSubDagPlot' object has no attribute 'plt'

In [26]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag)

    # Apply custom colors
    if go2color:
        for go_id, color in go2color.items():
            if go_id in objplt.gosubdag.go2obj:
                objplt.gosubdag.go2obj[go_id].color = color

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {go_id: 'red' for go_id in sorted_go_ids}

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmp_e2mrkxa.obo
/tmp/tmp_e2mrkxa.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'red', 'GO:0034728': 'red'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0061644', 'GO:0034728'], go2color: {'GO:0061644': 'red', 'GO:0034728': 'red'}
    2 usr  19 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 2 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png


In [31]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {go_id: 'blue' for go_id in sorted_go_ids}

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmp2c7pcm3e.obo
/tmp/tmp2c7pcm3e.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'blue', 'GO:0034728': 'blue'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0061644', 'GO:0034728'], go2color: {'GO:0061644': 'blue', 'GO:0034728': 'blue'}
    2 usr  19 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 2 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png


In [32]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}

        # Sort GO IDs by their level and depth
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)

        # Create a color mapping for the nodes
        go2color = {}
        for go_id in sorted_go_ids:
            if go_dag[go_id].level == 1:
                go2color[go_id] = 'blue'
            elif go_dag[go_id].level == 2:
                go2color[go_id] = 'green'
            else:
                go2color[go_id] = 'yellow'  # Default color for other levels

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpj6ehv36v.obo
/tmp/tmpj6ehv36v.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'yellow', 'GO:0034728': 'yellow', 'GO:0045653': 'yellow', 'GO:0006334': 'yellow', 'GO:0045910': 'yellow', 'GO:0006338': 'yellow', 'GO:0071168': 'yellow', 'GO:0071459': 'yellow', 'GO:0045652': 'yellow', 'GO:0045638': 'yellow', 'GO:0006396': 'yellow', 'GO:0065004': 'yellow', 'GO:0034502': 'yellow', 'GO:0032200': 'yellow', 'GO:0006325': 'yellow', 'GO:0045637': 'yellow', 'GO:0016070': 'yellow', 'GO:0030219': 'yellow', 'GO:0010467': 'yellow', 'GO:0065003': 'yellow', 'GO:0051276': 'yellow', 'GO:0071824': 'yellow', 'GO:0002227': 'yellow', 'GO:0090304': 'yellow', 'GO:0043933': 'yellow', 'GO:0022607': 'yellow', 'GO:0009059': 'yellow', 'GO:0002385': 'yellow', 'GO:0044085': 'yellow', 'GO:0002251': 'yellow'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted

In [33]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {}
        if len(sorted_go_ids) > 0:
            go2color[sorted_go_ids[0]] = 'blue'  # Top layer
        if len(sorted_go_ids) > 1:
            go2color[sorted_go_ids[1]] = 'green'  # Second layer

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top 2 GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmp_yk04g20.obo
/tmp/tmp_yk04g20.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'blue', 'GO:0034728': 'green'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0061644', 'GO:0034728'], go2color: {'GO:0061644': 'blue', 'GO:0034728': 'green'}
    2 usr  19 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 2 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png


In [35]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Create a color mapping for the nodes
        go2color = {}
        for i, entry in enumerate(sorted_go_entries):
            go_id = entry['native']
            if i < 3:
                go2color[go_id] = 'lightgreen'  # Top 3 lowest p-value
            else:
                go2color[go_id] = 'lightyellow'  # The rest

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {entry['native'] for entry in sorted_go_entries if entry['native'] in go_dag}

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, plot_go_ids: {plot_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, plot_go_ids, go_dag, go2color=go2color)
        print(f"GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmppxd_j2mf.obo
/tmp/tmppxd_j2mf.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0006334': 'lightgreen', 'GO:0034728': 'lightgreen', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightyellow', 'GO:0006338': 'lightyellow', 'GO:0006396': 'lightyellow', 'GO:0006325': 'lightyellow', 'GO:0071459': 'lightyellow', 'GO:0071168': 'lightyellow', 'GO:0071824': 'lightyellow', 'GO:0045653': 'lightyellow', 'GO:0034502': 'lightyellow', 'GO:0045652': 'lightyellow', 'GO:0065003': 'lightyellow', 'GO:0032200': 'lightyellow', 'GO:0043933': 'lightyellow', 'GO:0030219': 'lightyellow', 'GO:0051276': 'lightyellow', 'GO:0022607': 'lightyellow', 'GO:0045638': 'lightyellow', 'GO:0044085': 'lightyellow', 'GO:0002385': 'lightyellow', 'GO:0016070': 'lightyellow', 'GO:0002251': 'lightyellow', 'GO:0010467': 'lightyellow', 'GO:0002227': 'lightyellow', 'GO:0090304': 'lightyellow', 'GO:0009059': 'lightyellow', 'GO:0045637': 'ligh

In [36]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Create a color mapping for the nodes
        go2color = {}
        for i, entry in enumerate(sorted_go_entries):
            go_id = entry['native']
            if i < 2:
                go2color[go_id] = 'lightgreen'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightyellow'  # The rest

        # Filter and sort GO IDs by relevance and select the top result
        plot_go_ids = {entry['native'] for entry in sorted_go_entries if entry['native'] in go_dag}

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, plot_go_ids: {plot_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, plot_go_ids, go_dag, go2color=go2color)
        print(f"GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpnz9r2q6_.obo
/tmp/tmpnz9r2q6_.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0006334': 'lightgreen', 'GO:0034728': 'lightgreen', 'GO:0065004': 'lightyellow', 'GO:0061644': 'lightyellow', 'GO:0006338': 'lightyellow', 'GO:0006396': 'lightyellow', 'GO:0006325': 'lightyellow', 'GO:0071459': 'lightyellow', 'GO:0071168': 'lightyellow', 'GO:0071824': 'lightyellow', 'GO:0045653': 'lightyellow', 'GO:0034502': 'lightyellow', 'GO:0045652': 'lightyellow', 'GO:0065003': 'lightyellow', 'GO:0032200': 'lightyellow', 'GO:0043933': 'lightyellow', 'GO:0030219': 'lightyellow', 'GO:0051276': 'lightyellow', 'GO:0022607': 'lightyellow', 'GO:0045638': 'lightyellow', 'GO:0044085': 'lightyellow', 'GO:0002385': 'lightyellow', 'GO:0016070': 'lightyellow', 'GO:0002251': 'lightyellow', 'GO:0010467': 'lightyellow', 'GO:0002227': 'lightyellow', 'GO:0090304': 'lightyellow', 'GO:0009059': 'lightyellow', 'GO:0045637': 'lig

In [37]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Filter and sort GO IDs by relevance and select the top 2 based on level and depth
        plot_go_ids = {entry['native'] for entry in sorted_go_entries if entry['native'] in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Create a color mapping for the nodes
        go2color = {}
        for go_id in sorted_go_ids:
            go2color[go_id] = 'lightgreen'

        for go_id in plot_go_ids:
            if go_id not in go2color:
                go2color[go_id] = 'lightyellow'

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, plot_go_ids: {plot_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, plot_go_ids, go_dag, go2color=go2color)
        print(f"GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmppfq2_g24.obo
/tmp/tmppfq2_g24.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'lightgreen', 'GO:0034728': 'lightgreen', 'GO:0043933': 'lightyellow', 'GO:0022607': 'lightyellow', 'GO:0065004': 'lightyellow', 'GO:0045637': 'lightyellow', 'GO:0016070': 'lightyellow', 'GO:0006338': 'lightyellow', 'GO:0045653': 'lightyellow', 'GO:0030219': 'lightyellow', 'GO:0006334': 'lightyellow', 'GO:0034502': 'lightyellow', 'GO:0010467': 'lightyellow', 'GO:0071168': 'lightyellow', 'GO:0045652': 'lightyellow', 'GO:0065003': 'lightyellow', 'GO:0051276': 'lightyellow', 'GO:0044085': 'lightyellow', 'GO:0009059': 'lightyellow', 'GO:0090304': 'lightyellow', 'GO:0002385': 'lightyellow', 'GO:0071459': 'lightyellow', 'GO:0045638': 'lightyellow', 'GO:0032200': 'lightyellow', 'GO:0002227': 'lightyellow', 'GO:0006396': 'lightyellow', 'GO:0071824': 'lightyellow', 'GO:0002251': 'lightyellow', 'GO:0006325': 'lig

In [65]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Create a color mapping for the nodes
        go2color = {}
        for i, entry in enumerate(sorted_go_entries):
            go_id = entry['native']
            if i < 2:
                go2color[go_id] = 'yellow'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightgreen'  # The rest

        # Filter and sort GO IDs by relevance and select the top 2 based on level and depth
        plot_go_ids = {entry['native'] for entry in sorted_go_entries if entry['native'] in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:2]

        # Reduce the size of the DAG to only include top 2 nodes and their direct parents and children
        top_nodes = set(sorted_go_ids)
        for go_id in sorted_go_ids:
            top_nodes.update(go_dag[go_id].get_all_parents())
            top_nodes.update(go_dag[go_id].get_all_children())

        # Debug: Print the color mapping and selected nodes
        print(f"Color mapping for GO terms: {go2color}")
        print(f"Top nodes in the DAG: {top_nodes}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, top_nodes: {top_nodes}, go2color: {go2color}")

        custom_plot_gos(output_file, top_nodes, go_dag, go2color=go2color)
        print(f"GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpngtvh7cg.obo
/tmp/tmpngtvh7cg.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen', 'GO:0006338': 'lightgreen', 'GO:0006396': 'lightgreen', 'GO:0006325': 'lightgreen', 'GO:0071459': 'lightgreen', 'GO:0071168': 'lightgreen', 'GO:0071824': 'lightgreen', 'GO:0045653': 'lightgreen', 'GO:0034502': 'lightgreen', 'GO:0045652': 'lightgreen', 'GO:0065003': 'lightgreen', 'GO:0032200': 'lightgreen', 'GO:0043933': 'lightgreen', 'GO:0030219': 'lightgreen', 'GO:0051276': 'lightgreen', 'GO:0022607': 'lightgreen', 'GO:0045638': 'lightgreen', 'GO:0044085': 'lightgreen', 'GO:0002385': 'lightgreen', 'GO:0016070': 'lightgreen', 'GO:0002251': 'lightgreen', 'GO:0010467': 'lightgreen', 'GO:0002227': 'lightgreen', 'GO:0090304': 'lightgreen', 'GO:0009059': 'lightgreen', 'GO:0045637': 'lightgreen', 'GO:0045910': 'lightgree

In [57]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Create a color mapping for the nodes
        go2color = {}
        for i, entry in enumerate(sorted_go_entries):
            go_id = entry['native']
            if i < 2:
                go2color[go_id] = 'yellow'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightgreen'  # The rest

        # Filter and sort GO IDs by relevance and select the top 2 based on level and depth
        plot_go_ids = {entry['native'] for entry in sorted_go_entries if entry['native'] in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:1]

        # Reduce the size of the DAG to only include top 2 nodes and their direct parents and children
        top_nodes = set(sorted_go_ids)
        for go_id in sorted_go_ids:
            top_nodes.update(go_dag[go_id].get_all_parents())
            top_nodes.update(go_dag[go_id].get_all_children())

        # Debug: Print the color mapping and selected nodes
        print(f"Color mapping for GO terms: {go2color}")
        print(f"Top nodes in the DAG: {top_nodes}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, top_nodes: {top_nodes}, go2color: {go2color}")

        custom_plot_gos(output_file, top_nodes, go_dag, go2color=go2color)
        print(f"GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmp574bhiqs.obo
/tmp/tmp574bhiqs.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen', 'GO:0006338': 'lightgreen', 'GO:0006396': 'lightgreen', 'GO:0006325': 'lightgreen', 'GO:0071459': 'lightgreen', 'GO:0071168': 'lightgreen', 'GO:0071824': 'lightgreen', 'GO:0045653': 'lightgreen', 'GO:0034502': 'lightgreen', 'GO:0045652': 'lightgreen', 'GO:0065003': 'lightgreen', 'GO:0032200': 'lightgreen', 'GO:0043933': 'lightgreen', 'GO:0030219': 'lightgreen', 'GO:0051276': 'lightgreen', 'GO:0022607': 'lightgreen', 'GO:0045638': 'lightgreen', 'GO:0044085': 'lightgreen', 'GO:0002385': 'lightgreen', 'GO:0016070': 'lightgreen', 'GO:0002251': 'lightgreen', 'GO:0010467': 'lightgreen', 'GO:0002227': 'lightgreen', 'GO:0090304': 'lightgreen', 'GO:0009059': 'lightgreen', 'GO:0045637': 'lightgreen', 'GO:0045910': 'lightgree

In [60]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path, max_terms=5):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_ids = {entry['native'] for entry in data if entry['significant']}

        # Filter and sort GO IDs by level and depth, and select the top N results
        plot_go_ids = {go_id for go_id in significant_go_ids if go_id in go_dag}
        sorted_go_ids = sorted(plot_go_ids, key=lambda x: (go_dag[x].level, go_dag[x].depth), reverse=True)[:max_terms]

        # Create a color mapping for the nodes
        go2color = {go_id: 'lightgreen' for go_id in sorted_go_ids}
        
        # Apply custom colors if needed
        if len(sorted_go_ids) > 0:
            go2color[sorted_go_ids[0]] = 'yellow'  # Highlight the first term
        if len(sorted_go_ids) > 1:
            go2color[sorted_go_ids[1]] = 'green'  # Highlight the second term


        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top {max_terms} GO DAG plot generated at {output_file}")


# Generate plots with a limited number of top GO terms
for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path, max_terms=1)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpwgf4h3c7.obo
/tmp/tmpwgf4h3c7.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0061644': 'yellow'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0061644'], go2color: {'GO:0061644': 'yellow'}
    1 usr  12 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 1 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
OBO file temporarily saved to /tmp/tmpwgf4h3c7.obo
/tmp/tmpwgf4h3c7.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0042776': 'yellow'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png, sorted_go_ids: ['GO:0042776'], go2color: {'GO:0042776': 'yellow'}
    1 usr  48 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png
Top 1 GO DAG plo

In [62]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path, max_terms=5):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_entries = [entry for entry in data if entry['significant']]

        # Filter and sort GO IDs by level and depth, and select the top N results
        plot_go_entries = [entry for entry in significant_go_entries if entry['native'] in go_dag]
        sorted_go_entries = sorted(plot_go_entries, key=lambda x: (go_dag[x['native']].level, go_dag[x['native']].depth), reverse=True)[:max_terms]
        
        # Create a color mapping for the nodes
        go2color = {}
        for i, entry in enumerate(sorted_go_entries):
            go_id = entry['native']
            if i < 2:
                go2color[go_id] = 'yellow'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightgreen'  # The rest

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        sorted_go_ids = [entry['native'] for entry in sorted_go_entries]
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top {max_terms} GO DAG plot generated at {output_file}")

# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

# Generate plots with a limited number of top GO terms
for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path, max_terms=2)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmp68x55qpa.obo
/tmp/tmp68x55qpa.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0034728': 'yellow', 'GO:0061644': 'yellow'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0034728', 'GO:0061644'], go2color: {'GO:0034728': 'yellow', 'GO:0061644': 'yellow'}
    2 usr  19 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 2 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
OBO file temporarily saved to /tmp/tmp68x55qpa.obo
/tmp/tmp68x55qpa.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0042776': 'yellow', 'GO:0015986': 'yellow'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png, sorted_go_ids: ['GO:0042776', 'GO:0015986'], go2color: {'GO:0042776': 'yellow',

In [75]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path, max_terms=5):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            significant_go_entries = [entry for entry in data if entry['significant']]

        # Filter and sort GO IDs by level and depth, and select the top N results
        plot_go_entries = [entry for entry in significant_go_entries if entry['native'] in go_dag]
        sorted_go_entries = sorted(plot_go_entries, key=lambda x: (go_dag[x['native']].level, go_dag[x['native']].depth), reverse=True)[:max_terms]
        
        # Create a color mapping for the nodes
        go2color = {entry['native']: 'lightgreen' for entry in sorted_go_entries}
        for i, entry in enumerate(sorted_go_entries):
            if i < 2:
                go2color[entry['native']] = 'yellow'  # Top 2 lowest p-value

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        sorted_go_ids = [entry['native'] for entry in sorted_go_entries]
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top {max_terms} GO DAG plot generated at {output_file}")


# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

# Generate plots with a limited number of top GO terms
for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path, max_terms=2)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmp5php9tip.obo
/tmp/tmp5php9tip.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0034728': 'yellow', 'GO:0061644': 'yellow'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0034728', 'GO:0061644'], go2color: {'GO:0034728': 'yellow', 'GO:0061644': 'yellow'}
    2 usr  19 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 2 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
OBO file temporarily saved to /tmp/tmp5php9tip.obo
/tmp/tmp5php9tip.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0042776': 'yellow', 'GO:0015986': 'yellow'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_up_go:bp.png, sorted_go_ids: ['GO:0042776', 'GO:0015986'], go2color: {'GO:0042776': 'yellow',

In [76]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path, max_terms=5):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Filter and sort GO IDs by level and depth, and select the top N results
        plot_go_entries = [entry for entry in sorted_go_entries if entry['native'] in go_dag][:max_terms]
        
        # Create a color mapping for the nodes
        go2color = {}
        for i, entry in enumerate(plot_go_entries):
            go_id = entry['native']
            if i < 2:
                go2color[go_id] = 'yellow'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightgreen'  # The rest

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        sorted_go_ids = [entry['native'] for entry in plot_go_entries]
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top {max_terms} GO DAG plot generated at {output_file}")


# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

# Generate plots with a limited number of top GO terms
for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path, max_terms=4)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpr3nqolrs.obo
/tmp/tmpr3nqolrs.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0006334', 'GO:0034728', 'GO:0065004', 'GO:0061644'], go2color: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen'}
    4 usr  23 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 4 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
OBO file temporarily saved to /tmp/tmpr3nqolrs.obo
/tmp/tmpr3nqolrs.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0008544': 'yellow', 'GO:0009888': 'yellow', 'GO:0019646': 'lightgreen', 'GO:0045109

In [78]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def shorten_name(name, max_length=10):
    """Shorten long names to a specified maximum length."""
    if len(name) > max_length:
        return name[:max_length] + '...'
    return name

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None, go_names=None):
    """Custom function to plot GO terms with colors."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)
    
    # Shorten GO term names
    if go_names:
        for go_id in go_ids:
            if go_id in go_names:
                objplt.gosubdag.go2obj[go_id].name = shorten_name(go_names[go_id])

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path, max_terms=5):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Filter and sort GO IDs by level and depth, and select the top N results
        plot_go_entries = [entry for entry in sorted_go_entries if entry['native'] in go_dag][:max_terms]
        
        # Create a color mapping for the nodes
        go2color = {}
        go_names = {}
        for i, entry in enumerate(plot_go_entries):
            go_id = entry['native']
            go_names[go_id] = entry['name']
            if i < 2:
                go2color[go_id] = 'yellow'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightgreen'  # The rest

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        sorted_go_ids = [entry['native'] for entry in plot_go_entries]
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}, go_names: {go_names}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color, go_names=go_names)
        print(f"Top {max_terms} GO DAG plot generated at {output_file}")


# Generate plots with a limited number of top GO terms
for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path, max_terms=5)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmptf00utgh.obo
/tmp/tmptf00utgh.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen', 'GO:0006338': 'lightgreen'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0006334', 'GO:0034728', 'GO:0065004', 'GO:0061644', 'GO:0006338'], go2color: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen', 'GO:0006338': 'lightgreen'}, go_names: {'GO:0006334': 'nucleosome assembly', 'GO:0034728': 'nucleosome organization', 'GO:0065004': 'protein-DNA complex assembly', 'GO:0061644': 'protein localization to CENP-A containing chromatin', 'GO:0006338': 'chromatin remodeling'}
    5 usr  23 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 5 GO DAG plot generated at tcga/

In [79]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def shorten_name(name, max_length=10):
    """Shorten long names to a specified maximum length."""
    if len(name) > max_length:
        return name[:max_length] + '...'
    return name

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None, go_names=None):
    """Custom function to plot GO terms with colors and shortened names."""
    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)
    
    # Shorten GO term names
    if go_names:
        for go_id in go_ids:
            if go_id in go_names:
                shortened_name = shorten_name(go_names[go_id])
                gosubdag.go2obj[go_id].name = shortened_name

    # Plot DAG
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path, max_terms=5):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Filter and sort GO IDs by level and depth, and select the top N results
        plot_go_entries = [entry for entry in sorted_go_entries if entry['native'] in go_dag][:max_terms]
        
        # Create a color mapping for the nodes
        go2color = {}
        go_names = {}
        for i, entry in enumerate(plot_go_entries):
            go_id = entry['native']
            go_names[go_id] = entry['name']
            if i < 2:
                go2color[go_id] = 'yellow'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightgreen'  # The rest

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        sorted_go_ids = [entry['native'] for entry in plot_go_entries]
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}, go_names: {go_names}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color, go_names=go_names)
        print(f"Top {max_terms} GO DAG plot generated at {output_file}")

# Generate plots with a limited number of top GO terms
for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path, max_terms=5)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpz0_4lvrg.obo
/tmp/tmpz0_4lvrg.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen', 'GO:0006338': 'lightgreen'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0006334', 'GO:0034728', 'GO:0065004', 'GO:0061644', 'GO:0006338'], go2color: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen', 'GO:0006338': 'lightgreen'}, go_names: {'GO:0006334': 'nucleosome assembly', 'GO:0034728': 'nucleosome organization', 'GO:0065004': 'protein-DNA complex assembly', 'GO:0061644': 'protein localization to CENP-A containing chromatin', 'GO:0006338': 'chromatin remodeling'}
    5 usr  23 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 5 GO DAG plot generated at tcga/

In [86]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def shorten_name(name, max_length=50):
    """Shorten long names to a specified maximum length."""
    if len(name) > max_length:
        return name[:max_length] + '...'
    return name

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors and shortened names."""
    # Shorten names for all GO terms in the DAG
    for go_id in go_dag:
        go_dag[go_id].name = shorten_name(go_dag[go_id].name)

    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path, max_terms=5):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Filter and sort GO IDs by level and depth, and select the top N results
        plot_go_entries = [entry for entry in sorted_go_entries if entry['native'] in go_dag][:max_terms]
        
        # Create a color mapping for the nodes
        go2color = {}
        go_names = {}
        for i, entry in enumerate(plot_go_entries):
            go_id = entry['native']
            go_names[go_id] = entry['name']
            if i < 2:
                go2color[go_id] = 'yellow'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightgreen'  # The rest

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        sorted_go_ids = [entry['native'] for entry in plot_go_entries]
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top {max_terms} GO DAG plot generated at {output_file}")


# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

# Generate plots with a limited number of top GO terms
for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path, max_terms=4)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpl2qn13ax.obo
/tmp/tmpl2qn13ax.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen'}
Calling custom_plot_gos with output_file: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png, sorted_go_ids: ['GO:0006334', 'GO:0034728', 'GO:0065004', 'GO:0061644'], go2color: {'GO:0006334': 'yellow', 'GO:0034728': 'yellow', 'GO:0065004': 'lightgreen', 'GO:0061644': 'lightgreen'}
    4 usr  23 GOs  WROTE: tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
Top 4 GO DAG plot generated at tcga/nosmoking3_go:bp_ora_dags/dag_plot_Age_down_go:bp.png
OBO file temporarily saved to /tmp/tmpl2qn13ax.obo
/tmp/tmpl2qn13ax.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0008544': 'yellow', 'GO:0009888': 'yellow', 'GO:0019646': 'lightgreen', 'GO:0045109

In [2]:
%pip install pygraphviz goatools
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def shorten_name(name, max_length=50):
    """Shorten long names to a specified maximum length."""
    if len(name) > max_length:
        return name[:max_length] + '...'
    return name

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors and shortened names."""
    # Shorten names for all GO terms in the DAG
    for go_id in go_dag:
        go_dag[go_id].name = shorten_name(go_dag[go_id].name)

    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path, max_terms=5):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Filter and sort GO IDs by level and depth, and select the top N results
        plot_go_entries = [entry for entry in sorted_go_entries if entry['native'] in go_dag][:max_terms]
        
        # Create a color mapping for the nodes
        go2color = {}
        go_names = {}
        for i, entry in enumerate(plot_go_entries):
            go_id = entry['native']
            go_names[go_id] = entry['name']
            if i < 1:
                go2color[go_id] = 'yellow'  # Top 2 lowest p-value
            elif i < 2:
                go2color[go_id] = 'lightblue'  # Top 2 lowest p-value
            elif i < 3:
                go2color[go_id] = 'orange'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightgreen'  # The rest

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/nosmoking3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        sorted_go_ids = [entry['native'] for entry in plot_go_entries]
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top {max_terms} GO DAG plot generated at {output_file}")


# List of JSON files to process
json_files = [
    'tcga/nosmoking3_go_ora/Age_down_go:bp.json',
    'tcga/nosmoking3_go_ora/Age_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TissueXSex_up_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_down_go:bp.json',
    'tcga/nosmoking3_go_ora/TumorVsNormal_up_go:bp.json'
]

# Generate plots with a limited number of top GO terms
for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path, max_terms=4)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


Collecting pygraphviz
  Downloading pygraphviz-1.13.tar.gz (104 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.6/104.6 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting goatools
  Downloading goatools-1.4.7-py3-none-any.whl.metadata (14 kB)
Collecting docopt (from goatools)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting ftpretty (from goatools)
  Downloading ftpretty-0.4.0-py2.py3-none-any.whl.metadata (6.6 kB)
Collecting openpyxl (from goatools)
  Downloading openpyxl-3.1.3-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting pydot (from goatools)
  Downloading pydot-2.0.0-py3-none-any.whl.metadata (9.6 kB)
Collecting scipy (from goatools)
  Downloading 

In [5]:
import requests
import tempfile
import os
import json
from goatools.obo_parser import GODag
from goatools.gosubdag.gosubdag import GoSubDag
from goatools.gosubdag.plot.gosubdag_plot import GoSubDagPlot

def stream_obo_to_temp_file(url):
    """Stream OBO file from URL and temporarily save to disk."""
    response = requests.get(url, stream=True)
    if response.status_code == 200:
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.obo')
        with open(temp_file.name, 'wb') as f_out:
            for chunk in response.iter_content(chunk_size=128):
                f_out.write(chunk)
        return temp_file.name
    else:
        print(f"Failed to download OBO file: {response.status_code}")
        return None

# URL to the Gene Ontology OBO file
obo_url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'

# Stream OBO file and temporarily save to disk
temp_obo_path = stream_obo_to_temp_file(obo_url)

def shorten_name(name, max_length=50):
    """Shorten long names to a specified maximum length."""
    if len(name) > max_length:
        return name[:max_length] + '...'
    return name

def custom_plot_gos(output_file, go_ids, go_dag, go2color=None):
    """Custom function to plot GO terms with colors and shortened names."""
    # Shorten names for all GO terms in the DAG
    for go_id in go_dag:
        go_dag[go_id].name = shorten_name(go_dag[go_id].name)

    gosubdag = GoSubDag(go_ids, go_dag, prt=None)
    objplt = GoSubDagPlot(gosubdag, go2color=go2color)
    objplt.plt_dag(output_file)

def generate_dag_plots(json_file, temp_obo_path, max_terms=5):
    if temp_obo_path:
        print(f"OBO file temporarily saved to {temp_obo_path}")
        go_dag = GODag(temp_obo_path)
        print(f"GO DAG loaded with {len(go_dag)} terms.")

        # Load significant GO IDs and their p-values from the JSON result file
        with open(json_file) as f:
            data = json.load(f)
            go_entries = [entry for entry in data if entry['significant']]
            # Sort entries by p-value
            sorted_go_entries = sorted(go_entries, key=lambda x: x['p_value'])

        # Filter and sort GO IDs by level and depth, and select the top N results
        plot_go_entries = [entry for entry in sorted_go_entries if entry['native'] in go_dag][:max_terms]
        
        # Create a color mapping for the nodes
        go2color = {}
        go_names = {}
        for i, entry in enumerate(plot_go_entries):
            go_id = entry['native']
            go_names[go_id] = entry['name']
            if i < 1:
                go2color[go_id] = 'yellow'  # Top 2 lowest p-value
            elif i < 2:
                go2color[go_id] = 'lightblue'  # Top 2 lowest p-value
            elif i < 3:
                go2color[go_id] = 'orange'  # Top 2 lowest p-value
            else:
                go2color[go_id] = 'lightgreen'  # The rest

        # Debug: Print the color mapping
        print(f"Color mapping for GO terms: {go2color}")

        # Plotting the GO DAG
        output_folder = 'tcga/unified3_go:bp_ora_dags'
        os.makedirs(output_folder, exist_ok=True)
        file_name = os.path.splitext(os.path.basename(json_file))[0]
        output_file = f"{output_folder}/dag_plot_{file_name}.png"

        # Debug: Verify function call parameters
        sorted_go_ids = [entry['native'] for entry in plot_go_entries]
        print(f"Calling custom_plot_gos with output_file: {output_file}, sorted_go_ids: {sorted_go_ids}, go2color: {go2color}")

        custom_plot_gos(output_file, sorted_go_ids, go_dag, go2color=go2color)
        print(f"Top {max_terms} GO DAG plot generated at {output_file}")


# List of JSON files to process
json_files = [
    'tcga/unified3_go_ora/TumorVsNormal_up_go:bp.json'
]

# Generate plots with a limited number of top GO terms
for json_file in json_files:
    generate_dag_plots(json_file, temp_obo_path, max_terms=4)

if temp_obo_path:  # Clean up the temporary file
    os.remove(temp_obo_path)


OBO file temporarily saved to /tmp/tmpfsqmys8t.obo
/tmp/tmpfsqmys8t.obo: fmt(1.2) rel(2024-04-24) 45,667 Terms
GO DAG loaded with 45667 terms.
Color mapping for GO terms: {'GO:0002250': 'yellow', 'GO:0016064': 'lightblue', 'GO:0019724': 'orange', 'GO:1903047': 'lightgreen'}
Calling custom_plot_gos with output_file: tcga/unified3_go:bp_ora_dags/dag_plot_TumorVsNormal_up_go:bp.png, sorted_go_ids: ['GO:0002250', 'GO:0016064', 'GO:0019724', 'GO:1903047'], go2color: {'GO:0002250': 'yellow', 'GO:0016064': 'lightblue', 'GO:0019724': 'orange', 'GO:1903047': 'lightgreen'}
    4 usr  14 GOs  WROTE: tcga/unified3_go:bp_ora_dags/dag_plot_TumorVsNormal_up_go:bp.png
Top 4 GO DAG plot generated at tcga/unified3_go:bp_ora_dags/dag_plot_TumorVsNormal_up_go:bp.png


In [1]:
%pip install celltypegenomics

Collecting celltypegenomics
  Downloading celltypegenomics-0.0.8-py3-none-any.whl.metadata (3.1 kB)
Downloading celltypegenomics-0.0.8-py3-none-any.whl (601 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m601.6/601.6 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: celltypegenomics
Successfully installed celltypegenomics-0.0.8
Note: you may need to restart the kernel to use updated packages.


In [14]:
import celltypegenomics

with open('genelists/Genelist1.txt', 'r') as file:
    genelist = file.read().splitlines()

result = celltypegenomics.celltypefishertest(genelist)

result

Unnamed: 0,p_value,odds_ratio,count_in_both,count_in_genelist_not_cell_type,count_in_cell_type_not_genelist,count_in_neither,adjusted_p_value
Suprabasal keratinocytes,4.524017e-70,26.866238,78,122,464,19498,4.478776e-68
Basal keratinocytes,7.369658e-38,21.508184,43,157,251,19711,3.647981e-36
Squamous epithelial cells,1.2879480000000001e-23,11.342236,36,164,379,19583,4.250230000000001e-22
Serous glandular cells,1.44811e-18,13.182911,25,175,214,19748,3.5840730000000005e-17
Basal respiratory cells,2.150189e-18,11.382659,27,173,270,19692,4.257373e-17
Ionocytes,3.263099e-14,9.865123,22,178,247,19715,5.384114e-13
Club cells,2.788076e-10,9.90991,15,185,162,19800,3.943136e-09
Early spermatids,5.435286e-10,0.113205,4,196,3049,16913,6.726167e-09
granulocytes,1.288205e-08,2.895019,46,154,1867,18095,1.417026e-07
neutrophil,4.137123e-07,3.198935,29,171,1005,18957,4.095752e-06


In [19]:
csv = celltypegenomics.celltypefishertest(genelist, alpha=1)

csv.to_csv('Genelist1_results.csv')

In [24]:
result = celltypegenomics.celltypefishertest(genelist, heca=1, alpha=0.99)

result

Unnamed: 0,p_value,odds_ratio,count_in_both,count_in_genelist_not_cell_type,count_in_cell_type_not_genelist,count_in_neither,adjusted_p_value
Monocyte,0.00015,37.983503,3,197,8,19954,0.014516
Neutrophilic granulocyte,0.00021,15.648352,4,196,26,19936,0.014516
Tubule cell,0.019741,100.306533,1,199,1,19961,0.681078
Parietal cell,0.019741,100.306533,1,199,1,19961,0.681078
Perineural epithelial cell,0.048629,25.072864,1,199,4,19958,0.74564
Keratinocyte,0.039095,33.432161,1,199,3,19959,0.74564
Ductal cell,0.039095,33.432161,1,199,3,19959,0.74564
Epithelial cell,0.044631,6.291035,2,198,32,19930,0.74564
Muller cell,0.039095,33.432161,1,199,3,19959,0.74564


In [23]:
result = celltypegenomics.celltypefishertest(genelist, hpa_marker_genes=1, alpha=0.99)

result

Unnamed: 0,p_value,odds_ratio,count_in_both,count_in_genelist_not_cell_type,count_in_cell_type_not_genelist,count_in_neither,adjusted_p_value
Kupffer cells,0.000292,201.626263,2,198,1,19961,0.015758
Ito cells,0.029466,50.150754,1,199,2,19960,0.397793
Suprabasal keratinocytes,0.029466,50.150754,1,199,2,19960,0.397793
Muller glia cells,0.029466,50.150754,1,199,2,19960,0.397793


In [9]:
result = celltypegenomics.celltypefishertest(genelist, tissue=1)

result

Unnamed: 0,p_value,odds_ratio,count_in_both,count_in_genelist_not_cell_type,count_in_cell_type_not_genelist,count_in_neither,adjusted_p_value
esophagus,7.601492e-83,38.030381,81,119,351,19611,2.736537e-81
vagina,1.690853e-62,65.774834,49,151,98,19864,3.043535e-61
skin 1,8.633227000000001e-28,10.425535,46,154,556,19406,1.035987e-26
cervix,9.204009e-16,14.106838,20,180,156,19806,8.283608e-15
testis,3.222478e-08,0.045357,1,199,1991,17971,2.320184e-07
salivary gland,3.390757e-07,5.598009,15,185,285,19677,2.034454e-06
bone marrow,0.003423813,2.165297,19,181,923,19039,0.01760818


In [10]:
import celltypegenomics

with open('genelists/Genelist2.txt', 'r') as file:
    genelist = file.read().splitlines()

result = celltypegenomics.celltypefishertest(genelist)

result

Unnamed: 0,p_value,odds_ratio,count_in_both,count_in_genelist_not_cell_type,count_in_cell_type_not_genelist,count_in_neither,adjusted_p_value
Leydig cells,2.17885e-12,7.860914,22,178,309,19653,2.157062e-10
Fibroblasts,1.016129e-10,6.35203,22,178,381,19581,5.02984e-09
Astrocytes,1.924237e-09,3.505503,37,163,1214,18748,6.349983e-08
Peritubular cells,1.208348e-08,5.573777,19,181,369,19593,2.990661e-07
Oligodendrocyte precursor cells,2.324104e-07,2.812417,39,161,1583,18379,4.601726e-06
Basal prostatic cells,1.016557e-06,6.538088,12,188,193,19769,1.677319e-05
Spermatocytes,1.7136e-05,0.065917,1,199,1414,18548,0.0002423519
Smooth muscle cells,2.414927e-05,4.086826,14,186,361,19601,0.0002988473
Early spermatids,9.401559e-05,0.355167,12,188,3041,16921,0.001034171
Secretory cells,0.0001288004,5.815728,8,192,142,19820,0.001275124


In [11]:
result = celltypegenomics.celltypefishertest(genelist, heca=1)

result

Unnamed: 0,p_value,odds_ratio,count_in_both,count_in_genelist_not_cell_type,count_in_cell_type_not_genelist,count_in_neither,adjusted_p_value


In [12]:
result = celltypegenomics.celltypefishertest(genelist, hpa_marker_genes=1)

result

Unnamed: 0,p_value,odds_ratio,count_in_both,count_in_genelist_not_cell_type,count_in_cell_type_not_genelist,count_in_neither,adjusted_p_value


In [13]:
result = celltypegenomics.celltypefishertest(genelist, tissue=1)

result

Unnamed: 0,p_value,odds_ratio,count_in_both,count_in_genelist_not_cell_type,count_in_cell_type_not_genelist,count_in_neither,adjusted_p_value
skin 1,6.338412e-07,3.913509,21,179,581,19381,2.3e-05
testis,2.120921e-05,0.184515,4,196,1988,17974,0.000382
lymphoid tissue,0.0003244379,0.19199,3,197,1467,18495,0.003893
brain,0.002743082,1.808208,36,164,2161,17801,0.021491
skeletal muscle,0.00298485,2.218152,19,181,902,19060,0.021491
bone marrow,0.006665766,0.204406,2,198,940,19022,0.039995
