Este é um script em Jupyter para desenvolver pequenas funções e depois passá-las para o script em Python.

In [1]:
import os
import subprocess
from Bio import SeqIO, SeqFeature, Entrez, AlignIO, ExPASy, SwissProt
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Blast import NCBIXML, NCBIWWW
from Bio.Align.Applications import ClustalwCommandline
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor, DistanceCalculator
from Bio import Phylo


Due to the on going maintenance burden of keeping command line application
wrappers up to date, we have decided to deprecate and eventually remove these
modules.

We instead now recommend building your command line and invoking it directly
with the subprocess module.


### Análise da sequência e das features presentes no NCBI

Function to fetch a gene's GenBank file

In [9]:
def get_seq(accession, filename, db, rettype, email="your_email@example.com", output_dir="output"):
    """
    Fetches a sequence from NCBI and saves it to a file in the specified output directory.

    Args:
        accession (str): NCBI Accession or ID.
        filename (str): File name to save the sequence.
        db (str): NCBI database to query (e.g., "nucleotide", "protein").
        rettype (str): Format of the returned sequence (e.g., "fasta", "gb").
        email (str): User's email for NCBI access (assigned to Entrez.email).
        output_dir (str): Directory where the file will be saved (created if missing).

    Returns:
        str or None: Full path to the saved file, or None when fetching or
        writing failed (the error is printed instead of being raised).
    """
    Entrez.email = email
    os.makedirs(output_dir, exist_ok=True)  # Ensure output directory exists
    output_path = os.path.join(output_dir, filename)  # Full path to save file

    try:
        print(f"Fetching data for accession: {accession} from {db} database...")
        handle = Entrez.efetch(db=db, id=accession, rettype=rettype, retmode="text")
        try:
            sequence_data = handle.read()
        finally:
            # Release the handle even when reading the response fails
            handle.close()

        # Save the sequence to the output file
        with open(output_path, "w") as file:
            file.write(sequence_data)
        print(f"Sequence saved successfully to {output_path}.")
        return output_path

    except Exception as e:
        print(f"An error occurred: {e}")
        return None


In [13]:
# Download the nucleotide record U10926.1 in GenBank format into genes/comS.gb
get_seq(
    accession="U10926.1",
    filename="comS.gb",
    db="nucleotide",
    rettype="gb",
    output_dir="genes",
)

Fetching data for accession: U10926.1 from nucleotide database...
Sequence saved successfully to genes\comS.gb.


'genes\\comS.gb'

In [8]:
# Download the protein record AAA61567.1 as FASTA into proteins/comS_prot.fasta
get_seq(
    accession="AAA61567.1",
    filename="comS_prot.fasta",
    db="protein",
    rettype="fasta",
    output_dir="proteins",
)

Fetching data for accession: AAA61567.1 from protein database...
Sequence saved successfully to proteins\comS_prot.fasta.


'proteins\\comS_prot.fasta'

Function to extract CDS sequences

In [2]:
def extract_cds_from_genbank(genbank_file, filename="cds_sequences.fasta", output_dir="genes_cds"):
    """
    Writes every CDS feature found in a GenBank file to a FASTA file.

    Each entry is headed ">{organism}|{gene}", where the organism comes from the
    record annotations (spaces replaced by underscores, "Unknown_species" when
    absent) and the gene from the first "gene" qualifier ("Unknown_gene" when absent).

    Args:
        genbank_file (str): Path to the GenBank file.
        filename (str): Name of the FASTA file to create.
        output_dir (str): Directory that receives the file (created if missing).

    Returns:
        None
    """
    os.makedirs(output_dir, exist_ok=True)
    destination = os.path.join(output_dir, filename)

    written = 0
    with open(destination, "w") as out_handle:
        for gb_record in SeqIO.parse(genbank_file, "genbank"):
            organism = gb_record.annotations.get("organism", "Unknown_species")
            organism_tag = organism.replace(" ", "_")

            coding_features = (feat for feat in gb_record.features if feat.type == "CDS")
            for cds in coding_features:
                nucleotides = cds.extract(gb_record.seq)
                gene_label = cds.qualifiers.get("gene", ["Unknown_gene"])[0]
                out_handle.write(f">{organism_tag}|{gene_label}\n{nucleotides}\n")
                written += 1

    print(f"{written} CDS sequences extracted and saved to {filename}.")

In [3]:
# Extract the CDS of genes/comS.gb into the default output directory
extract_cds_from_genbank(
    genbank_file="genes/comS.gb",
    filename="comS_cds.fasta",
)

1 CDS sequences extracted and saved to comS_cds.fasta.


Function to get the annotation from genbank file

In [50]:
def extract_annotations(filename):
    """
    Prints the description, organism and full annotation dictionary of a GenBank record.

    Any exception raised while reading or printing is caught and reported on
    stdout instead of being propagated.

    Args:
        filename (str): Path to the GenBank file.
    """
    try:
        gb_record = SeqIO.read(filename, "genbank")
        print(f"Gene Description: {gb_record.description}")

        organism = gb_record.annotations.get('organism', 'Unknown')
        print(f"Organism: {organism}")

        print(f"Annotations: {gb_record.annotations}")
    except Exception as error:
        print(f"An error occurred: {error}")


In [51]:
# Show the record-level annotations of the downloaded GenBank file
extract_annotations(filename="genes/comS.gb")

Gene Description: Bacillus subtilis 168 genetic competence regulation (comS) gene, complete cds
Organism: Bacillus subtilis subsp. subtilis str. 168
Annotations: {'molecule_type': 'DNA', 'topology': 'linear', 'data_file_division': 'BCT', 'date': '26-JAN-1995', 'accessions': ['U10926'], 'sequence_version': 1, 'keywords': [''], 'source': 'Bacillus subtilis subsp. subtilis str. 168', 'organism': 'Bacillus subtilis subsp. subtilis str. 168', 'taxonomy': ['Bacteria', 'Bacillati', 'Bacillota', 'Bacilli', 'Bacillales', 'Bacillaceae', 'Bacillus'], 'references': [Reference(title='Identification of comS, a gene of the srfA operon that regulates the establishment of genetic competence in Bacillus subtilis', ...), Reference(title="Nucleotide sequence of 5' portion of srfA that contains the region required for competence establishment in Bacillus subtilus", ...), Reference(title='Direct Submission', ...)]}


Function to analyse features and qualifiers

In [53]:
def analyze_features(filename, input_dir="genes"):
    """
    Prints the type, location and qualifiers of every feature in a GenBank file.

    Args:
        filename (str): GenBank file to read. It is looked up inside `input_dir`
            first (so a bare name such as "comS.gb" keeps working); when that
            lookup finds nothing and `filename` exists as given, it is used as a
            path directly.
        input_dir (str): Directory searched for bare file names (default: "genes").

    Any exception raised while reading or printing is caught and reported on
    stdout instead of being propagated.
    """
    # Preserve the historical "genes/<name>" lookup, but also accept a real path
    # so this function can be called like the other GenBank helpers.
    prefixed_path = f"{input_dir}/{filename}"
    if os.path.exists(prefixed_path) or not os.path.exists(filename):
        genbank_path = prefixed_path
    else:
        genbank_path = filename

    try:
        record = SeqIO.read(genbank_path, "genbank")
        print(f"Number of Features: {len(record.features)}")
        for feature in record.features:
            print(f"Type: {feature.type}")
            print(f"Location: {feature.location}")
            print(f"Qualifiers: {feature.qualifiers}")
            print("-" * 50)

    except Exception as e:
        print(f"An error occurred: {e}")


In [54]:
# analyze_features looks the bare file name up under genes/
analyze_features(filename="comS.gb")

Number of Features: 4
Type: source
Location: [0:570](+)
Qualifiers: {'organism': ['Bacillus subtilis subsp. subtilis str. 168'], 'mol_type': ['genomic DNA'], 'strain': ['168'], 'sub_species': ['subtilis'], 'type_material': ['type strain of Bacillus subtilis'], 'db_xref': ['taxon:224308'], 'map': ['30 degrees'], 'note': ['sequence similar to srfA2 gene, GenBank Accession Number X70356, and srfAB gene, GenBank Accession Number D13262']}
--------------------------------------------------
Type: regulatory
Location: [188:189](+)
Qualifiers: {'regulatory_class': ['ribosome_binding_site']}
--------------------------------------------------
Type: gene
Location: [210:351](+)
Qualifiers: {'gene': ['comS']}
--------------------------------------------------
Type: CDS
Location: [210:351](+)
Qualifiers: {'gene': ['comS'], 'function': ['regulation of genetic competence'], 'experiment': ['experimental evidence, no additional details recorded'], 'codon_start': ['1'], 'transl_table': ['11'], 'organism'

Function to extract external references

In [38]:
def extract_external_references(filename):
    """
    Prints the distinct external database references ("db_xref" qualifiers)
    found on the features of a GenBank record.

    References are de-duplicated and printed in sorted order so that repeated
    runs produce the same output. Any exception is caught and reported on
    stdout instead of being propagated.

    Args:
        filename (str): Path to the GenBank file.
    """
    try:
        record = SeqIO.read(filename, "genbank")
        external_refs = []

        for feature in record.features:
            external_refs.extend(feature.qualifiers.get("db_xref", []))
        print("External References:")

        # Iterating a bare set of strings has no stable order between runs
        for ref in sorted(set(external_refs)):
            print(ref)

    except Exception as e:
        print(f"An error occurred: {e}")



In [56]:
# List the db_xref entries attached to the features of the comS record
extract_external_references(filename="genes/comS.gb")

External References:
taxon:224308


### Análise de homologias por BLAST

Função para fazer um BLAST no NCBI a partir de um ficheiro (pode-se escolher o tipo de BLAST, a base de dados e outros parâmetros)

In [39]:
def blast(file_name, file_format = "fasta", program = "blastn", database = "nt", e_value = 0.05, hitlist_size = 100):
    """
    Performs a BLAST search for a sequence contained in a file.

    Parameters:
        file_name (str): Name of the file containing a single sequence.
        file_format (str): Format of the file (default: 'fasta').
        program (str): BLAST program to use (default: 'blastn').
        database (str): Database to search (default: 'nt').
        e_value (float): E-value threshold for the search (default: 0.05).
        hitlist_size (int): Number of hits to return (default: 100).

    Returns:
        A handle with the BLAST results, or None in case of an error
        (the error is printed instead of being raised). The caller is
        responsible for closing the returned handle.
    """

    try:
        # Read inside a context manager so the sequence file is always closed
        with open(file_name) as sequence_handle:
            record = SeqIO.read(sequence_handle, format=file_format)

        print("BLASTing...")
        # Perform the BLAST search using NCBIWWW.qblast
        result_handle = NCBIWWW.qblast(program, database, record.format("fasta"), expect=e_value, hitlist_size=hitlist_size)

        # Print success message and return the BLAST results handle
        print("BLAST completed successfully.")
        return result_handle

    except Exception as e:
        # Catch any exceptions and print an error message
        print(f"Error running BLAST: {e}")
        return None

Função que utiliza a função blast, mas guarda o output num ficheiro.

In [15]:
def get_blast(file_name, output_name="blast_result", output_dir="blast_output", file_format="fasta", program="blastn", database="nt", e_value=0.01, hitlist_size=100):
    """
    Runs `blast` on a sequence file and stores the raw result as "<output_name>.xml".

    Parameters:
        file_name (str): Name of the file containing the sequence.
        output_name (str): Base name (without extension) of the result file.
        output_dir (str): Directory that receives the XML file (created if missing).
        file_format, program, database, e_value, hitlist_size: forwarded
            unchanged to `blast`.

    Returns:
        None. Nothing is written when `blast` returns None; write errors are
        printed instead of being raised.
    """
    os.makedirs(output_dir, exist_ok=True)
    xml_stem = os.path.join(output_dir, output_name)

    blast_handle = blast(file_name, file_format, program, database, e_value, hitlist_size)
    if blast_handle is None:
        return

    try:
        # Dump the raw XML exactly as returned by the search
        with open(xml_stem + ".xml", "w") as xml_out:
            xml_out.write(blast_handle.read())
            print(f"Results saved to {output_name}.xml.")
    except Exception as error:
        print(f"Error saving the results: {error}")
    finally:
        blast_handle.close()

    

Teste da função *get_blast* 

In [20]:
# blastn of the comS CDS against the default database, keeping up to 10000 hits
get_blast(
    file_name="genes_cds/comS_cds.fasta",
    output_name="comS_cds",
    hitlist_size=10000,
)

BLASTing...
BLAST concluído com sucesso.
Resultados salvos em comS_cds.xml.


In [34]:
# blastp of the ComS protein against nr, keeping up to 10000 hits
get_blast(
    file_name="proteins/comS_prot.fasta",
    output_name="comS_protein",
    program="blastp",
    database="nr",
    hitlist_size=10000,
)

BLASTing...
BLAST concluído com sucesso.
Resultados salvos em comS_protein.xml.


Function to parse a blast result

In [5]:
def parse_blast_results(file_name, exclude=None, e_value_thresh=0.05, identity_thresh=90):
    """
    Parses BLAST results and extracts significant hits, skipping alignments
    whose title contains any of the excluded terms.

    Args:
        file_name (str): Path to the BLAST result file in XML format.
        exclude (str, list of str or None): Case-insensitive substring(s) matched
            against each alignment title (e.g. ["subtilis", "genome"]). A single
            string is treated as a one-item list. None disables the filter.
        e_value_thresh (float): An HSP is kept only if its E-value is below this value.
        identity_thresh (float): An HSP is kept only if its percentage identity
            (identities / alignment length * 100) is above this value.

    Returns:
        list: One dictionary per significant HSP with the keys "title",
        "length", "e_value", "identity" and "alignment_length".
    """
    # A bare string would otherwise be iterated character by character and
    # exclude almost every alignment.
    if isinstance(exclude, str):
        exclude = [exclude]
    excluded_terms = [term.lower() for term in (exclude or [])]

    with open(file_name) as result_handle:
        blast_record = NCBIXML.read(result_handle)

    significant_hits = []
    for alignment in blast_record.alignments:
        title_lower = alignment.title.lower()
        if any(term in title_lower for term in excluded_terms):
            continue

        for hsp in alignment.hsps:
            if not hsp.expect < e_value_thresh:
                continue
            identity = (hsp.identities / hsp.align_length) * 100
            if identity > identity_thresh:
                significant_hits.append({
                    "title": alignment.title,
                    "length": alignment.length,
                    "e_value": hsp.expect,
                    "identity": identity,
                    "alignment_length": hsp.align_length,
                })

    return significant_hits

In [32]:
# Significant nucleotide hits, ignoring titles that mention any excluded term
parse_blast_results(
    file_name="blast_output/comS_cds.xml",
    exclude=["subtilis", "chromosome", "complete", "genome"],
    e_value_thresh=0.05,
    identity_thresh=70,
)

[{'title': 'gi|1257575185|dbj|LC171348.1| Bacillus sp. FW1 genes for pyrene metabolism, contig_7',
  'length': 455392,
  'e_value': 5.1292e-61,
  'identity': 98.58156028368793,
  'alignment_length': 141},
 {'title': 'gi|1757450143|gb|MK570508.1| Bacillus amyloliquefaciens strain TSBSO3.8 surfactin gene region',
  'length': 65411,
  'e_value': 7.136e-34,
  'identity': 84.39716312056737,
  'alignment_length': 141},
 {'title': 'gi|1757450197|gb|MK570509.1| Bacillus amyloliquefaciens strain H2O-1 surfactin gene region',
  'length': 65415,
  'e_value': 7.136e-34,
  'identity': 84.39716312056737,
  'alignment_length': 141},
 {'title': 'gi|2784321086|gb|CP165606.1| Bacillus velezensis strain BP1 plasmid unnamed',
  'length': 4056860,
  'e_value': 3.0343e-32,
  'identity': 83.68794326241135,
  'alignment_length': 141},
 {'title': 'gi|42820782|emb|AJ575642.1| Bacillus amyloliquefaciens yciC gene, yx01 gene, yckc gene, yckD gene, yckE gene, nin gene, nuc gene, hxlB gene, hxlA gene, hxlR gene, xy

In [6]:
# Significant protein hits, ignoring titles that mention "subtilis"
parse_blast_results(
    file_name="blast_output/comS_protein.xml",
    exclude=["subtilis"],
    e_value_thresh=0.05,
    identity_thresh=70,
)

[{'title': 'ref|WP_231516564.1| competence protein ComS [Bacillus sp. MBGLi79]',
  'length': 46,
  'e_value': 8.88301e-25,
  'identity': 97.82608695652173,
  'alignment_length': 46},
 {'title': 'ref|WP_013307914.1| MULTISPECIES: competence protein ComS [Bacillus] >gb|ADM36419.1| regulator of genetic competence [Bacillus spizizenii str. W23] >gb|MCI3986321.1| competence protein ComS [Bacillus vallismortis] >gb|MCI4136758.1| competence protein ComS [Bacillus vallismortis] >gb|MCM3415811.1| competence protein ComS [Bacillus spizizenii] >gb|MCR4391170.1| competence protein ComS [Bacillus spizizenii]',
  'length': 46,
  'e_value': 1.47269e-24,
  'identity': 97.82608695652173,
  'alignment_length': 46},
 {'title': 'ref|WP_236827566.1| MULTISPECIES: competence protein ComS [Bacillus] >gb|MCI4169681.1| competence protein ComS [Bacillus spizizenii] >gb|MED0869893.1| competence protein ComS [Bacillus spizizenii] >gb|MED1072999.1| competence protein ComS [Bacillus spizizenii] >gb|MFD3202642.1| co

### Alinhamento múltiplo e filogenia

Função para extrair sequências dos resultados do BLAST

In [25]:
def extract_sequences_from_blast(file_name, query_fasta, exclude=None, e_value_thresh=0.05, identity_thresh=90, n_seq=20, coverage_thresh=0.7):
    """
    Extracts subject sequences from BLAST results for alignment, using a query FASTA file.

    At most one sequence is kept per species label. The label is built from the
    first two words of the bracketed text in the last "|"-separated part of the
    alignment title (or of that whole part when it has no brackets).

    Args:
        file_name (str): Path to the BLAST XML result file.
        query_fasta (str): Path to the query sequence FASTA file (single record).
        exclude (str, list of str or None): Case-insensitive substring(s); alignments
            whose title contains one of them are skipped. A single string is
            treated as a one-item list.
        e_value_thresh (float): An HSP qualifies only if its E-value is below this value.
        identity_thresh (float): An HSP qualifies only if its percentage identity is above this value.
        n_seq (int): Maximum number of entries in the returned dictionary; the
            "Query" entry counts towards this limit.
        coverage_thresh (float): Minimum fraction of query sequence covered by the alignment (0 to 1).

    Returns:
        dict: "Query" mapped to the query sequence, followed by
        "<species>|<accession>" keys mapped to the matching subject sequences
        with alignment gap characters ("-") removed.

    Raises:
        ValueError: If the query FASTA record has an empty sequence.
    """
    # Read the query sequence from the FASTA file
    with open(query_fasta, "r") as fasta_handle:
        query_record = SeqIO.read(fasta_handle, "fasta")
        query_sequence = str(query_record.seq)

    if not query_sequence:
        raise ValueError("Query sequence could not be read from the FASTA file.")

    # A bare string would otherwise be iterated character by character
    if isinstance(exclude, str):
        exclude = [exclude]
    excluded_terms = [term.lower() for term in (exclude or [])]

    # Initialize the dictionary with the query sequence
    sequences = {"Query": query_sequence}
    added_species = set()  # Species labels that already contributed a sequence

    # Parse the BLAST XML file
    with open(file_name) as result_handle:
        blast_record = NCBIXML.read(result_handle)

    query_length = len(query_sequence)

    for alignment in blast_record.alignments:
        title = alignment.title
        lowered_title = title.lower()
        if any(term in lowered_title for term in excluded_terms):
            continue  # Skip this alignment if it matches exclusion criteria

        # Species label: bracketed text of the last title field, first two words
        title_fields = title.split("|")
        species_name = title_fields[-1].split("[")[-1].split("]")[0]
        formatted_species_name = "_".join(species_name.split()[:2])

        # Accession: second-to-last title field; titles without "|" fall back
        # to their first whitespace-separated token instead of raising IndexError.
        if len(title_fields) >= 2:
            accession = title_fields[-2]
        else:
            leading_tokens = title.split()
            accession = leading_tokens[0] if leading_tokens else "unknown"
        sequence_header = f"{formatted_species_name}|{accession}"

        for hsp in alignment.hsps:
            if formatted_species_name in added_species:
                break  # This species already has a representative

            # Check E-value and identity thresholds
            if not (hsp.expect < e_value_thresh and (hsp.identities / hsp.align_length) * 100 > identity_thresh):
                continue

            # Coverage as the fraction of the query spanned by this HSP
            coverage = (hsp.query_end - hsp.query_start + 1) / query_length
            if coverage < coverage_thresh:
                continue

            # hsp.sbjct is the aligned subject string; drop the gap characters
            # so the stored value is a plain sequence ready for re-alignment.
            sequences[sequence_header] = hsp.sbjct.replace("-", "")
            added_species.add(formatted_species_name)

            if len(sequences) >= n_seq:  # Stop if desired number of sequences is reached
                return sequences

    return sequences


In [26]:
# Homologous CDS sequences (plus the query) taken from the nucleotide BLAST result
sequences_cds = extract_sequences_from_blast(
    file_name="blast_output/comS_cds.xml",
    query_fasta="genes_cds/comS_cds.fasta",
    exclude=["subtilis"],
    e_value_thresh=0.05,
    identity_thresh=90,
    coverage_thresh=0.7,
)

In [31]:
# Homologous protein sequences (plus the query) taken from the protein BLAST result
sequences_prot = extract_sequences_from_blast(
    file_name="blast_output/comS_protein.xml",
    query_fasta="proteins/comS_prot.fasta",
    exclude=["subtilis"],
    e_value_thresh=0.05,
    identity_thresh=90,
    coverage_thresh=0.7,
)

In [28]:
def save_to_fasta(sequences, output_file="sequences.fasta", output_dir="homologs"):
    """
    Writes a mapping of header -> sequence string to a FASTA file.

    Each dictionary key becomes the record ID (with an empty description) and
    each value the record sequence.

    Args:
        sequences (dict): Sequence strings keyed by the header to use.
        output_file (str): Name of the FASTA file.
        output_dir (str): Directory to save the FASTA file (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)
    fasta_path = os.path.join(output_dir, output_file)

    fasta_records = []
    for header, residues in sequences.items():
        fasta_records.append(SeqRecord(Seq(residues), id=header, description=""))

    SeqIO.write(fasta_records, fasta_path, "fasta")
    print(f"Sequences saved to {fasta_path}.")

In [29]:
# Persist the CDS homologs under homologs/
save_to_fasta(sequences=sequences_cds, output_file="comS_cds_homologs.fasta")

Sequences saved to homologs\comS_cds_homologs.fasta.


In [30]:
# Persist the protein homologs under homologs/
save_to_fasta(sequences=sequences_prot, output_file="comS_prot_homologs.fasta")

Sequences saved to homologs\comS_prot_homologs.fasta.


In [55]:
def run_clustalO(input_file, output_file="aligned_sequences.aln", output_dir="clustal_output", clustalo_exe="clustal-omega-1.2.2-win64\\clustalo"):
    """
    Runs Clustal Omega for multiple sequence alignment.

    Args:
        input_file (str): Path to the input FASTA file.
        output_file (str): Name of the output alignment file.
        output_dir (str): Directory to save the alignment result (created if missing).
        clustalo_exe (str): Path or command name of the Clustal Omega executable.
            Defaults to the bundled Windows build; pass e.g. "clustalo" to use
            an executable found on PATH.

    Returns:
        str: Full path to the output alignment file.

    Raises:
        FileNotFoundError: If the executable cannot be started because it does not exist.
        subprocess.CalledProcessError: If Clustal Omega exits with a non-zero status.
    """
    os.makedirs(output_dir, exist_ok=True)  # Create the directory if it doesn't exist
    output_path = os.path.join(output_dir, output_file)  # Full path to the output file

    # Arguments are passed as a list (no shell), so paths need no quoting
    command = [
        clustalo_exe,
        "--infile", input_file,
        "--outfile", output_path,
        "--outfmt", "clu",  # Output format: Clustal
        "--force"  # Overwrite existing files if necessary
    ]

    try:
        # check=True turns a non-zero exit status into CalledProcessError
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        print("Clustal Omega alignment completed successfully.")
        print(result.stdout)
        return output_path
    except FileNotFoundError:
        print(f"Clustal Omega executable not found: {clustalo_exe}")
        raise
    except subprocess.CalledProcessError as e:
        print("Error during Clustal Omega execution:")
        print(e.stderr)
        raise



In [56]:
# NOTE(review): no cell in this notebook writes homologs/comS_homologs.fasta; the
# save_to_fasta calls produce comS_cds_homologs.fasta and comS_prot_homologs.fasta.
# Confirm which of the two is meant to be aligned here.
aligned_file = run_clustalO(
    "homologs/comS_homologs.fasta",
    "comS.aln",
    "clustal_output",
)

Clustal Omega alignment completed successfully.



Função para construção de árvores a partir do output do Clustal Omega

In [57]:
def build_trees(file_name, file_type, matrix="blosum62", output_dir="tree_output"):
    """
    Builds UPGMA and Neighbor-Joining trees from an alignment and prints them as ASCII.

    A copy of the alignment is also written to "<output_dir>/alignment.sth" in
    Stockholm format. Any exception is caught and reported on stdout instead of
    being propagated.

    Args:
        file_name (str): Path to the input alignment file.
        file_type (str): Format of the input alignment file (e.g., "clustal", "fasta").
        matrix (str): Model name handed to DistanceCalculator (default: "blosum62").
        output_dir (str): Directory for the Stockholm copy (created if missing).

    Returns:
        None
    """
    os.makedirs(output_dir, exist_ok=True)

    try:
        msa = AlignIO.read(file_name, file_type)

        # Keep a Stockholm copy of the alignment for reference
        sth_path = os.path.join(output_dir, "alignment.sth")
        AlignIO.write(msa, sth_path, "stockholm")
        print(f"Alignment saved in Stockholm format at: {sth_path}")

        # Pairwise distances under the requested model
        distances = DistanceCalculator(matrix).get_distance(msa)

        builder = DistanceTreeConstructor()
        tree_upgma = builder.upgma(distances)
        tree_nj = builder.nj(distances)

        # NJ is shown first, then a separator line, then UPGMA
        labelled_trees = (
            ("Neighbor-Joining Tree:", tree_nj),
            ("UPGMA Tree:", tree_upgma),
        )
        for position, (heading, tree) in enumerate(labelled_trees):
            if position > 0:
                print("-" * 50)
            print(heading)
            Phylo.draw_ascii(tree, column_width=120)

    except Exception as error:
        print(f"An error occurred: {error}")

In [58]:
# Draw NJ and UPGMA trees for the Clustal-format alignment
build_trees("clustal_output/comS.aln", "clustal", matrix="blosum62", output_dir="tree_output")

Alignment saved in Stockholm format at: tree_output\alignment.sth
Neighbor-Joining Tree:
                         ___ Query
  ______________________|
 |                      |__ Bacillus_sp.
 |
_|                                    , Bacillus_velezensis
 |____________________________________|
 |                                    | Bacillus_amyloliquefaciens
 |
 |_________________________________________________________________________________________ Bacillus_licheniformis

--------------------------------------------------
UPGMA Tree:
  ____________________________________________________________ Bacillus_licheniformis
 |
_|                                                                                         , Bacillus_velezensis
 |                                                         ________________________________|
 |                                                        |                                | Bacillus_amyloliquefaciens
 |________________________________________

Função para ver a regulação

In [97]:
def get_regulatory_proteins(gene_id):
    """
    Fetches a UniProt (Swiss-Prot) entry and returns its comments that mention regulation.

    Args:
        gene_id (str): UniProt ID of the entry.

    Returns:
        list or str: The comment strings containing "regulation"
        (case-insensitive), or the string "Regulatory Info not found" when no
        comment matches.
    """
    handle = ExPASy.get_sprot_raw(gene_id)
    try:
        record = SwissProt.read(handle)
    finally:
        # Release the handle whether or not parsing succeeds
        handle.close()

    regulatory_info = [comment for comment in record.comments if "regulation" in comment.lower()]
    if not regulatory_info:
        return "Regulatory Info not found"
    return regulatory_info

In [98]:
# Look up the regulation-related UniProt comments for one entry and print them.
gene_id = "P80355"  # Example UniProt ID
# The result is either a list of matching comments or the string
# "Regulatory Info not found".
regulation_data = get_regulatory_proteins(gene_id)
print(regulation_data)

Regulatory Info not found


Função para identificar variantes

In [72]:
def fetch_variants_from_dbSNP(gene_id, email="your_email@example.com"):
    """
    Fetches variant information for a gene from dbSNP via NCBI Entrez.

    The gene is searched with the term "<gene_id>[Gene ID]" in the "snp"
    database, and the records of the returned IDs are fetched in one request.
    No retmax is passed to the search, so the number of IDs is whatever esearch
    returns by default.

    Args:
        gene_id (str): The Gene ID of the gene of interest.
        email (str): User's email for NCBI access (assigned to Entrez.email).

    Returns:
        str: Raw data from dbSNP related to the gene, or the string
        "No variants found for this gene." when the search returns no IDs.
    """
    Entrez.email = email

    handle = Entrez.esearch(db="snp", term=f"{gene_id}[Gene ID]")
    try:
        record = Entrez.read(handle)
    finally:
        # Release the handle whether or not parsing succeeds
        handle.close()

    # Fetch variant data
    snp_ids = record["IdList"]
    if not snp_ids:
        return "No variants found for this gene."

    handle = Entrez.efetch(db="snp", id=",".join(snp_ids), rettype="xml", retmode="text")
    try:
        return handle.read()
    finally:
        handle.close()

In [74]:
# NOTE(review): gene_id is rebound here. Earlier in the notebook it held a UniProt
# ID ("P80355"); the value below is used as the "[Gene ID]" search term for dbSNP,
# so it is presumably an NCBI Gene ID. Confirm, and consider a distinct variable name.
gene_id = "938310" 
# The result is either the raw dbSNP payload or "No variants found for this gene."
variants = fetch_variants_from_dbSNP(gene_id)
print(variants)

No variants found for this gene.
