# Notebook for R (BioMart homolog download)

# R Notebook

In [None]:
# Record the date this notebook was run as day-month-year, e.g. "05-Mar-2024"
# (the month abbreviation follows the session locale).
format(Sys.Date(), format = "%d-%b-%Y")

In [None]:
library(biomaRt)

In [None]:
# Download human-to-species ortholog mappings from Ensembl BioMart.
#
# Queries the human gene dataset ("hsapiens_gene_ensembl") twice -- once for
# the human identifiers and once for the homolog attributes of the requested
# species -- and left-joins the two tables on `ensembl_gene_id`, so human
# genes missing from the ortholog query are kept (NA in the homolog columns).
# The merged table is also written to "data/<species_name>_ID_biomart.csv";
# the "data" directory is created if it does not exist yet.
#
# Args:
#   species_name: Single non-empty string; the species prefix used in the
#     homolog attribute names (e.g. "mmusculus", "ggallus").
#
# Returns:
#   A data frame with columns ensembl_gene_id, hgnc_id, hgnc_symbol,
#   <species_name>_homolog_ensembl_gene and
#   <species_name>_homolog_associated_gene_name.
#
# Raises:
#   An error if `species_name` is not a single non-empty string, or if the
#   human dataset exposes no homolog attributes for that species.
get_species_orthologs <- function(species_name) {
  # Reject malformed input before any slow BioMart round trip.
  stopifnot(
    is.character(species_name),
    length(species_name) == 1L,
    !is.na(species_name),
    nzchar(species_name)
  )

  # Connect to Ensembl BioMart
  ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

  # Define species-specific attributes dynamically based on species name
  species_column <- paste0(species_name, "_homolog_ensembl_gene")
  species_gene_name <- paste0(species_name, "_homolog_associated_gene_name")
  homolog_attributes <- c(species_column, species_gene_name)

  # Fail fast on a misspelled species prefix instead of discovering it only
  # after the full human gene table has been downloaded.
  unknown <- setdiff(homolog_attributes, listAttributes(ensembl)$name)
  if (length(unknown) > 0) {
    stop(
      "No homolog attributes for species '", species_name,
      "' in hsapiens_gene_ensembl: ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  # Create the output directory before downloading, so a missing "data"
  # folder cannot throw away the results at the very last step.
  output_dir <- "data"
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
  }
  output_path <- file.path(output_dir, paste0(species_name, "_ID_biomart.csv"))

  # Retrieve all human genes with their Ensembl IDs and HGNC symbols
  human_genes <- getBM(
    attributes = c("hgnc_id", "hgnc_symbol", "ensembl_gene_id"),
    mart = ensembl
  )

  # Get orthologs for the specified species.
  # NOTE(review): kept as a separate query from the one above; BioMart
  # presumably rejects mixing gene-feature and homolog attributes in a single
  # request -- confirm before merging the two calls.
  orthologs <- getBM(
    attributes = c("ensembl_gene_id", homolog_attributes),
    mart = ensembl
  )

  # Left join: every human gene is kept even when it has no ortholog row.
  final_result <- merge(
    human_genes,
    orthologs,
    by = "ensembl_gene_id",
    all.x = TRUE
  )
  readr::write_csv(final_result, output_path)
  final_result
}

In [None]:
# Chimp (Pan troglodytes)
# Assigned like every other species so the table can be reused later instead
# of only being printed.
chimp_orthologs <- get_species_orthologs("ptroglodytes")

In [None]:
# Chicken (Gallus gallus)
chicken_orthologs <- get_species_orthologs("ggallus")

In [None]:
# Pig (Sus scrofa)
pig_orthologs <- get_species_orthologs("sscrofa")

In [None]:
# Cow (Bos taurus)
cow_orthologs <- get_species_orthologs("btaurus")

In [None]:
# Dog (Canis lupus familiaris)
dog_orthologs <- get_species_orthologs("clfamiliaris")

In [None]:
# Horse (Equus caballus)
horse_orthologs <- get_species_orthologs("ecaballus")

In [None]:
# Sheep (Ovis aries rambouillet)
sheep_orthologs <- get_species_orthologs("oarambouillet")

In [None]:
# Mouse (Mus musculus) # test
mouse_orthologs <- get_species_orthologs("mmusculus")

In [None]:
# Rat (Rattus norvegicus) # test
rat_orthologs <- get_species_orthologs("rnorvegicus")

In [None]:
# Zebrafish (Danio rerio) # test
# Stored under its own name; it previously overwrote `rat_orthologs`.
zebrafish_orthologs <- get_species_orthologs("drerio")

# Check which homologs are available

In [None]:
# Connect to the human gene dataset and list every attribute it exposes.
ensembl_human <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
attributes_human <- listAttributes(ensembl_human)

In [None]:
# Keep only the attributes whose name contains "homolog_ensembl_gene", i.e.
# the homolog gene-ID attributes. The pattern is a literal string, so it is
# matched with fixed = TRUE rather than as a regular expression.
homologs_available <- attributes_human[
  grepl("homolog_ensembl_gene", attributes_human$name, fixed = TRUE),
]

In [None]:
# Create the output directory on first run so the write cannot fail just
# because "data" does not exist yet.
if (!dir.exists("data")) {
  dir.create("data", recursive = TRUE)
}
readr::write_csv(
  homologs_available,
  file.path("data", "human_homologs_biomart.csv")
)