In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import time
import requests, sys

#### STEP1: Target list

In [8]:
# Ensembl stable IDs to process, one per line.
# Keep the comma after every entry: Python silently concatenates adjacent
# string literals, which would merge two IDs into one invalid ID.
genes = [
    'ENSMUST00000000312',
    'ENSMUST00000018186',
    'ENSMUST00000027675',
    'ENSMUST00000030944',
    'ENSMUST00000037811',
    'ENSMUST00000073490',
    'ENSMUST00000099946',
    'ENSMUST00000113270',
    'ENSMUST00000171239',
]

#### STEP2: Retrieve cDNA and CDS sequences from Ensembl

In [4]:
# Base URL of the Ensembl REST API; endpoint paths are appended to it.
api_prefix = "https://rest.ensembl.org"

In [6]:
# Download the cDNA and CDS sequence of every ID in `genes` from the Ensembl
# REST API.
# Result layout: genedata[<id>] = {'cdna': <text>, 'cds': <text>}.
# A sequence type whose request fails is reported and left out of that dict,
# so one bad ID does not abort the remaining downloads.
genedata = dict()
for gene in genes:
    genedata[gene] = dict()
    genesubdata = {
        'cdna': f'/sequence/id/{gene}?type=cdna',
        'cds': f'/sequence/id/{gene}?type=cds'
    }
    for k, v in genesubdata.items():
        url = f'{api_prefix}{v}'
        try:
            # NOTE(review): verify=False turns off TLS certificate checking.
            # It is kept so the notebook keeps working in its current
            # environment; prefer pointing `verify` at a CA bundle instead.
            # The timeout prevents a stalled connection from hanging the cell.
            r = requests.get(
                url,
                headers={"Content-Type": "text/plain"},
                verify=False,
                timeout=30,
            )
        except requests.RequestException as exc:
            # Connection failures and timeouts: report and move on.
            print(f'error: {gene}/{k} - {exc}')
            continue
        if not r.ok:
            # Report the HTTP status directly. Calling raise_for_status() here
            # would raise for every non-OK response, so the message would never
            # be printed and the loop would stop instead of skipping.
            print(f'error: {gene}/{k} - {r.status_code} {r.reason}')
            continue
        genedata[gene][k] = r.text
        print(url)



https://rest.ensembl.org/sequence/id/ENSMUST00000000312?type=cdna




https://rest.ensembl.org/sequence/id/ENSMUST00000000312?type=cds


#### STEP3: Extract 3'UTR from cDNA and CDS

In [15]:
# Derive the 3'UTR of each entry as the part of the cDNA that follows the CDS.
# Every entry receives a 'utr3' key; it is None when the 3'UTR cannot be
# determined (a sequence is missing, or the CDS does not occur in the cDNA).
for k, v in genedata.items():
    cdna = v.get('cdna')
    cds = v.get('cds')
    if not cdna or not cds:
        # One of the downloads was skipped or came back empty.
        print(f"warning: {k} - cDNA and/or CDS missing, 3'UTR not extracted")
        v['utr3'] = None
        continue
    cds_start = cdna.find(cds)
    if cds_start == -1:
        # str.find returns -1 when there is no match; slicing from
        # -1 + len(cds) would silently produce an unrelated tail of the cDNA.
        print(f"warning: {k} - CDS not found in cDNA, 3'UTR not extracted")
        v['utr3'] = None
        continue
    v['utr3'] = cdna[cds_start + len(cds):]

In [19]:
# Print every 3'UTR as a '>'-prefixed header line followed by the sequence,
# with blank lines separating the records.
for seq_id, record in genedata.items():
    header = f'>{seq_id} utr3'
    print(f'{header}\n{record["utr3"]}\n\n')

>ENSMUST00000000312 utr3
GGGACTAGCAAGTCTCCCCCGTGTGGCACCATGGGAGATGCAGAATAATTATATCAGTGGTCTTTCAGCTCCTTCCCTGAGTGTGTAGAAGAGAGACTGATCTGAGAAGTGTGCAGATTGCATAGTGGTCTCACTCTCCCTACTGGACTGTCTGTGTTAGGATGGTTTTCACTGATTGTTGAAATCTTTTTTTATTTTTTATTTTTACAGTGCTGAGATATAAACTGTGCCTTTTTTTGTTTGTTTGTTTCTGTTTTTGTTCTTTTGAGCTATGATCTGCCCCAGACACAACAGCCCCAAGCCCCTCACACCTCACTAATTTTTTACATTGTGTACTTGCCCTCAATTACCATGTTTGCTGTATTCTAATAGTCACTCATGTTCCTGAATTCTGTTGCCCTGCCCAGGTGATATTCTAGGATGCAGAAATGCCTGGGCCCTTTTATGGTGAGAGACAGGTATCTTGGTGTGGGTGCAACTGCGCTGGATAGTGTGTGTGTTCCCAAGATCTTTCGTGGTATTCCCTCTCCACCTCCAGAGAACTCATTTACAGTGGCATTCCTTGTTCGGCTATGTGTCTGGGGCAGAACAAAAAAAAGGGACCACTATGCATGCTGCACACGTCTCAGATTCTTAGGTACACACCTGATTCTTAGGTGCATGCCATAGTGGGATATGTTGCTTTGATCAGAACCTGCAGGGAGGTTTTCGGGCACCACTTAAGTTTCTTGGCGTTTCTTTCAAACCAAAACTAAAGAATGGTTGTTCTCTGAGAGAGACTGGAGTGCCACCACCAAAGACAGAGGAGAGAAAAGGAGAGAAACCAAACTTGGGGACAGCAACATCAGCGAACCCGGCTAGTTGGCACACCGATGGTGAGGGTACACAGGCGGTGAGACCTATCCCACAAGATTTCTGGAAGACTAGGCTTATCTCAACCAATGTTTTCTGGCTGGAATCTTTGTCCATGTATTC

In [4]:
import os

In [5]:
# Move the working directory one level up.
# NOTE(review): presumably done so the project-local `rnamotif` package
# imported below resolves from the repository root — confirm.
# The path is relative, so re-running this cell moves up one more level.
os.chdir(os.pardir)

In [6]:
from rnamotif.connectors.get_ensembl import get_ensembl_seq_by_gene

In [9]:
# Fetch the cDNA sequence for every ID in `genes` through the project helper.
# NOTE(review): is_ssl_verification=False presumably disables TLS certificate
# checks inside the helper — confirm against its implementation.
res = get_ensembl_seq_by_gene(
    genes,
    seq_type='cdna',
    is_ssl_verification=False,
)



In [10]:
# Display the full helper result. The recorded output has the layout
# {id: {'cdna': {'retsts': <bool>, 'retval': <str>}}}.
res

{'ENSMUST00000000312ENSMUST00000018186': {'cdna': {'retsts': False,
   'retval': '{"error":"ID \'ENSMUST00000000312ENSMUST00000018186\' not found"}'}},
 'ENSMUST00000027675': {'cdna': {'retsts': True,
   'retval': 'AAAACAGTTTCAGTTTTGACAATAGTCACCAGTAGTGCCTTCCTGGAAGCTTCTAGAAGGGTACATACTGGCTTGCAGGCTGTGCCCGAAACTGGATCACCTGGAGAGAAGGAAGTAGCTAAAACATTCTCATACAAGAAGCCAACCTGAGCGGCACAGCCCCCCTGGAAGCCACAAGCAATGAGGCTCTACTTGTTCACGCTCTTGGTAACTGTCTTTTCAGGGGTCTCCACAAAAAGCCCCATATTTGGTCCCCAGGAGGTGAGTAGTATAGAAGGCGACTCTGTTTCCATCACGTGCTACTACCCAGACACCTCTGTCAACCGGCACACCCGGAAATACTGGTGCCGACAAGGAGCCAGCGGCATGTGCACAACGCTCATCTCTTCAAATGGCTACCTCTCCAAGGAGTATTCAGGCAGAGCCAACCTCATCAACTTCCCAGAGAACAACACATTTGTGATTAACATTGAGCAGCTCACCCAGGACGACACTGGGAGCTACAAGTGTGGCCTGGGTACCAGTAACCGAGGCCTGTCCTTCGATGTCAGCCTGGAGGTCAGCCAGGTTCCTGAGTTGCCGAGTGACACCCACGTCTACACAAAGGACATAGGCAGAAATGTGACCATTGAATGCCCTTTCAAAAGGGAGAATGCTCCCAGCAAGAAATCCCTGTGTAAGAAGACAAACCAGTCCTGCGAACTTGTCATTGACTCTACTGAGAAGGTGAACCCCAGCTATATAGGCAGAGCAAAACTTTTTATGAAAGGGACCGACCTAACTGT

In [11]:
# List the IDs present in the helper result.
# NOTE(review): the recorded output contains the merged key
# 'ENSMUST00000000312ENSMUST00000018186', which is what a missing comma
# between the first two `genes` entries would produce — re-run to refresh.
res.keys()

dict_keys(['ENSMUST00000000312ENSMUST00000018186', 'ENSMUST00000027675', 'ENSMUST00000030944', 'ENSMUST00000037811', 'ENSMUST00000073490', 'ENSMUST00000099946', 'ENSMUST00000113270', 'ENSMUST00000171239'])