In [1]:
# Load IPython's autoreload extension and select mode 2, which re-imports
# modified modules before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import json
import time
import requests, sys

#### STEP1: Target list

In [3]:
# Ensembl stable IDs of the target sequences. Note that, despite the variable
# name, the ENSMUST prefix identifies mouse *transcripts*, not genes.
genes = [
    'ENSMUST00000000312',
    'ENSMUST00000018186',
    'ENSMUST00000027675',
    'ENSMUST00000030944',
    'ENSMUST00000037811',
    'ENSMUST00000073490',
    'ENSMUST00000099946',
    'ENSMUST00000113270',
    'ENSMUST00000171239',
]

#### STEP2: Retrieve cDNA and CDS sequences from Ensembl

In [4]:
# Base URL of the Ensembl REST API; endpoint paths are appended to it.
api_prefix = 'https://rest.ensembl.org'

In [5]:
# Download the cDNA and CDS sequence of every ID in `genes` as plain text.
#
# Result layout:
#     genedata[<id>] = {'cdna': <sequence>, 'cds': <sequence>}
#
# A failed download is reported and skipped, so the corresponding key is simply
# absent from that entry and the remaining downloads still run.
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs the cell

genedata = dict()
for gene in genes:
    genedata[gene] = dict()
    for seq_type in ('cdna', 'cds'):
        url = f'{api_prefix}/sequence/id/{gene}?type={seq_type}'
        try:
            r = requests.get(
                url,
                headers={"Content-Type": "text/plain"},
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as exc:
            # Connection errors, timeouts, etc.: report and move on.
            print(f'error: {gene}/{seq_type} - {exc}')
            continue
        if not r.ok:
            # Report the status instead of calling raise_for_status(), which
            # would raise here and abort the whole loop before anything is printed.
            print(f'error: {gene}/{seq_type} - HTTP {r.status_code} {r.reason}')
            continue
        # Strip surrounding whitespace so that a stray trailing newline cannot
        # break the exact substring match performed on these sequences later.
        genedata[gene][seq_type] = r.text.strip()
        print(url)

https://rest.ensembl.org/sequence/id/ENSMUST00000000312?type=cdna
https://rest.ensembl.org/sequence/id/ENSMUST00000000312?type=cds
https://rest.ensembl.org/sequence/id/ENSMUST00000018186?type=cdna
https://rest.ensembl.org/sequence/id/ENSMUST00000018186?type=cds
https://rest.ensembl.org/sequence/id/ENSMUST00000027675?type=cdna
https://rest.ensembl.org/sequence/id/ENSMUST00000027675?type=cds
https://rest.ensembl.org/sequence/id/ENSMUST00000030944?type=cdna
https://rest.ensembl.org/sequence/id/ENSMUST00000030944?type=cds
https://rest.ensembl.org/sequence/id/ENSMUST00000037811?type=cdna
https://rest.ensembl.org/sequence/id/ENSMUST00000037811?type=cds
https://rest.ensembl.org/sequence/id/ENSMUST00000073490?type=cdna
https://rest.ensembl.org/sequence/id/ENSMUST00000073490?type=cds
https://rest.ensembl.org/sequence/id/ENSMUST00000099946?type=cdna
https://rest.ensembl.org/sequence/id/ENSMUST00000099946?type=cds
https://rest.ensembl.org/sequence/id/ENSMUST00000113270?type=cdna
https://rest.ense

#### STEP3: Extract 3'UTR from cDNA and CDS

In [6]:
def extract_utr3(cdna, cds):
    """Return the part of ``cdna`` that follows the first occurrence of ``cds``.

    Parameters
    ----------
    cdna : str
        Full transcript sequence.
    cds : str
        Coding sequence expected to occur as a contiguous substring of ``cdna``.

    Returns
    -------
    str
        Everything in ``cdna`` after the end of ``cds`` (may be empty).

    Raises
    ------
    ValueError
        If ``cds`` is empty or does not occur in ``cdna``. Without this check
        ``str.find`` would return -1 (or 0 for an empty CDS) and the slice
        would silently produce a wrong sequence.
    """
    if not cds:
        raise ValueError('CDS sequence is empty')
    cds_start = cdna.find(cds)
    if cds_start < 0:
        raise ValueError('CDS not found inside cDNA')
    return cdna[cds_start + len(cds):]


# Store the 3'UTR of every entry under the 'utr3' key. Problems are collected
# for all entries first and then reported together, so one bad entry does not
# hide the others and nothing is silently given a wrong sequence.
failures = []
for transcript_id, seqs in genedata.items():
    try:
        seqs['utr3'] = extract_utr3(seqs['cdna'], seqs['cds'])
    except KeyError as exc:
        # 'cdna' or 'cds' is absent, e.g. because its download failed.
        failures.append(f'{transcript_id}: missing {exc} sequence')
    except ValueError as exc:
        failures.append(f'{transcript_id}: {exc}')

if failures:
    raise ValueError("could not extract 3'UTR for:\n  " + '\n  '.join(failures))

In [8]:
# Print every 3'UTR as a FASTA-style record: a '>' header line followed by
# the sequence on the next line.
for transcript_id, seqs in genedata.items():
    print(f'>{transcript_id} utr3\n{seqs["utr3"]}')

>ENSMUST00000000312 utr3
GGGACTAGCAAGTCTCCCCCGTGTGGCACCATGGGAGATGCAGAATAATTATATCAGTGGTCTTTCAGCTCCTTCCCTGAGTGTGTAGAAGAGAGACTGATCTGAGAAGTGTGCAGATTGCATAGTGGTCTCACTCTCCCTACTGGACTGTCTGTGTTAGGATGGTTTTCACTGATTGTTGAAATCTTTTTTTATTTTTTATTTTTACAGTGCTGAGATATAAACTGTGCCTTTTTTTGTTTGTTTGTTTCTGTTTTTGTTCTTTTGAGCTATGATCTGCCCCAGACACAACAGCCCCAAGCCCCTCACACCTCACTAATTTTTTACATTGTGTACTTGCCCTCAATTACCATGTTTGCTGTATTCTAATAGTCACTCATGTTCCTGAATTCTGTTGCCCTGCCCAGGTGATATTCTAGGATGCAGAAATGCCTGGGCCCTTTTATGGTGAGAGACAGGTATCTTGGTGTGGGTGCAACTGCGCTGGATAGTGTGTGTGTTCCCAAGATCTTTCGTGGTATTCCCTCTCCACCTCCAGAGAACTCATTTACAGTGGCATTCCTTGTTCGGCTATGTGTCTGGGGCAGAACAAAAAAAAGGGACCACTATGCATGCTGCACACGTCTCAGATTCTTAGGTACACACCTGATTCTTAGGTGCATGCCATAGTGGGATATGTTGCTTTGATCAGAACCTGCAGGGAGGTTTTCGGGCACCACTTAAGTTTCTTGGCGTTTCTTTCAAACCAAAACTAAAGAATGGTTGTTCTCTGAGAGAGACTGGAGTGCCACCACCAAAGACAGAGGAGAGAAAAGGAGAGAAACCAAACTTGGGGACAGCAACATCAGCGAACCCGGCTAGTTGGCACACCGATGGTGAGGGTACACAGGCGGTGAGACCTATCCCACAAGATTTCTGGAAGACTAGGCTTATCTCAACCAATGTTTTCTGGCTGGAATCTTTGTCCATGTATTC