In [1]:
# Enable IPython's autoreload extension so that edits to imported project
# modules are picked up without restarting the kernel (mode 2).
%load_ext autoreload
%autoreload 2

In [2]:
import json
import time
import requests, sys

#### STEP1: Target list

In [3]:
# Ensembl transcript IDs (ENSMUST...) whose sequences are fetched below.
genes = [
    "ENSMUST00000000312",
    "ENSMUST00000018186",
    "ENSMUST00000027675",
    "ENSMUST00000030944",
    "ENSMUST00000037811",
    "ENSMUST00000073490",
    "ENSMUST00000099946",
    "ENSMUST00000113270",
    "ENSMUST00000171239",
]

#### STEP2: Retrieve cDNA and CDS sequences from Ensembl

In [4]:
# Base URL of the Ensembl REST API; endpoint paths are appended to it.
api_prefix = 'https://rest.ensembl.org'

In [6]:
# Download the cDNA and CDS sequence of every transcript ID in `genes` from the
# REST API at `api_prefix`, storing the response text as
# genedata[transcript_id]['cdna'] and genedata[transcript_id]['cds'].
# A failed request is reported and skipped, so an entry may lack either key.
genedata = dict()
for gene in genes:
    genedata[gene] = dict()
    genesubdata = {
        'cdna': f'/sequence/id/{gene}?type=cdna',
        'cds': f'/sequence/id/{gene}?type=cds'
    }
    for k, v in genesubdata.items():
        url = f'{api_prefix}{v}'
        try:
            # The timeout keeps one stalled connection from hanging the whole cell.
            r = requests.get(url, headers={"Content-Type": "text/plain"}, timeout=30)
        except requests.RequestException as exc:
            # Connection errors and timeouts are reported like HTTP errors so the
            # remaining transcripts are still downloaded.
            print(f'error: {gene}/{k} - {exc}')
            continue
        if not r.ok:
            # Report the status directly: calling r.raise_for_status() here would
            # raise HTTPError before the message is printed and abort the loop.
            print(f'error: {gene}/{k} - {r.status_code} {r.reason}')
            continue
        genedata[gene][k] = r.text
        print(url)



https://rest.ensembl.org/sequence/id/ENSMUST00000000312?type=cdna




https://rest.ensembl.org/sequence/id/ENSMUST00000000312?type=cds


#### STEP3: Extract 3'UTR from cDNA and CDS

In [15]:
# Derive each transcript's 3'UTR as the part of the cDNA that follows the CDS
# and store it under genedata[transcript_id]['utr3'].
for k, v in genedata.items():
    # .get(): either sequence may be absent if its download did not succeed.
    # strip() is a no-op on clean sequences; it guards against stray surrounding
    # whitespace in the response text (assumption - confirm against the API output).
    cdna = v.get('cdna', '').strip()
    cds = v.get('cds', '').strip()
    cds_start = cdna.find(cds) if cdna and cds else -1
    if cds_start < 0:
        # str.find() returns -1 when the CDS is not a substring of the cDNA;
        # slicing from -1 + len(cds) would silently yield a wrong sequence.
        print(f"warning: {k} - CDS could not be located in cDNA; utr3 left empty")
        v['utr3'] = ''
        continue
    v['utr3'] = cdna[cds_start + len(cds):]

In [19]:
# Print every stored 3'UTR as a '>'-prefixed header line followed by the sequence.
for transcript_id in genedata:
    utr3_seq = genedata[transcript_id]["utr3"]
    print(f'>{transcript_id} utr3\n{utr3_seq}\n\n')

>ENSMUST00000000312 utr3
GGGACTAGCAAGTCTCCCCCGTGTGGCACCATGGGAGATGCAGAATAATTATATCAGTGGTCTTTCAGCTCCTTCCCTGAGTGTGTAGAAGAGAGACTGATCTGAGAAGTGTGCAGATTGCATAGTGGTCTCACTCTCCCTACTGGACTGTCTGTGTTAGGATGGTTTTCACTGATTGTTGAAATCTTTTTTTATTTTTTATTTTTACAGTGCTGAGATATAAACTGTGCCTTTTTTTGTTTGTTTGTTTCTGTTTTTGTTCTTTTGAGCTATGATCTGCCCCAGACACAACAGCCCCAAGCCCCTCACACCTCACTAATTTTTTACATTGTGTACTTGCCCTCAATTACCATGTTTGCTGTATTCTAATAGTCACTCATGTTCCTGAATTCTGTTGCCCTGCCCAGGTGATATTCTAGGATGCAGAAATGCCTGGGCCCTTTTATGGTGAGAGACAGGTATCTTGGTGTGGGTGCAACTGCGCTGGATAGTGTGTGTGTTCCCAAGATCTTTCGTGGTATTCCCTCTCCACCTCCAGAGAACTCATTTACAGTGGCATTCCTTGTTCGGCTATGTGTCTGGGGCAGAACAAAAAAAAGGGACCACTATGCATGCTGCACACGTCTCAGATTCTTAGGTACACACCTGATTCTTAGGTGCATGCCATAGTGGGATATGTTGCTTTGATCAGAACCTGCAGGGAGGTTTTCGGGCACCACTTAAGTTTCTTGGCGTTTCTTTCAAACCAAAACTAAAGAATGGTTGTTCTCTGAGAGAGACTGGAGTGCCACCACCAAAGACAGAGGAGAGAAAAGGAGAGAAACCAAACTTGGGGACAGCAACATCAGCGAACCCGGCTAGTTGGCACACCGATGGTGAGGGTACACAGGCGGTGAGACCTATCCCACAAGATTTCTGGAAGACTAGGCTTATCTCAACCAATGTTTTCTGGCTGGAATCTTTGTCCATGTATTC

In [3]:
import os

In [4]:
# Move the working directory up one level. NOTE: this is relative to the
# current directory, so re-running the cell moves up one more level each time.
os.chdir(os.pardir)

In [5]:
import pandas as pd
from rnamotif.connectors.get_ensembl import get_ensembl_seq_by_gene
from rnamotif.utility.ensembl_rna_handler import save_one_fasta_from_ensembl_api_return

In [6]:
# Load the spreadsheet that supplies the `target_id` column.
# The location used to be hard-coded to one workstation; it can now be
# overridden through the S1_TABLE_PATH environment variable, with the original
# path kept as the default so existing runs behave the same.
s1_table_path = os.environ.get(
    'S1_TABLE_PATH',
    r'E:/Users/byeungchun/Downloads/aan2399_table_S1.xlsx',
)
df_s1 = pd.read_excel(s1_table_path)

In [7]:
# Collect the values of the `target_id` column into a plain Python list.
lst_gene = [target_id for target_id in df_s1['target_id']]

In [8]:
# Call the project helper for every ID in lst_gene with seq_type='cdna'.
# NOTE(review): the recorded output of this cell ends with KeyboardInterrupt,
# so confirm `res` was actually assigned by a complete run before saving it.
res = get_ensembl_seq_by_gene(
    lst_gene,
    seq_type='cdna',
)

0 - https://rest.ensembl.org/sequence/id/ENSMUST00000087033?type=cdna
1 - https://rest.ensembl.org/sequence/id/ENSMUST00000178282?type=cdna
2 - https://rest.ensembl.org/sequence/id/ENSMUST00000029805?type=cdna
3 - https://rest.ensembl.org/sequence/id/ENSMUST00000103410?type=cdna
4 - https://rest.ensembl.org/sequence/id/ENSMUST00000073490?type=cdna
5 - https://rest.ensembl.org/sequence/id/ENSMUST00000206682?type=cdna
6 - https://rest.ensembl.org/sequence/id/ENSMUST00000029658?type=cdna
7 - https://rest.ensembl.org/sequence/id/ENSMUST00000029871?type=cdna
8 - https://rest.ensembl.org/sequence/id/ENSMUST00000082235?type=cdna
9 - https://rest.ensembl.org/sequence/id/ENSMUST00000175849?type=cdna
10 - https://rest.ensembl.org/sequence/id/ENSMUST00000109410?type=cdna
11 - https://rest.ensembl.org/sequence/id/ENSMUST00000166620?type=cdna
12 - https://rest.ensembl.org/sequence/id/ENSMUST00000113270?type=cdna
13 - https://rest.ensembl.org/sequence/id/ENSMUST00000178641?type=cdna
14 - https://res

117 - https://rest.ensembl.org/sequence/id/ENSMUST00000143083?type=cdna
118 - https://rest.ensembl.org/sequence/id/ENSMUST00000107392?type=cdna
119 - https://rest.ensembl.org/sequence/id/ENSMUST00000015278?type=cdna
120 - https://rest.ensembl.org/sequence/id/ENSMUST00000177601?type=cdna
121 - https://rest.ensembl.org/sequence/id/ENSMUST00000109542?type=cdna
122 - https://rest.ensembl.org/sequence/id/ENSMUST00000025161?type=cdna
123 - https://rest.ensembl.org/sequence/id/ENSMUST00000101295?type=cdna
124 - https://rest.ensembl.org/sequence/id/ENSMUST00000197072?type=cdna
125 - https://rest.ensembl.org/sequence/id/ENSMUST00000143074?type=cdna
126 - https://rest.ensembl.org/sequence/id/ENSMUST00000106899?type=cdna
127 - https://rest.ensembl.org/sequence/id/ENSMUST00000079957?type=cdna
128 - https://rest.ensembl.org/sequence/id/ENSMUST00000034588?type=cdna
129 - https://rest.ensembl.org/sequence/id/ENSMUST00000033824?type=cdna
130 - https://rest.ensembl.org/sequence/id/ENSMUST00000033169?ty

231 - https://rest.ensembl.org/sequence/id/ENSMUST00000110391?type=cdna
232 - https://rest.ensembl.org/sequence/id/ENSMUST00000194953?type=cdna
233 - https://rest.ensembl.org/sequence/id/ENSMUST00000072061?type=cdna
234 - https://rest.ensembl.org/sequence/id/ENSMUST00000172321?type=cdna
235 - https://rest.ensembl.org/sequence/id/ENSMUST00000087883?type=cdna
236 - https://rest.ensembl.org/sequence/id/ENSMUST00000138367?type=cdna
237 - https://rest.ensembl.org/sequence/id/ENSMUST00000025083?type=cdna
238 - https://rest.ensembl.org/sequence/id/ENSMUST00000080654?type=cdna
239 - https://rest.ensembl.org/sequence/id/ENSMUST00000171797?type=cdna
240 - https://rest.ensembl.org/sequence/id/ENSMUST00000200823?type=cdna
241 - https://rest.ensembl.org/sequence/id/ENSMUST00000048050?type=cdna
242 - https://rest.ensembl.org/sequence/id/ENSMUST00000017851?type=cdna
243 - https://rest.ensembl.org/sequence/id/ENSMUST00000034537?type=cdna
244 - https://rest.ensembl.org/sequence/id/ENSMUST00000110855?ty

345 - https://rest.ensembl.org/sequence/id/ENSMUST00000173665?type=cdna
346 - https://rest.ensembl.org/sequence/id/ENSMUST00000135569?type=cdna
347 - https://rest.ensembl.org/sequence/id/ENSMUST00000090195?type=cdna
348 - https://rest.ensembl.org/sequence/id/ENSMUST00000015894?type=cdna
349 - https://rest.ensembl.org/sequence/id/ENSMUST00000178477?type=cdna
350 - https://rest.ensembl.org/sequence/id/ENSMUST00000151252?type=cdna
351 - https://rest.ensembl.org/sequence/id/ENSMUST00000165532?type=cdna
352 - https://rest.ensembl.org/sequence/id/ENSMUST00000032073?type=cdna
353 - https://rest.ensembl.org/sequence/id/ENSMUST00000208773?type=cdna
354 - https://rest.ensembl.org/sequence/id/ENSMUST00000025844?type=cdna
355 - https://rest.ensembl.org/sequence/id/ENSMUST00000024944?type=cdna
356 - https://rest.ensembl.org/sequence/id/ENSMUST00000189430?type=cdna
357 - https://rest.ensembl.org/sequence/id/ENSMUST00000033133?type=cdna
358 - https://rest.ensembl.org/sequence/id/ENSMUST00000106766?ty

459 - https://rest.ensembl.org/sequence/id/ENSMUST00000168015?type=cdna
460 - https://rest.ensembl.org/sequence/id/ENSMUST00000071455?type=cdna
461 - https://rest.ensembl.org/sequence/id/ENSMUST00000024004?type=cdna
462 - https://rest.ensembl.org/sequence/id/ENSMUST00000033905?type=cdna
463 - https://rest.ensembl.org/sequence/id/ENSMUST00000052725?type=cdna
464 - https://rest.ensembl.org/sequence/id/ENSMUST00000054351?type=cdna
465 - https://rest.ensembl.org/sequence/id/ENSMUST00000115021?type=cdna
466 - https://rest.ensembl.org/sequence/id/ENSMUST00000070597?type=cdna
467 - https://rest.ensembl.org/sequence/id/ENSMUST00000204807?type=cdna
468 - https://rest.ensembl.org/sequence/id/ENSMUST00000034172?type=cdna
469 - https://rest.ensembl.org/sequence/id/ENSMUST00000076463?type=cdna
470 - https://rest.ensembl.org/sequence/id/ENSMUST00000016678?type=cdna
471 - https://rest.ensembl.org/sequence/id/ENSMUST00000202000?type=cdna
472 - https://rest.ensembl.org/sequence/id/ENSMUST00000068519?ty

573 - https://rest.ensembl.org/sequence/id/ENSMUST00000090006?type=cdna
574 - https://rest.ensembl.org/sequence/id/ENSMUST00000056623?type=cdna
575 - https://rest.ensembl.org/sequence/id/ENSMUST00000031542?type=cdna
576 - https://rest.ensembl.org/sequence/id/ENSMUST00000128712?type=cdna
577 - https://rest.ensembl.org/sequence/id/ENSMUST00000053033?type=cdna
578 - https://rest.ensembl.org/sequence/id/ENSMUST00000178280?type=cdna
579 - https://rest.ensembl.org/sequence/id/ENSMUST00000177506?type=cdna
580 - https://rest.ensembl.org/sequence/id/ENSMUST00000207738?type=cdna
581 - https://rest.ensembl.org/sequence/id/ENSMUST00000053766?type=cdna
582 - https://rest.ensembl.org/sequence/id/ENSMUST00000177474?type=cdna
583 - https://rest.ensembl.org/sequence/id/ENSMUST00000033956?type=cdna
584 - https://rest.ensembl.org/sequence/id/ENSMUST00000174699?type=cdna
585 - https://rest.ensembl.org/sequence/id/ENSMUST00000109941?type=cdna
586 - https://rest.ensembl.org/sequence/id/ENSMUST00000015486?ty

KeyboardInterrupt: 

In [32]:
# Resolve the output directory against the current working directory, then hand
# `res` to the project helper together with the target file name.
fasta_dir = os.path.abspath(r'./samples')
save_one_fasta_from_ensembl_api_return(fasta_dir, 'sample1.fasta', res)

True