In [1]:
import pandas as pd

# Load TFs file
df_tfs = pd.read_csv('transcription_factors.csv')
df_tfs

Unnamed: 0,transcription_factors
0,NUPR1
1,E2F4
2,TCF3
3,ZFP36
4,CBX5
5,EPAS1
6,SMARCB1
7,TRPS1
8,NOTCH1
9,MXD1


In [2]:
# Sort TFs
df_tfs = df_tfs.sort_values('transcription_factors', ascending=True).reset_index(drop=True)
df_tfs

Unnamed: 0,transcription_factors
0,ARNT
1,CBX5
2,CREM
3,DACH1
4,E2F4
5,EPAS1
6,ETV3
7,HOXD3
8,KLF11
9,LEF1


In [3]:
import requests, sys, json

uniprot_ids = []
for tf in df_tfs['transcription_factors']:

  params = {
    "query": f"{tf} AND reviewed:true AND gene_exact:{tf}",
    "fields": [
      "accession",
      "protein_name"
    ]
  }
  headers = {
    "accept": "application/json"
  }
  base_url = "https://rest.uniprot.org/uniprotkb/search?query=(organism_id:9606)"

  response = requests.get(base_url, headers=headers, params=params)
  if not response.ok:
    response.raise_for_status()
    sys.exit()

  data = response.json()
  accs = data["results"][0]["primaryAccession"]
  uniprot_ids.append(accs)
  print(f"Gene: {tf} --> UniProt ID: {accs}")

Gene: ARNT --> UniProt ID: P27540
Gene: CBX5 --> UniProt ID: P45973
Gene: CREM --> UniProt ID: Q03060
Gene: DACH1 --> UniProt ID: Q9UI36
Gene: E2F4 --> UniProt ID: Q16254
Gene: EPAS1 --> UniProt ID: Q99814
Gene: ETV3 --> UniProt ID: P41162
Gene: HOXD3 --> UniProt ID: P31249
Gene: KLF11 --> UniProt ID: O14901
Gene: LEF1 --> UniProt ID: Q9UJU2
Gene: MXD1 --> UniProt ID: Q05195
Gene: NOTCH1 --> UniProt ID: P46531
Gene: NUPR1 --> UniProt ID: O60356
Gene: RBL1 --> UniProt ID: P28749
Gene: SIX2 --> UniProt ID: Q9NPC8
Gene: SMARCB1 --> UniProt ID: Q12824
Gene: TCF3 --> UniProt ID: Q9HCS4
Gene: TRIM24 --> UniProt ID: O15164
Gene: TRPS1 --> UniProt ID: Q9UHF7
Gene: ZFP36 --> UniProt ID: P26651


In [4]:
df_tfs['UniProt_ID'] = uniprot_ids
df_tfs

Unnamed: 0,transcription_factors,UniProt_ID
0,ARNT,P27540
1,CBX5,P45973
2,CREM,Q03060
3,DACH1,Q9UI36
4,E2F4,Q16254
5,EPAS1,Q99814
6,ETV3,P41162
7,HOXD3,P31249
8,KLF11,O14901
9,LEF1,Q9UJU2
