In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from Bio import Entrez
from tqdm import tqdm
import pickle

In [2]:
data = pd.read_csv (r'data/SFARI-Genes.csv')
cols = data.columns
N, D = data.shape
print(cols)

Index(['status', 'gene-symbol', 'gene-name', 'ensembl-id', 'chromosome',
       'genetic-category', 'gene-score', 'syndromic', 'number-of-reports'],
      dtype='object')


In [3]:
def save_to_txt_file(contents, filename):
    DIR = os.getcwd() + "/results/" + filename
    
    with open(DIR, "w") as f:
        f.write(contents)
        
def save_to_pickle_file(contents, filename):
    DIR = os.getcwd() + "/data/" + filename
    
    with open(DIR, "wb") as handle:
        pickle.dump(contents, handle, protocol=pickle.HIGHEST_PROTOCOL)
        
def open_pickle_file(filename):
    DIR = os.getcwd() + "/data/" + filename
    
    with open(DIR, "rb") as handle:
        data = pickle.load(handle)
        
    return data

# Task 1

In [4]:
gene_symbols = data['gene-symbol'].unique()
print(gene_symbols.size)

1023


In [5]:
# Save Entrez scores

In [45]:
Entrez.email = "s1803764@ed.ac.uk"
output = ""
eid = {}

for i in tqdm(range(gene_symbols.size)):
    g = gene_symbols[i]
    handle = Entrez.esearch(db="gene", retmax=10, term=g + "[sym] homo sapiens[Organism]")
    record = Entrez.read(handle)
    #output += g + ": " + ", ".join(str(x) for x in record['IdList'])
    #output += "\n"
    eid[g] = record['IdList']
    
    if len(record["IdList"]) == 0:
        print(g)
    
#save_to_txt_file(output, "task2-1-eid.txt")

 46%|██████████████████▏                     | 466/1023 [04:42<05:37,  1.65it/s]


KeyboardInterrupt: 

In [22]:
save_to_pickle_file(eid, "eid.pickle")

# Task 2

In [6]:
gene2go = pd.read_csv(os.getcwd() + "/data/gene2go", dtype=str, delimiter="\t")
save_to_pickle_file(gene2go, "gene2go.pickle")

In [20]:
gene2go

Unnamed: 0,#tax_id,GeneID,GO_ID,Evidence,Qualifier,GO_term,PubMed,Category
0,3702,814629,GO:0005634,ISM,located_in,nucleus,-,Component
1,3702,814629,GO:0008150,ND,involved_in,biological_process,-,Process
2,3702,814630,GO:0003700,ISS,enables,DNA-binding transcription factor activity,11118137,Function
3,3702,814630,GO:0005634,ISM,located_in,nucleus,-,Component
4,3702,814630,GO:0006355,TAS,acts_upstream_of_or_within,"regulation of transcription, DNA-templated",11118137,Process
...,...,...,...,...,...,...,...,...
2084173,559292,65052912,GO:0005575,ND,is_active_in,cellular_component,-,Component
2084174,559292,65052912,GO:0008150,ND,involved_in,biological_process,-,Process
2084175,559292,65052913,GO:0003674,ND,enables,molecular_function,-,Function
2084176,559292,65052913,GO:0005575,ND,is_active_in,cellular_component,-,Component


In [8]:
gene2go = open_pickle_file("gene2go.pickle")
#eid = pd.read_pickle(os.getcwd() + "/data/eid.pickle")

In [43]:
go_terms = {}

tax_ids = gene2go["#tax_id"]
human_gene_indexes = tax_ids[tax_ids == "9606"].index
human_genes = gene2go.filter(items=human_gene_indexes, axis = 0)
human_gene_ids = human_genes["GeneID"]
print(human_gene_indexes.size)

for g in tqdm(gene_symbols):
    gene_entrez = eid[g]
    
    if len(gene_entrez) == 0:
        break
    else:
        gene_entrez = gene_entrez[0]

    gene_index = human_gene_ids[human_gene_ids == gene_entrez].unique()

    if gene_index.size >= 1:
        gene_index = int(gene_index[0])

    print(gene_index)
    go_term = gene2go.iloc[gene_index]["GO_term"]
    go_terms[g] = go_term

    print(g)
    print(gene_entrez)
    print(go_term)
    print()

save_to_pickle_file(go_terms, "go-terms.pickle")

331423


  0%|▏                                         | 5/1023 [00:00<00:22, 45.82it/s]

18
ABAT
18
mitochondrial electron transport, ubiquinol to cytochrome c

10349
ABCA10
10349
protein binding

154664
ABCA13
154664
DNA-7-methylguanine glycosylase activity

10347
ABCA7
10347
inward rectifier potassium channel activity

1636
ACE
1636
auxin-activated signaling pathway

43
ACHE
43
intracellular anatomical structure

60
ACTB
60
plastoglobule

51412
ACTL6B
51412
cytosol

81
ACTN4
81
L-alanine transmembrane transporter activity



  1%|▌                                        | 15/1023 [00:00<00:22, 44.80it/s]

95
ACY1
95
phospholipid metabolic process

100
ADA
100
response to UV-B

109
ADCY3
109
response to auxin

111
ADCY5
111
protein serine/threonine kinase activity

132
ADK
132
biosynthetic process

23394
ADNP
23394
protein binding

140
ADORA3
140
mRNA binding

158
ADSL
158
extracellular space

2334
AFF2
2334
cytosol

116987
AGAP1
116987
transmitting tissue development



  2%|█                                        | 25/1023 [00:00<00:22, 44.50it/s]

116986
AGAP2
116986
regulation of gene expression

159
ADSS2
159
endosome

84871
AGBL4
84871
nucleus

392636
AGMO
392636
molecular_function

26523
AGO1
26523
DNA repair

192669
AGO3
192669
Golgi membrane

192670
AGO4
192670
endoplasmic reticulum

186
AGTR2
186
biological_process

27245
AHDC1
27245
extracellular region



  3%|█▍                                       | 35/1023 [00:00<00:23, 42.66it/s]

54806
AHI1
54806
leaf senescence

10142
AKAP9
10142
cellular heat acclimation

220
ALDH1A3
220
molecular_function

7915
ALDH5A1
7915
nucleus

270
AMPD1
270
protein autophosphorylation

275
AMT
275
kinase activity

287
ANK2
287
integral component of endoplasmic reticulum membrane

288
ANK3
288
mitochondrion

29123
ANKRD11
29123
negative regulation of gene expression

729092
AGAP5
729092
extracellular space



  4%|█▊                                       | 45/1023 [00:01<00:22, 43.86it/s]

27161
AGO2
27161
nucleus

79026
AHNAK
79026
cell differentiation

26057
ANKRD17
26057
protein serine phosphatase activity

56899
ANKS1B
56899
biological_process

301
ANXA1
301
nucleus

8905
AP1S2
8905
protein binding

321
APBA2
321
response to phenylpropanoid

322
APBB1
322
copper ion binding

51107
APH1A
51107
response to far red light

89839
ARHGAP11B
89839
regulation of transcription, DNA-templated



  5%|██▏                                      | 56/1023 [00:01<00:20, 47.02it/s]

9743
ARHGAP32
9743
vacuole

394
ARHGAP5
394
clathrin-coated pit

9639
ARHGEF10
9639
transcription cis-regulatory region binding

23229
ARHGEF9
23229
jasmonate-amino synthetase activity

57492
ARID1B
57492
circadian regulation of gene expression

196528
ARID2
196528
GATOR2 complex

9915
ARNT2
9915
glycine decarboxylation via glycine cleavage system

170302
ARX
170302
cytoplasm

8853
ASAP2
8853
response to hormone

142686
ASB14
142686
molecular_function

55870
ASH1L
55870
glucosyltransferase activity



  6%|██▋                                      | 66/1023 [00:01<00:20, 47.49it/s]

438
ASMT
438
cytoplasm

259266
ASPM
259266
hydrolase activity

23245
ASTN2
23245
cis-regulatory region sequence-specific DNA binding

80816
ASXL3
80816
intracellular anatomical structure

57194
ATP10A
57194
transcription elongation from RNA polymerase II promoter

476
ATP1A1
476
microtubule

478
ATP1A3
478
cytoskeleton organization

493
ATP2B2
493
iron ion transmembrane transporter activity

23545
ATP6V0A2
23545
cold acclimation

546
ATRX
546
protein histidine kinase binding

26053
AUTS2
26053
pectin biosynthetic process

135
ADORA2A
135
NAD biosynthesis via nicotinamide riboside salvage pathway



  8%|███                                      | 77/1023 [00:01<00:20, 46.61it/s]

1175
AP2S1
1175
zinc ion binding

552
AVPR1A
552
hydrolase activity, hydrolyzing O-glycosyl compounds

563
AZGP1
563
biological_process

29994
BAZ2B
29994
omega-6 fatty acid desaturase activity

585
BBS4
585
phosphatidylinositol-3,4,5-trisphosphate 5-phosphatase activity

8537
BCAS1
8537
methyl salicylate esterase activity

10295
BCKDK
10295
lipase activity

53335
BCL11A
53335
defense response to fungus

154
ADRB2
154
biological_process



  9%|███▍                                     | 87/1023 [00:01<00:21, 43.92it/s]

29929
ALG6
29929
DNA-3-methyladenine glycosylase activity

367
AR
367
hyperosmotic salinity response

553
AVPR1B
553
extracellular region

63035
BCORL1
63035
nucleus

57448
BIRC6
57448
biological_process

673
BRAF
673
protein binding

675
BRCA2
675
plasma membrane

23476
BRD4
23476
L-ascorbic acid biosynthetic process

9024
BRSK2
9024
nucleus



  9%|███▉                                     | 97/1023 [00:02<00:21, 43.88it/s]

254065
BRWD3
254065
lysosomal lumen acidification

683
BST1
683
sodium:proton antiporter activity

9044
BTAF1
9044
nucleus

8945
BTRC
8945
chloroplast

113246
C12orf57
113246
response to salt stress

643338
C15orf62
643338
positive regulation of dendritic spine development

721
C4B
721
plant-type vacuole

765
CA6
765
DNA-binding transcription factor activity

773
CACNA1A
773
root development

774
CACNA1B
774
DNA binding

775
CACNA1C
775
chromatin binding



 10%|████▏                                   | 107/1023 [00:02<00:19, 46.31it/s]

776
CACNA1D
776
protein binding

777
CACNA1E
777
nucleus

778
CACNA1F
778
nucleus

8913
CACNA1G
8913
biological_process

8912
CACNA1H
8912
protein binding

8911
CACNA1I
8911
protein homooligomerization

781
CACNA2D1
781
cellular response to UV-C

55799
CACNA2D3
55799
positive regulation of leaf senescence

783
CACNB2
783
extracellular region

23705
CADM1
23705
membrane



 11%|████▌                                   | 117/1023 [00:02<00:22, 40.84it/s]

253559
CADM2
253559
protein secretion

8618
CADPS
8618
integral component of cytoplasmic side of endoplasmic reticulum membrane

93664
CADPS2
93664
solute:proton antiporter activity

815
CAMK2A
815
guanylate cyclase activity

816
CAMK2B
816
protein kinase activity

814
CAMK4
814
peptide receptor activity

147968
CAPN12
147968
suberin biosynthetic process

4076
CAPRIN1
4076
single-stranded DNA helicase activity

84433
CARD11
84433
protein binding



 12%|████▉                                   | 127/1023 [00:02<00:21, 42.31it/s]

8573
CASK
8573
nucleus

54897
CASZ1
54897
mRNA binding

54862
CC2D1A
54862
extracellular region

440193
CCDC88C
440193
cell-cell adhesion mediator activity

55297
CCDC91
55297
cytoplasm

881
CCIN
881
cytosol

900
CCNG1
900
nucleus

8812
CCNK
8812
chloroplast

[]
CCSER1
401145
Series([], Name: GO_term, dtype: object)

10575
CCT4
10575
copper ion transmembrane transport

80381
CD276
80381
cell redox homeostasis



 13%|█████▎                                  | 137/1023 [00:03<00:19, 44.45it/s]

952
CD38
952
protein homodimerization activity

9578
CDC42BPB
9578
ATP hydrolysis activity

339479
BRINP3
339479
positive regulation of transcription by RNA polymerase II

782
CACNB1
782
protein binding

83692
CD99L2
83692
DNA binding

1008
CDH10
1008
nucleus

1009
CDH11
1009
mitochondrion

1012
CDH13
1012
defense response

64405
CDH22
64405
extracellular region

1006
CDH8
1006
mRNA binding

1007
CDH9
1007
mRNA binding



 14%|█████▊                                  | 148/1023 [00:03<00:18, 46.51it/s]

8621
CDK13
8621
endoplasmic reticulum tubular network organization

23097
CDK19
23097
oxidoreductase activity, acting on paired donors, with oxidation of a pair of donors resulting in the reduction of molecular oxygen to two molecules of water

1024
CDK8
1024
ubiquitin-protein transferase activity

6792
CDKL5
6792
nucleus

29998
BICRA
29998
1-18:1-2-18:2-phosphatidylcholine synthase activity

92558
BICDL1
92558
transcription coregulator activity

27443
CECR2
27443
mitochondrion

56853
CELF4
56853
nucleus

60677
CELF6
60677
root hair elongation

9662
CEP135
9662
molybdate ion transport

80184
CEP290
80184
chloroplast



 16%|██████▏                                 | 159/1023 [00:03<00:19, 44.49it/s]

95681
CEP41
95681
chloroplast stroma

84952
CGNL1
84952
DNA-binding transcription factor activity

283489
CHAMP1
283489
membrane

1105
CHD1
1105
nucleus

1106
CHD2
1106
biological_process

1107
CHD3
1107
molecular_function

55636
CHD7
55636
ATP binding

57680
CHD8
57680
cytoplasm

1120
CHKB
1120
sucrose metabolic process



 17%|██████▌                                 | 169/1023 [00:03<00:19, 43.43it/s]

5119
CHMP1A
5119
phosphate-containing compound metabolic process

1131
CHRM3
1131
molecular_function

1139
CHRNA7
1139
molecular_function

1142
CHRNB3
1142
mitochondrion

10518
CIB2
10518
endocytosis

23152
CIC
23152
protein binding

23332
CLASP1
23332
tricarboxylic acid cycle

1183
CLCN4
1183
dATP phosphohydrolase activity

2055
CLN8
2055
chloroplast stroma

8218
CLTCL1
8218
regulation of cell cycle



 17%|██████▉                                 | 179/1023 [00:04<00:18, 44.43it/s]

50937
CDON
50937
L-proline biosynthetic process

4359
CHM
4359
transmembrane transporter activity

7461
CLIP2
7461
vacuole

80790
CMIP
80790
nucleus

129607
CMPK2
129607
glycine:2-oxoglutarate aminotransferase activity

54714
CNGB3
54714
cold acclimation

22866
CNKSR2
22866
cytoplasmic stress granule

4849
CNOT3
4849
molecular_function

1268
CNR1
1268
Golgi apparatus

5067
CNTN3
5067
cell wall

152330
CNTN4
152330
phosphatidylcholine biosynthetic process



 18%|███████▍                                | 189/1023 [00:04<00:18, 45.54it/s]

53942
CNTN5
53942
tubulin complex assembly

27255
CNTN6
27255
molecular_function

26047
CNTNAP2
26047
nucleus

79937
CNTNAP3
79937
protein binding

85445
CNTNAP4
85445
carbohydrate binding

129684
CNTNAP5
129684
proteolysis

340267
COL28A1
340267
cellular response to light stimulus

1376
CPT2
1376
Golgi apparatus

8532
CPZ
8532
hydrolase activity, acting on ester bonds

1387
CREBBP
1387
nucleus

7812
CSDE1
7812
abscisic acid-activated signaling pathway



 20%|███████▊                                | 200/1023 [00:04<00:17, 46.65it/s]

64478
CSMD1
64478
mitochondrial nucleoid

1454
CSNK1E
1454
cytoplasm

10664
CTCF
10664
ethylene-activated signaling pathway

1496
CTNNA2
1496
ubiquitin-dependent protein catabolic process

29119
CTNNA3
29119
molecular_function

1499
CTNNB1
1499
biological_process

1501
CTNND2
1501
nucleus

83992
CTTNBP2
83992
protein binding

8452
CUL3
8452
cytoplasm

23019
CNOT1
23019
intracellular membrane-bounded organelle

11151
CORO1A
11151
seed dormancy process



 21%|████████▎                               | 211/1023 [00:04<00:18, 44.47it/s]

53944
CSNK1G1
53944
plasmodesma

1457
CSNK2A1
1457
DNA-binding transcription factor activity

1460
CSNK2B
1460
nucleus

9820
CUL7
9820
regulation of photoperiodism, flowering

1523
CUX1
1523
magnesium ion transmembrane transport

23316
CUX2
23316
extracellular region

1524
CX3CR1
1524
molecular_function

23191
CYFIP1
23191
response to far red light

1539
CYLC2
1539
preprophase band

1593
CYP27A1
1593
cytoplasm



 22%|████████▋                               | 222/1023 [00:04<00:17, 45.68it/s]

747
DAGLA
747
protein binding

27071
DAPP1
27071
spliceosomal tri-snRNP complex

1644
DDC
1644
RNA binding

1654
DDX3X
1654
mitochondrion

168400
DDX53
168400
chloroplast

10522
DEAF1
10522
phototropism

8562
DENR
8562
hydrolase activity, acting on ester bonds

9681
DEPDC5
9681
cellular protein modification process

1717
DHCR7
1717
membrane

22907
DHX30
22907
apical plasma membrane



 23%|█████████                               | 233/1023 [00:05<00:17, 46.19it/s]

23181
DIP2A
23181
nucleus

22982
DIP2C
22982
nucleus

27185
DISC1
27185
nuclear-transcribed mRNA catabolic process, exonucleolytic, 3'-5'

85458
DIXDC1
85458
molecular_function

205428
DIPK2A
205428
glycolytic process

1739
DLG1
1739
cadmium ion transport

1740
DLG2
1740
membrane

1742
DLG4
1742
xenobiotic transmembrane transporter activity

9229
DLGAP1
9229
nucleus

9228
DLGAP2
9228
nucleus

58512
DLGAP3
58512
cytosol



 24%|█████████▌                              | 244/1023 [00:05<00:16, 46.79it/s]

28514
DLL1
28514
heme binding

1747
DLX3
1747
transmembrane transporter activity

1750
DLX6
1750
beta-galactosidase activity

1756
DMD
1756
biological_process

1760
DMPK
1760
transmembrane transporter activity

80315
CPEB4
80315
protein dimerization activity

1584
CYP11B1
1584
sulfotransferase activity

23259
DDHD2
23259
regulation of brassinosteroid mediated signaling pathway

9416
DDX23
9416
extracellular region

1746
DLX2
1746
membrane

1762
DMWD
1762
transmembrane transport



 25%|█████████▉                              | 255/1023 [00:05<00:15, 49.51it/s]

23312
DMXL2
23312
polygalacturonate 4-alpha-galacturonosyltransferase activity

196385
DNAH10
196385
protein serine/threonine kinase activity

8632
DNAH17
8632
molecular_function

55567
DNAH3
55567
cytoplasm

92737
DNER
92737
positive regulation of transcription, DNA-templated

1788
DNMT3A
1788
lateral root morphogenesis

1630
DCC
1630
regulation of root meristem growth

1793
DOCK1
1793
biological_process

9732
DOCK4
9732
chloroplast

81704
DOCK8
81704
endosome to lysosome transport

22845
DOLK
22845
phosphomannomutase activity

57628
DPP10
57628
protein import into chloroplast stroma



 26%|██████████▎                             | 265/1023 [00:05<00:16, 47.34it/s]

10072
DPP3
10072
peroxisome

1803
DPP4
1803
response to salt stress

1804
DPP6
1804
abscisic acid metabolic process

1806
DPYD
1806
protein ubiquitination

1808
DPYSL2
1808
ribonuclease activity

1809
DPYSL3
1809
ribonuclease activity

1812
DRD1
1812
rRNA processing

1813
DRD2
1813
RNA processing

1814
DRD3
1814
chloroplast

1826
DSCAM
1826
biological_process

667
DST
667
trans-Golgi network



 27%|██████████▊                             | 277/1023 [00:06<00:14, 50.26it/s]

128853
DUSP15
128853
biological_process

1857
DVL3
1857
cytoplasm

143241
DYDC1
143241
nucleic acid binding

84332
DYDC2
84332
protein phosphorylation

1778
DYNC1H1
1778
xenobiotic detoxification by transmembrane export across the plasma membrane

1859
DYRK1A
1859
Golgi apparatus

22924
EBF3
22924
molecular_function

1917
EEF1A2
1917
cytoplasm

23167
EFR3A
23167
DNA-binding transcription factor activity

1960
EGR3
1960
Golgi apparatus

79813
EHMT1
79813
apoplast

8666
EIF3G
8666
kinase activity



 28%|███████████▎                            | 288/1023 [00:06<00:18, 40.53it/s]

1977
EIF4E
1977
cytoplasm

1993
ELAVL2
1993
molecular_function

1995
ELAVL3
1995
biological_process

26610
ELP4
26610
histone H3-R17 methylation

2020
EN2
2020
actin filament binding

2033
EP300
2033
biological_process

57634
EP400
57634
zinc ion binding



 29%|███████████▋                            | 298/1023 [00:06<00:17, 41.95it/s]

56946
EMSY
56946
transcription regulator complex

26122
EPC2
26122
chloroplast

2048
EPHB2
2048
threonine-tRNA ligase activity

83481
EPPK1
83481
mitochondrion

57471
ERMN
57471
phototropism

2100
ESR2
2100
RNA polymerase II CTD heptapeptide repeat phosphatase activity

2103
ESRRB
2103
protein threonine phosphatase activity

2109
ETFB
2109
protein phosphorylation

11336
EXOC3
11336
sodium:proton antiporter activity

10640
EXOC5
10640
nucleoplasm

54536
EXOC6
54536
root hair cell tip growth



 30%|████████████                            | 308/1023 [00:06<00:16, 42.89it/s]

23233
EXOC6B
23233
nucleus

2131
EXT1
2131
photosynthesis, light harvesting in photosystem I

55914
ERBIN
55914
vacuolar membrane

2171
FABP5
2171
chlorophyll binding

[]
FAM47A
158724
Series([], Name: GO_term, dtype: object)

147965
FAM98C
147965
regulation of abscisic acid biosynthetic process

6624
FAN1
6624
biological_process

2195
FAT1
2195
endosome organization

2200
FBN1
2200
HOPS complex



 31%|████████████▍                           | 318/1023 [00:07<00:16, 43.25it/s]

80204
FBXO11
80204
SCF-dependent proteasomal ubiquitin-dependent protein catabolic process

51725
FBXO40
51725
response to osmotic stress

343413
FCRL6
343413
G protein-coupled receptor signaling pathway

55079
FEZF2
55079
vesicle organization

2243
FGA
2243
protein N-linked glycosylation

2272
FHIT
2272
biological_process

2332
FMR1
2332
cytosol

2290
FOXG1
2290
molecular_function

27086
FOXP1
27086
histone H3-K9 demethylation

93986
FOXP2
93986
FMN adenylyltransferase activity



 32%|████████████▊                           | 328/1023 [00:07<00:15, 44.50it/s]

2444
FRK
2444
nucleus

9758
FRMPD4
9758
protein binding

9568
GABBR2
9568
vacuole

2556
GABRA3
2556
plant-type cell wall

2557
GABRA4
2557
plasmodesma

2562
GABRB3
2562
plant-type secondary cell wall biogenesis

2567
GABRG3
2567
xyloglucan:xyloglucosyl transferase activity

114805
GALNT13
114805
endosome

79623
GALNT14
79623
biological_process

26290
GALNT8
26290
beta,beta digalactosyldiacylglycerol galactosyltransferase activity



 33%|█████████████▏                          | 338/1023 [00:07<00:15, 45.42it/s]

2628
GATM
2628
lipid droplet

1981
EIF4G1
1981
nucleotidyltransferase activity

54898
ELOVL2
54898
nucleus

55250
ELP2
55250
cytoplasm

5167
ENPP1
5167
nucleus

2078
ERG
2078
nucleus

2167
FABP4
2167
chloroplast envelope

57666
FBRSL1
57666
nucleus

254170
FBXO33
254170
nucleotide binding

2258
FGF13
2258
nuclear RNA-directed RNA polymerase complex

2259
FGF14
2259
production of siRNA involved in gene silencing by small RNA



 34%|█████████████▌                          | 348/1023 [00:07<00:14, 46.51it/s]

2483
FRG1
2483
RNA polymerase II complex binding

2395
FXN
2395
molecular_function

2561
GABRB2
2561
unidimensional cell growth

2590
GALNT2
2590
lipid catabolic process

2620
GAS2
2620
regulation of pollen tube growth

5047
GDA
5047
guanyl-nucleotide exchange factor activity

79893
GGNBP2
79893
nucleus

64599
GIGYF1
64599
terpene synthase activity

26058
GIGYF2
26058
protein threonine phosphatase activity

148979
GLIS1
148979
coumarin metabolic process

2739
GLO1
2739
mitochondrion



 35%|██████████████                          | 359/1023 [00:07<00:14, 47.36it/s]

2742
GLRA2
2742
mitochondrion

2778
GNAS
2778
biological_process

54584
GNB1L
54584
extracellular region

2239
GPC4
2239
chloroplast

10082
GPC6
10082
cell death

2820
GPD2
2820
molecular_function

10243
GPHN
10243
plasma membrane

2861
GPR37
2861
RNA helicase activity

54329
GPR85
54329
transcription by RNA polymerase I

2876
GPX1
2876
molecular_function

2890
GRIA1
2890
plant-type cell wall

2891
GRIA2
2891
plasmodesma



 36%|██████████████▍                         | 370/1023 [00:08<00:13, 47.85it/s]

2894
GRID1
2894
cytosolic ribosome

2895
GRID2
2895
cytosolic small ribosomal subunit

392862
GRID2IP
392862
negative regulation of translation

2898
GRIK2
2898
cellular_component

2899
GRIK3
2899
biological_process

2900
GRIK4
2900
transcription coactivator activity

2901
GRIK5
2901
protein binding

2902
GRIN1
2902
nucleus

2903
GRIN2A
2903
chromatin DNA binding

2260
FGFR1
2260
protein binding

55568
GALNT10
55568
phosphatidylinositol-3,4,5-trisphosphate 5-phosphatase activity



 37%|██████████████▊                         | 380/1023 [00:08<00:13, 47.61it/s]

2670
GFAP
2670
biological_process

2770
GNAI1
2770
proton-transporting ATP synthase activity, rotational mechanism

2904
GRIN2B
2904
chromatin DNA binding

10499
GRIP1
10499
root development

2915
GRM5
2915
molecular_function

2917
GRM7
2917
biological_process

2969
GTF2I
2969
molecular_function

2977
GUCY1A2
2977
chloroplast

3054
HCFC1
3054
transmembrane transporter activity

348980
HCN1
348980
DNA clamp loader activity



 38%|███████████████▏                        | 390/1023 [00:08<00:14, 44.70it/s]

9759
HDAC4
9759
nucleus

55869
HDAC8
55869
defense response to bacterium

3069
HDLBP
3069
Golgi apparatus

283450
HECTD4
283450
polarity specification of anterior/posterior axis

57520
HECW2
57520
extracellular region

220296
HEPACAM
220296
presynaptic membrane

8970
H2BC11
8970
DNA-binding transcription factor activity

8924
HERC2
8924
nucleus

3097
HIVEP2
3097
tRNA wobble uridine modification

59269
HIVEP3
59269
sterol biosynthetic process



 39%|███████████████▋                        | 400/1023 [00:08<00:14, 42.00it/s]

3115
HLA-DPB1
3115
RNA splicing, via endonucleolytic cleavage and ligation

3188
HNRNPH2
3188
defense response

3192
HNRNPU
3192
GDP-mannose transmembrane transport

9456
HOMER1
9456
hydroxyproline O-galactosyltransferase activity

3198
HOXA1
3198
1,3-beta-D-glucan synthase complex

3265
HRAS
3265
5'-3' exodeoxyribonuclease activity

3351
HTR1B
3351
chloroplast

3359
HTR3A
3359
protein phosphorylation

170572
HTR3C
170572
small GTPase binding



 40%|████████████████                        | 410/1023 [00:09<00:14, 43.79it/s]

10075
HUWE1
10075
heterotrimeric G-protein complex

54768
HYDIN
54768
protein folding

3382
ICA1
3382
extracellular space

7850
IL1R2
7850
response to sucrose

11141
IL1RAPL1
11141
response to low fluence blue light stimulus by blue low-fluence system

26280
IL1RAPL2
26280
chloroplast outer membrane

3608
ILF2
3608
protein homodimerization activity

83943
IMMP2L
83943
transcription coregulator activity

3628
INPP1
3628
lipid transport

26173
INTS1
26173
embryo development ending in seed dormancy



 41%|████████████████▍                       | 420/1023 [00:09<00:13, 45.38it/s]

26512
INTS6
26512
cobalt ion binding

128239
IQGAP3
128239
cellular_component

23096
IQSEC2
23096
lipid metabolic process

64207
IRF2BPL
64207
cyclin/CDK positive transcription elongation factor complex

3690
ITGB3
3690
cytoplasm

3708
ITPR1
3708
UDP-glycosyltransferase activity

6453
ITSN1
6453
DNA-binding transcription factor activity

2944
GSTM1
2944
molecular_function

3105
HLA-A
3105
biological_process

3184
HNRNPD
3184
Golgi apparatus



 42%|████████████████▊                       | 430/1023 [00:09<00:12, 45.69it/s]

3185
HNRNPF
3185
Golgi apparatus

3190
HNRNPK
3190
GDP-fucose transmembrane transport

10236
HNRNPR
10236
pollen germination

221092
HNRNPUL2
221092
translational initiation

3479
IGF1
3479
adenylylsulfate kinase activity

3720
JARID2
3720
glycosyltransferase activity

221037
JMJD1C
221037
kinetochore

23189
KANK1
23189
negative regulation of photomorphogenesis

284058
KANSL1
284058
DNA binding

8850
KAT2B
8850
biological_process

7994
KAT6A
7994
transmembrane transporter activity



 43%|█████████████████▏                      | 440/1023 [00:09<00:12, 46.16it/s]

3106
HLA-B
3106
molecular_function

3123
HLA-DRB1
3123
mRNA cap binding complex

3135
HLA-G
3135
malic enzyme activity

3150
HMGN1
3150
RNA polymerase II cis-regulatory region sequence-specific DNA binding

222537
HS3ST5
222537
ATP binding

3290
HSD11B1
3290
nucleus

84056
KATNAL1
84056
plasma membrane

83473
KATNAL2
83473
proton-transporting ATP synthase complex, catalytic core F(1)

3745
KCNB1
3745
response to oxidative stress



 44%|█████████████████▌                      | 450/1023 [00:09<00:12, 44.59it/s]

3746
KCNC1
3746
regulation of gene expression

3751
KCND2
3751
chloroplast stroma

3752
KCND3
3752
plasmodesmata-mediated intercellular transport

3766
KCNJ10
3766
mitochondrion

3772
KCNJ15
3772
chloroplast stroma

10089
KCNK7
10089
blue light signaling pathway

3778
KCNMA1
3778
nitrite reductase (NO-forming) activity

3785
KCNQ2
3785
biological_process

3786
KCNQ3
3786
DNA binding

3790
KCNS3
3790
protein dimerization activity



 45%|█████████████████▉                      | 460/1023 [00:10<00:12, 45.49it/s]

253980
KCTD13
253980
histone acetyltransferase complex

221656
KDM1B
221656
nucleus

51780
KDM3B
51780
regulation of tetrapyrrole metabolic process

23030
KDM4B
23030
ATP binding

23081
KDM4C
23081
protein ubiquitination

10765
KDM5B
10765
biological_process

8242
KDM5C
8242
biological_process

7403
KDM6A
7403
seed coat development

23135
KDM6B
23135
protein heterodimerization activity

202559
KHDRBS2
202559
cytoplasm

57691
KIAA1586
57691
nucleus



 46%|██████████████████▍                     | 470/1023 [00:10<00:12, 43.67it/s]

23303
KIF13B
23303
membrane insertase activity

9928
KIF14
9928
molecular_function

23522
KAT6B
23522
zinc ion transmembrane transporter activity

5927
KDM5A
5927
cytidine deamination

9778
KIAA0232
9778
histone phosphorylation

547
KIF1A
547
regulation of meristem development

3800
KIF5C
3800
mitochondrion

84623
KIRREL3
84623
protein autoubiquitination

83855
KLF16
83855
proteolysis

4297
KMT2A
4297
glycosyltransferase activity

58508
KMT2C
58508
nucleus



 47%|██████████████████▊                     | 481/1023 [00:10<00:11, 47.22it/s]

55904
KMT2E
55904
plant-type vacuole

11133
KPTN
11133
cytosol

11103
KRR1
11103
AMP dimethylallyltransferase activity

353288
KRT26
353288
aggrephagy

284217
LAMA1
284217
protein serine/threonine kinase activity

3912
LAMB1
3912
molecular_function

123169
LEO1
123169
protein binding

3952
LEP
3952
biological_process

10288
LILRB2
10288
peroxisome

64130
LIN7B
64130
molecular_function

51111
KMT5B
51111
mitotic cell cycle

80856
LNPK
80856
vesicle-mediated transport



 48%|███████████████████▏                    | 492/1023 [00:10<00:11, 47.14it/s]

987
LRBA
987
molecular_function

57497
LRFN2
57497
anthocyanin-containing compound metabolic process

145581
LRFN5
145581
pollen sperm cell differentiation

4035
LRP1
4035
defense response to fungus

4036
LRP2
4036
regulation of gene silencing by miRNA

55227
LRRC1
55227
positive regulation of autophagy

64101
LRRC4
64101
P-body

8216
LZTR1
8216
regulation of circadian rhythm

140733
MACROD2
140733
DNA repair

54551
MAGEL2
54551
1-acyl-2-lysophosphatidylserine acylhydrolase activity



 49%|███████████████████▋                    | 503/1023 [00:11<00:10, 47.96it/s]

4128
MAOA
4128
3-oxo-5-alpha-steroid 4-dehydrogenase activity

4129
MAOB
4129
mitochondrion

5595
MAPK3
5595
cytoplasm

4139
MARK1
4139
amidophosphoribosyltransferase activity

4152
MBD1
4152
nucleus

53615
MBD3
53615
beta-glucosidase activity

8930
MBD4
8930
root hair elongation

55777
MBD5
55777
cytosol

114785
MBD6
114785
inositol phosphate dephosphorylation

79143
MBOAT7
79143
nucleus



 50%|████████████████████                    | 513/1023 [00:11<00:11, 45.13it/s]

4173
MCM4
4173
cytosol

4175
MCM6
4175
plasma membrane

79648
MCPH1
79648
regulation of protein localization to cell surface

161357
MDGA2
161357
translation

4204
MECP2
4204
chloroplast

3827
KNG1
3827
peptidyl-prolyl cis-trans isomerase activity

81887
LAS1L
81887
mitochondrion

8861
LDB1
8861
monooxygenase activity

3949
LDLR
3949
carbohydrate transmembrane transporter activity

4010
LMX1B
4010
dihydrofolate reductase activity



 51%|████████████████████▍                   | 524/1023 [00:11<00:11, 45.22it/s]

57689
LRRC4C
57689
protein binding

84445
LZTS2
84445
cytoplasm

116931
MED12L
116931
nucleus

9969
MED13
9969
nucleus

23389
MED13L
23389
red light signaling pathway

4208
MEF2C
4208
cytosol

55857
KIZ
55857
Golgi apparatus

4137
MAPT
4137
dolichol biosynthetic process

100128977





IndexError: single positional indexer is out-of-bounds

In [39]:
human_gene_indexes[0]

643502

In [44]:
eid[""]

[]