In [4]:
from neo4j import GraphDatabase # import graph data base library. make sure neo4k is installed by doing "pip instal neo4j" on your terminal/command line
import pandas as pd
from tqdm import tqdm
import ast

url = "bolt://localhost:7687" # do ":server status" in neo4j desktop on YOUR own database

driver = GraphDatabase.driver(url, auth=("neo4j", "heart"))

In [5]:
def create_KG(tx):
    """
    Deploys protein and drug nodes
    """
    tx.run('''
    LOAD CSV WITH HEADERS FROM 'file:///edge_list.csv' AS row
    MERGE (p: Protein {UniProtID: row.UNIPROT_ID})
    MERGE (d: Drug {DrugbankID: row.DRUGBANK_ID})
    WITH row, p, d
    MERGE (p)-[:HAS_RELATED_DRUG]->(d)
    ''')

In [6]:
def create_categories(tx):
    """
    updates the drug categories
    """
    tx.run('''
    LOAD CSV WITH HEADERS FROM 'file:///node_list.csv' AS row
    UNWIND split(row.CATEGORIES, ',') AS Category
    MERGE (c:Category {name: Category})
    with row, c
    MATCH (d: Drug {DrugbankID: row.DRUGBANK_ID})
    MERGE (d)-[:IS_IN_CATEGORY]->(c)
    ''')

In [7]:
##########################
#Protein-pmid list
def get_unid_pmid_list(tx) -> any:
    """
    get list of proteins for each drug
    @param self
    @return a data table
    """
    query = ("LOAD CSV WITH HEADERS FROM 'file:///edge_list_unid_and_pmid.csv' AS row "
             "RETURN row.UNIPROT_ID as UNIPROT_ID, row.PMID AS PMID")
    result = tx.run(query)
    return result.data()

def process_unid_pmid_list() -> None:
    """
    creates a csv from the data
    @param self
    @return None
    """
    #grab the table from the query
    result = driver.session().write_transaction(get_unid_pmid_list)
    #create dataframe
    result = pd.DataFrame(result)
    
    #create the string to a list
    for element in tqdm(range(len(result['PMID'])), desc = "Parsing CSV"):
        result['PMID'][element] = ast.literal_eval(result['PMID'][element])
    return result

def unid_pmid_query_for_deploy(tx, pmid) -> None:
    query = ("MERGE (p: PMID{pmid : $pmid})")
    result = tx.run(query, pmid = pmid)
    
def unid_pmid_query_for_match(tx, unid, pmid) -> None:
    query = ("MATCH (p:Protein) WHERE p.UniProtID = $unid "
             "MATCH (s:PMID) WHERE s.pmid = $pmid "
             "MERGE (p)<-[:PMID_TARGET]-(s)")
    result = tx.run(query, unid = unid, pmid = pmid)

def create_unid_pmids() -> None:
    """
    deploys pmids
    """
    result = process_unid_pmid_list()

    #iterate throught the dataframe
    for i in tqdm(range(len(result)), "Deploying Protein PMIDs: "):
        for j in range(len(result["PMID"][i])):
            driver.session().write_transaction(unid_pmid_query_for_deploy, result["PMID"][i][j])
            driver.session().write_transaction(unid_pmid_query_for_match, result["UNIPROT_ID"][i], result["PMID"][i][j])
    

In [8]:
def get_dbid_pmid_list(tx) -> any:
    """
    get list of proteins for each drug
    @param self
    @return a data table
    """
    query = ("LOAD CSV WITH HEADERS FROM 'file:///edge_list_dbid_and_pmid.csv' AS row "
             "RETURN row.DRUGBANK_ID as DRUGBANK_ID, row.PMID AS PMID")
    result = tx.run(query)
    return result.data()

def process_dbid_pmid_list() -> None:
    """
    creates a csv from the data
    @param self
    @return None
    """
    #grab the table from the query
    result = driver.session().write_transaction(get_dbid_pmid_list)
    #create dataframe
    result = pd.DataFrame(result)
    
    #create the string to a list
    for element in tqdm(range(len(result['PMID'])), desc = "Parsing CSV"):
        result['PMID'][element] = ast.literal_eval(result['PMID'][element])
    return result

def dbid_pmid_query_for_deploy(tx, pmid) -> None:
    query = ("MERGE (p: PMID{pmid : $pmid})")
    result = tx.run(query, pmid = pmid)
    
def dbid_pmid_query_for_match(tx, dbid, pmid) -> None:
    query = ("MATCH (p:Drug) WHERE p.DrugbankID = $dbid "
             "MATCH (s:PMID) WHERE s.pmid = $pmid "
             "MERGE (p)<-[:PMID_TARGET]-(s)")
    result = tx.run(query, dbid = dbid, pmid = pmid)

def create_dbid_pmids() -> None:
    """
    deploys pmids
    """
    result = process_dbid_pmid_list()

    #iterate throught the dataframe
    for i in tqdm(range(len(result)), "Deploying Drug PMIDs: "):
        for j in range(len(result["PMID"][i])):
            if result["PMID"][i][j] != None:
                driver.session().write_transaction(dbid_pmid_query_for_deploy, result["PMID"][i][j])
                driver.session().write_transaction(dbid_pmid_query_for_match, result["DRUGBANK_ID"][i], result["PMID"][i][j])

In [9]:
def constraint_unique_uniprotID(tx):
    tx.run('''
    CREATE CONSTRAINT unique_uniprotID ON (p:Protein) ASSERT p.UniProtID IS UNIQUE
    ''')
def constraint_unique_drugbankID(tx):
    tx.run('''
    CREATE CONSTRAINT unique_drugbankID ON (d:Drug) ASSERT d.DrugbankID IS UNIQUE
    ''')
def constraing_unique_pmids(tx):
    tx.run('''
    CREATE CONSTRAINT unique_PMID ON (d:pmid) ASSERT d.pmid IS UNIQUE
    ''')
def constraint_unique_categories(tx):
    tx.run('''
    CREATE CONSTRAINT unique_categories ON (c:Category) ASSERT c.name IS UNIQUE
    ''')


In [10]:
with driver.session() as session: 
    session.write_transaction(constraint_unique_uniprotID) 
    session.write_transaction(constraint_unique_drugbankID)
    session.write_transaction(constraint_unique_categories)
    session.write_transaction(create_KG)
    session.write_transaction(create_categories)

In [11]:
#run pmids
create_unid_pmids()

Parsing CSV: 100%|██████████| 828/828 [00:00<00:00, 13481.01it/s]
Deploying Protein PMIDs: 100%|██████████| 828/828 [13:08<00:00,  1.05it/s]


In [12]:
#run pmids with dbid
create_dbid_pmids()

Parsing CSV: 100%|██████████| 207/207 [00:00<00:00, 16574.80it/s]
Deploying Drug PMIDs: 100%|██████████| 207/207 [00:59<00:00,  3.46it/s]


----------------Graph Algorithms--------------------

@cypher:

    CALL gds.graph.create(
    'graph',
    ['Category', 'Drug', 'Protein', 'PMID'], 
    ['HAS_RELATED_DRUG', 'IS_IN_CATEGORY', 'PMID_TARGET']
    )

    
    CALL gds.graph.create(
    '4th',
    ['Drug', 'Protein', 'PMID'], 
    ['HAS_RELATED_DRUG', 'PMID_TARGET']
    )

In [13]:
from neo4j import GraphDatabase # import graph data base library. make sure neo4k is installed by doing "pip instal neo4j" on your terminal/command line
import pandas as pd
from tqdm import tqdm
import ast
import numpy as np

url = "bolt://localhost:7687" # do ":server status" in neo4j desktop on YOUR own database

driver = GraphDatabase.driver(url, auth=("neo4j", "heart"))

In [16]:
class pagerank():
    """Class to run pageranke"""
    def __init__(self) -> None:
        self.driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "heart"))

    def close(self) -> None:
        self.driver.close()

    #estimation for memory
    @classmethod
    def memory_estimation(cls, tx) -> any:
        """
        @param cls is the class
        @param tx is the transaction
        @returns the data for the memory
        """
        query = ("Call gds.pageRank.write.estimate('graph', {writeProperty: 'pageRank', maxIterations: 30, dampingFactor: 0.85}) YIELD nodeCount, relationshipCount, bytesMin, bytesMax, requiredMemory")
        result = tx.run(query)
        return result.single()

    @classmethod
    def pagerank(cls, tx) -> any:
        """
        @param cls is the class
        @param tx is the transaction
        @return result.data() is the data of the pagerank
        """
        query = ("Call gds.pageRank.stream('4th') YIELD nodeId, score RETURN gds.util.asNode(nodeId).DrugbankID AS DrugbankID, score ORDER BY score DESC, DrugbankID ASC")
        result = tx.run(query)
        return result.data()


    def estimate_pagerank(self) -> any:
        """
        @param self
        @return result is the result of the memory estimation
        """
        result = self.driver.session().write_transaction(self.memory_estimation)
        return result


    def run_pagerank(self) -> any:
        """
        @param self
        @return result is the dataframe from the pagerank
        """
        result = self.driver.session().write_transaction(self.pagerank)
        result = pd.DataFrame(result)
        return result

In [18]:
hello = pagerank()
result = hello.run_pagerank()
result

Unnamed: 0,DrugbankID,score
0,DB12010,458.105091
1,DB02709,82.677919
2,DB00945,52.978173
3,DB06154,27.763331
4,DB00421,27.372005
...,...,...
12724,,0.150000
12725,,0.150000
12726,,0.150000
12727,,0.150000


Local Clustering Coefficient requires all relationships to be undirected

@cypher:


    CALL gds.graph.create(
    '5th',
    ['Drug', 'Protein', 'PMID'], 
    ['HAS_RELATED_DRUG', 'PMID_TARGET']
    {
      HAS_RELATED_DRUG: {
        orientation: 'UNDIRECTED'
      }, 
      PMID_TARGET: {
        orientation: 'UNDIRECTED'
      }
    }
    )


    CALL gds.graph.create(
    'drug-protein',
    ['Drug', 'Protein'], 
    {
      HAS_RELATED_DRUG: {
        orientation: 'UNDIRECTED'
      }
    }
    )

In [21]:
class algo():
    """Class to run pageranke"""
    def __init__(self) -> None:
        self.driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "heart"))

    def close(self) -> None:
        self.driver.close()

    @classmethod
    def algo(cls, tx) -> any:
        """
        @param cls is the class
        @param tx is the transaction
        @return result.data() is the data of the cluster
        """
        query = ("""
        CALL gds.nodeSimilarity.stream('drug-protein')
        YIELD node1, node2, similarity
        RETURN gds.util.asNode(node1).DrugbankID AS nodeID_1, gds.util.asNode(node2).DrugbankID AS nodeID_2, similarity
        ORDER BY similarity DESCENDING, nodeID_1, nodeID_2
        """)
        result = tx.run(query)
        return result.data()


    def run_algo(self) -> any:
        """
        @param self
        @return result is the dataframe from the pagerank
        """
        result = self.driver.session().write_transaction(self.algo)
        result = pd.DataFrame(result)
        return result

In [22]:
pd.set_option('display.max_rows', 30)
hello = algo()
result = hello.run_algo()
#extract nodes whose similarity is 1
result = result[result['similarity'] == 1]
#result.drop()
result

Unnamed: 0,nodeID_1,nodeID_2,similarity
0,DB00015,DB00029,1.0
1,DB00029,DB00015,1.0
2,DB00235,DB04880,1.0
3,DB00311,DB00703,1.0
4,DB00407,DB06271,1.0
...,...,...,...
4451,,,1.0
4452,,,1.0
4453,,,1.0
4454,,,1.0


## ADDING UNIREF

In [23]:
# ADD UNIREF 50
import pandas as pd
import numpy as np
df = pd.read_csv("/Users/josephramirez/Desktop/edge_list.csv")
all_unique_proteins = np.array(df.UNIPROT_ID.unique())
print(*all_unique_proteins, sep='\t')

import urllib.parse
import urllib.request
import csv

# FOR UNIREF 50
url = 'https://www.uniprot.org/uploadlists/'

params = {
'from': 'ACC+ID',
'to': 'NF50',
'format': 'tab',
'query': 'P00734	P05164	P00747	P02671	P05121	Q03405	P05120	P05154	P14543	P39900	P98164	Q9Y5Y6	P05452	P05787	P07355	P27797	P27824	Q07954	P04004	P05106	P08514	P12318	P31994	P00451	P02776	P07204	P07225	P12259	P35237	Q9UNN8	P25116	P47712	P06276	P12821	P46059	P46663	Q16348	O15439	O15440	O76074	P05181	P08183	P08684	P10635	P11712	P18545	P20815	P24462	P33261	Q13956	Q5T3U5	Q9Y6L6	P05177	P10632	P20813	Q9H244	P02768	P55011	Q13621	P43005	P54289	Q01650	P00915	P00918	P22748	Q14432	O00305	O60840	O60939	O95342	P02763	P08588	P25100	P35348	P35368	P35498	P35499	P48050	P54284	P63252	Q01118	Q01668	Q02641	Q07699	Q08289	Q13698	Q13936	Q14500	Q14524	Q15858	Q86VL8	Q8IWT1	Q96FL8	Q99250	Q9NY46	Q9NY72	Q9UI33	Q9UNX9	Q9UQD0	Q9Y5Y9	O15244	P07550	P05178	P08683	P15559	Q08257	Q9BQB6	O95180	Q9NY47	P00813	P04798	P09874	P27815	P29274	P29275	P30542	Q07343	Q15155	Q16678	Q7Z5B4	Q8TCT9	Q92769	Q99829	Q9Y4R7	Q9Y694	P19652	P07451	P43166	Q4U2R8	Q06432	P00797	P22309	P23219	P48048	P17405	Q00975	Q8IZS8	Q9P0X4	A0A024R8I1	P05023	P05108	P46721	Q6ZQN7	Q86UW1	Q86UW2	P08235	P35869	P01008	P05546	O60488	P11509	P19793	P28702	P37231	P48443	Q03181	Q07869	O00555	O43497	O60359	O75469	P04150	P04278	P05093	P06401	P10275	P15538	P18405	P19099	P31213	P62955	Q15878	Q7Z3S7	Q8WXS5	Q92887	Q9BXT2	Q9H8P0	Q9UBN1	Q9UF02	Q9Y698	P04731	P05091	P14902	P33402	P51580	P55017	Q12791	O15245	O75751	P08913	P18089	Q12809	Q9UNQ0	P28335	P30926	P31645	P32297	P61024	Q15822	Q14097	O15530	P35354	H3BUU9	P23141	P42898	O00591	O14764	O60656	P00374	P02708	P06133	P14416	P14867	P16662	P18505	P18507	P19224	P21728	P21918	P22310	P28223	P28472	P31644	P34903	P35367	P35503	P41145	P42261	P46098	P47869	P47870	P48169	P54855	P78334	Q16445	Q8N1C3	Q99928	Q9HAW7	Q9HAW8	Q9HAW9	Q9UN88	O43570	P23280	P35218	Q16790	Q8N1Q1	Q9ULX7	Q9Y2D0	P00742	P08908	P13945	P21397	P28222	Q8TCC7	P00749	P19634	P19801	P37088	P51168	P51170	P51172	P78348	Q16515	Q9H015	Q9TRC7	Q2M3G0	P54710	Q6FHJ7	P08172	P08173	P08912	P0DP23	P11229	P20309	P54750	Q01064	O15427	O76082	O94956	P05543	P40261	P49019	P53985	Q15274	Q8N695	Q8TDS4	P0DMS8	P55263	Q9HAS3	Q08493	Q08499	O15438	P33260	P33527	Q14654	O75795	P30556	Q96S37	Q9NRM0	P05230	P08620	P09038	P52209	Q92959	Q9HC97	Q9NSA0	P16066	P34995	P43116	Q9UIG8	Q9Y5Y4	P21589	P07099	P29972	O75762	Q7Z2W7	Q8NET8	P00176	P05183	P08473	P13569	Q14973	Q9H2X9	Q9UP95	P09210	P09211	Q9UJU2	O75493	P27338	P35219	Q06278	Q9NS85	P63316	Q15842	P18825	O14920	O43741	P01106	P04637	P11021	P11245	P12004	P24385	P25101	P25963	P27361	P28482	P29466	P31152	P42574	P51812	P54619	P54646	P98066	Q04828	Q13131	Q13164	Q16659	Q8TD08	Q9UGI9	Q9UGJ0	Q9Y478	P15144	P35610	Q9UHC9	P11511	Q15166	P53805	Q9NPD5	Q9Y233	P09488	P11766	P47989	P02585	P05412	P23786	P50416	P00750	P43119	O95750	P11362	P14210	P16109	P21802	P22455	Q9Y251	Q9UK17	P10827	P10828	P51589	Q86YN6	Q9H252	Q9NS40	P15104	O95298	P15692	P16581	P16860	P17302	P19320	Q16665	O95255	P12104	P08253	P09960	P14780	P09086	P14859	P23975	P25103	P28221	P28566	P29475	P30939	P35372	P41143	P41595	Q8TCU5	Q693P7	P03886	P23415	P98194	Q96ER9	Q16647	P51787	P13674	Q16853	P27169	P01375	Q13370	Q09013	O95622	P63092	Q08462	P02766	P03372	P05067	P08648	P09917	P11166	P16050	P16083	P16152	P31749	P37840	P48039	P49286	P54577	P68400	Q14994	Q86T13	Q8TCG2	Q92945	Q96EB6	P00378	P05089	P35222	Q13336	Q15849	Q8XB74	P19429	P01584	P05231	P19838	Q00653	P04275	P21964	P48551	P29474	P68871	P69905	O00748	Q9Y5N1	P10646	O75173	P13612	P48061	O75116	P17612	P61925	Q13464	P01100	P35228	Q05586	Q9UHC3	Q9UM01	Q9Y2I1	P31639	P24530	P06576	Q96PD7	O15554	Q16558	Q86W47	Q92952	Q9H2S1	Q9NPA1	Q9UGI6	Q9Y691	P04406	P39023	P40926	P50213	Q02218	Q13423	P53007	Q13183	Q86YT5	Q9BQT8	O60706	O00408	O60658	O76083	O95263	P09619	P16499	P35913	P51160	Q13946	Q14123	Q9HCR9	Q9NP56	P05186	P19827	P35558	P10415	Q07817	Q07820	O00238	O00329	O00444	O00519	O00750	O14578	O14730	O14757	O14936	O14965	O14976	O15075	O15146	O15197	O15264	O43283	O43293	O43353	O60285	O60674	O75385	O75460	O75716	O75747	O75914	O94768	O94804	O95382	O95747	O95819	O95835	O96013	O96017	P00519	P00533	P04049	P04626	P04629	P05129	P06213	P06239	P06241	P06493	P07332	P07333	P07498	P07711	P07947	P07948	P07949	P08581	P08631	P08922	P09769	P0C264	P10721	P11309	P11802	P12931	P14616	P15056	P16234	P16591	P17948	P19525	P19784	P21709	P22607	P22694	P23458	P25774	P27448	P29317	P29320	P29322	P29323	P29376	P29597	P30291	P30530	P33981	P35590	P35916	P35968	P36507	P36888	P36896	P36897	P37173	P41240	P41743	P42345	P42679	P42680	P42681	P42684	P42685	P43403	P43405	P45984	P46734	P48729	P48736	P49759	P49760	P49761	P49840	P49841	P51451	P51617	P51813	P51955	P51956	P51957	P52333	P52564	P53350	P53355	P53667	P53671	P53779	P54756	P54760	P54762	P54764	P57059	P57078	P62344	P78356	P80192	P9WI81	Q00532	Q00536	Q00537	Q02156	Q02763	Q02779	Q04759	Q04771	Q04912	Q05397	Q05655	Q06187	Q06418	Q07912	Q08345	Q08881	Q12851	Q12852	Q12866	Q13153	Q13163	Q13177	Q13188	Q13233	Q13237	Q13470	Q13523	Q13546	Q13554	Q13555	Q13557	Q13627	Q13873	Q13882	Q14012	Q14164	Q14289	Q14680	Q15139	Q15208	Q15303	Q15375	Q15418	Q15569	Q15746	Q16288	Q16512	Q16513	Q16539	Q16584	Q16620	Q16816	Q16832	Q2M2I8	Q32MK0	Q52WX2	Q56UN5	Q59H18	Q5S007	Q6DT37	Q6P3R8	Q6PHR2	Q6ZN16	Q7KZI7	Q7L7X3	Q7Z460	Q86UE8	Q86V86	Q86YV6	Q8IU85	Q8IVH8	Q8IW41	Q8IY84	Q8IYT8	Q8N4C8	Q8N568	Q8N5S9	Q8NEV4	Q8NFD2	Q8NG66	Q8NI60	Q8TBX8	Q8TD19	Q8TDR2	Q8WU08	Q92772	Q92918	Q96BR1	Q96D53	Q96GD4	Q96L34	Q96NX5	Q96PY6	Q96Q40	Q96RR4	Q99640	Q99759	Q99808	Q9BQI3	Q9BRS2	Q9BUB5	Q9BVS4	Q9BXA7	Q9BYT3	Q9C098	Q9H093	Q9H0K1	Q9H1R3	Q9H2G2	Q9H2K8	Q9H2X6	Q9H3Y6	Q9H422	Q9H4B4	Q9HAZ1	Q9HBH9	Q9NQU5	Q9NRH2	Q9NRP7	Q9NSY1	Q9NWZ3	Q9NYL2	Q9NYY3	Q9P0L2	Q9P286	Q9P289	Q9P2K8	Q9UBF8	Q9UEE5	Q9UEW8	Q9UF33	Q9UHD2	Q9UIK4	Q9UK32	Q9UKE5	Q9UKI8	Q9UL54	Q9UM73	Q9UPZ9	Q9UQB9	Q9UQM7	Q9Y2H1	Q9Y2H9	Q9Y2K2	Q9Y2U5	Q9Y463	Q9Y4K4	Q9Y616	Q9Y6E0	Q9Y6R4	Q99870	P00748	P05362	P06870	P07477	P00736	P03952	P09871	P04070	P00488	P00740	P02675	P08709	P00185	P04800	P11711	Q86DI9	P36956	Q5NUL3'
}

data = urllib.parse.urlencode(params)
data = data.encode('utf-8')
req = urllib.request.Request(url, data)
with urllib.request.urlopen(req) as f:
   response = f.read()
match_uniprot_to_uniref50 = (response.decode('utf-8'))
match_uniprot_to_uniref50 = match_uniprot_to_uniref50.split() # Use the split method to get every word
match_uniprot_to_uniref50.pop(0) # Get rid of the first and second entries since they only say "From" and "To"
match_uniprot_to_uniref50.pop(0) # Same line as above since the list's zeroth entry is now the first entry
print(match_uniprot_to_uniref50)
with open ('/Users/josephramirez/Desktop/uniprot_to_uniref50.csv', 'w', newline='') as f: # write to a given csv
    csvwriter = csv.writer(f) # set the csv writer
    csvwriter.writerow(["UNIPROT_ID", "UNIREF50", "UNIREF90", "UNIREF100"]) # write the heading columns
    for index in range(len(match_uniprot_to_uniref50) - 1):
        if "UniRef50" in match_uniprot_to_uniref50[index]:
            continue
        else:
            csvwriter.writerow([match_uniprot_to_uniref50[index], match_uniprot_to_uniref50[index + 1]]) 

FileNotFoundError: [Errno 2] No such file or directory: '/Users/josephramirez/Desktop/edge_list.csv'

In [14]:
# ADD UNIREF 90
url = 'https://www.uniprot.org/uploadlists/'

params = {
'from': 'ACC+ID',
'to': 'NF90',
'format': 'tab',
'query': 'P00734	P05164	P00747	P02671	P05121	Q03405	P05120	P05154	P14543	P39900	P98164	Q9Y5Y6	P05452	P05787	P07355	P27797	P27824	Q07954	P04004	P05106	P08514	P12318	P31994	P00451	P02776	P07204	P07225	P12259	P35237	Q9UNN8	P25116	P47712	P06276	P12821	P46059	P46663	Q16348	O15439	O15440	O76074	P05181	P08183	P08684	P10635	P11712	P18545	P20815	P24462	P33261	Q13956	Q5T3U5	Q9Y6L6	P05177	P10632	P20813	Q9H244	P02768	P55011	Q13621	P43005	P54289	Q01650	P00915	P00918	P22748	Q14432	O00305	O60840	O60939	O95342	P02763	P08588	P25100	P35348	P35368	P35498	P35499	P48050	P54284	P63252	Q01118	Q01668	Q02641	Q07699	Q08289	Q13698	Q13936	Q14500	Q14524	Q15858	Q86VL8	Q8IWT1	Q96FL8	Q99250	Q9NY46	Q9NY72	Q9UI33	Q9UNX9	Q9UQD0	Q9Y5Y9	O15244	P07550	P05178	P08683	P15559	Q08257	Q9BQB6	O95180	Q9NY47	P00813	P04798	P09874	P27815	P29274	P29275	P30542	Q07343	Q15155	Q16678	Q7Z5B4	Q8TCT9	Q92769	Q99829	Q9Y4R7	Q9Y694	P19652	P07451	P43166	Q4U2R8	Q06432	P00797	P22309	P23219	P48048	P17405	Q00975	Q8IZS8	Q9P0X4	A0A024R8I1	P05023	P05108	P46721	Q6ZQN7	Q86UW1	Q86UW2	P08235	P35869	P01008	P05546	O60488	P11509	P19793	P28702	P37231	P48443	Q03181	Q07869	O00555	O43497	O60359	O75469	P04150	P04278	P05093	P06401	P10275	P15538	P18405	P19099	P31213	P62955	Q15878	Q7Z3S7	Q8WXS5	Q92887	Q9BXT2	Q9H8P0	Q9UBN1	Q9UF02	Q9Y698	P04731	P05091	P14902	P33402	P51580	P55017	Q12791	O15245	O75751	P08913	P18089	Q12809	Q9UNQ0	P28335	P30926	P31645	P32297	P61024	Q15822	Q14097	O15530	P35354	H3BUU9	P23141	P42898	O00591	O14764	O60656	P00374	P02708	P06133	P14416	P14867	P16662	P18505	P18507	P19224	P21728	P21918	P22310	P28223	P28472	P31644	P34903	P35367	P35503	P41145	P42261	P46098	P47869	P47870	P48169	P54855	P78334	Q16445	Q8N1C3	Q99928	Q9HAW7	Q9HAW8	Q9HAW9	Q9UN88	O43570	P23280	P35218	Q16790	Q8N1Q1	Q9ULX7	Q9Y2D0	P00742	P08908	P13945	P21397	P28222	Q8TCC7	P00749	P19634	P19801	P37088	P51168	P51170	P51172	P78348	Q16515	Q9H015	Q9TRC7	Q2M3G0	P54710	Q6FHJ7	P08172	P08173	P08912	P0DP23	P11229	P20309	P54750	Q01064	O15427	O76082	O94956	P05543	P40261	P49019	P53985	Q15274	Q8N695	Q8TDS4	P0DMS8	P55263	Q9HAS3	Q08493	Q08499	O15438	P33260	P33527	Q14654	O75795	P30556	Q96S37	Q9NRM0	P05230	P08620	P09038	P52209	Q92959	Q9HC97	Q9NSA0	P16066	P34995	P43116	Q9UIG8	Q9Y5Y4	P21589	P07099	P29972	O75762	Q7Z2W7	Q8NET8	P00176	P05183	P08473	P13569	Q14973	Q9H2X9	Q9UP95	P09210	P09211	Q9UJU2	O75493	P27338	P35219	Q06278	Q9NS85	P63316	Q15842	P18825	O14920	O43741	P01106	P04637	P11021	P11245	P12004	P24385	P25101	P25963	P27361	P28482	P29466	P31152	P42574	P51812	P54619	P54646	P98066	Q04828	Q13131	Q13164	Q16659	Q8TD08	Q9UGI9	Q9UGJ0	Q9Y478	P15144	P35610	Q9UHC9	P11511	Q15166	P53805	Q9NPD5	Q9Y233	P09488	P11766	P47989	P02585	P05412	P23786	P50416	P00750	P43119	O95750	P11362	P14210	P16109	P21802	P22455	Q9Y251	Q9UK17	P10827	P10828	P51589	Q86YN6	Q9H252	Q9NS40	P15104	O95298	P15692	P16581	P16860	P17302	P19320	Q16665	O95255	P12104	P08253	P09960	P14780	P09086	P14859	P23975	P25103	P28221	P28566	P29475	P30939	P35372	P41143	P41595	Q8TCU5	Q693P7	P03886	P23415	P98194	Q96ER9	Q16647	P51787	P13674	Q16853	P27169	P01375	Q13370	Q09013	O95622	P63092	Q08462	P02766	P03372	P05067	P08648	P09917	P11166	P16050	P16083	P16152	P31749	P37840	P48039	P49286	P54577	P68400	Q14994	Q86T13	Q8TCG2	Q92945	Q96EB6	P00378	P05089	P35222	Q13336	Q15849	Q8XB74	P19429	P01584	P05231	P19838	Q00653	P04275	P21964	P48551	P29474	P68871	P69905	O00748	Q9Y5N1	P10646	O75173	P13612	P48061	O75116	P17612	P61925	Q13464	P01100	P35228	Q05586	Q9UHC3	Q9UM01	Q9Y2I1	P31639	P24530	P06576	Q96PD7	O15554	Q16558	Q86W47	Q92952	Q9H2S1	Q9NPA1	Q9UGI6	Q9Y691	P04406	P39023	P40926	P50213	Q02218	Q13423	P53007	Q13183	Q86YT5	Q9BQT8	O60706	O00408	O60658	O76083	O95263	P09619	P16499	P35913	P51160	Q13946	Q14123	Q9HCR9	Q9NP56	P05186	P19827	P35558	P10415	Q07817	Q07820	O00238	O00329	O00444	O00519	O00750	O14578	O14730	O14757	O14936	O14965	O14976	O15075	O15146	O15197	O15264	O43283	O43293	O43353	O60285	O60674	O75385	O75460	O75716	O75747	O75914	O94768	O94804	O95382	O95747	O95819	O95835	O96013	O96017	P00519	P00533	P04049	P04626	P04629	P05129	P06213	P06239	P06241	P06493	P07332	P07333	P07498	P07711	P07947	P07948	P07949	P08581	P08631	P08922	P09769	P0C264	P10721	P11309	P11802	P12931	P14616	P15056	P16234	P16591	P17948	P19525	P19784	P21709	P22607	P22694	P23458	P25774	P27448	P29317	P29320	P29322	P29323	P29376	P29597	P30291	P30530	P33981	P35590	P35916	P35968	P36507	P36888	P36896	P36897	P37173	P41240	P41743	P42345	P42679	P42680	P42681	P42684	P42685	P43403	P43405	P45984	P46734	P48729	P48736	P49759	P49760	P49761	P49840	P49841	P51451	P51617	P51813	P51955	P51956	P51957	P52333	P52564	P53350	P53355	P53667	P53671	P53779	P54756	P54760	P54762	P54764	P57059	P57078	P62344	P78356	P80192	P9WI81	Q00532	Q00536	Q00537	Q02156	Q02763	Q02779	Q04759	Q04771	Q04912	Q05397	Q05655	Q06187	Q06418	Q07912	Q08345	Q08881	Q12851	Q12852	Q12866	Q13153	Q13163	Q13177	Q13188	Q13233	Q13237	Q13470	Q13523	Q13546	Q13554	Q13555	Q13557	Q13627	Q13873	Q13882	Q14012	Q14164	Q14289	Q14680	Q15139	Q15208	Q15303	Q15375	Q15418	Q15569	Q15746	Q16288	Q16512	Q16513	Q16539	Q16584	Q16620	Q16816	Q16832	Q2M2I8	Q32MK0	Q52WX2	Q56UN5	Q59H18	Q5S007	Q6DT37	Q6P3R8	Q6PHR2	Q6ZN16	Q7KZI7	Q7L7X3	Q7Z460	Q86UE8	Q86V86	Q86YV6	Q8IU85	Q8IVH8	Q8IW41	Q8IY84	Q8IYT8	Q8N4C8	Q8N568	Q8N5S9	Q8NEV4	Q8NFD2	Q8NG66	Q8NI60	Q8TBX8	Q8TD19	Q8TDR2	Q8WU08	Q92772	Q92918	Q96BR1	Q96D53	Q96GD4	Q96L34	Q96NX5	Q96PY6	Q96Q40	Q96RR4	Q99640	Q99759	Q99808	Q9BQI3	Q9BRS2	Q9BUB5	Q9BVS4	Q9BXA7	Q9BYT3	Q9C098	Q9H093	Q9H0K1	Q9H1R3	Q9H2G2	Q9H2K8	Q9H2X6	Q9H3Y6	Q9H422	Q9H4B4	Q9HAZ1	Q9HBH9	Q9NQU5	Q9NRH2	Q9NRP7	Q9NSY1	Q9NWZ3	Q9NYL2	Q9NYY3	Q9P0L2	Q9P286	Q9P289	Q9P2K8	Q9UBF8	Q9UEE5	Q9UEW8	Q9UF33	Q9UHD2	Q9UIK4	Q9UK32	Q9UKE5	Q9UKI8	Q9UL54	Q9UM73	Q9UPZ9	Q9UQB9	Q9UQM7	Q9Y2H1	Q9Y2H9	Q9Y2K2	Q9Y2U5	Q9Y463	Q9Y4K4	Q9Y616	Q9Y6E0	Q9Y6R4	Q99870	P00748	P05362	P06870	P07477	P00736	P03952	P09871	P04070	P00488	P00740	P02675	P08709	P00185	P04800	P11711	Q86DI9	P36956	Q5NUL3'
}

data = urllib.parse.urlencode(params)
data = data.encode('utf-8')
req = urllib.request.Request(url, data)
with urllib.request.urlopen(req) as f:
   response = f.read()
match_uniprot_to_uniref90 = (response.decode('utf-8'))
match_uniprot_to_uniref90 = match_uniprot_to_uniref90.split() # Use the split method to get every word
match_uniprot_to_uniref90.pop(0) # Get rid of the first and second entries since they only say "From" and "To"
match_uniprot_to_uniref90.pop(0) # Same line as above since the list's zeroth entry is now the first entry
print(match_uniprot_to_uniref90)
with open ('/Users/josephramirez/Desktop/uniprot_to_uniref90.csv', 'w', newline='') as f: # write to a given csv
    csvwriter = csv.writer(f) # set the csv writer
    csvwriter.writerow(["UNIPROT_ID", "UNIREF90"]) # write the heading columns
    for index in range(len(match_uniprot_to_uniref90) - 1):
        if "UniRef90" in match_uniprot_to_uniref90[index]:
            continue
        else:
            csvwriter.writerow([match_uniprot_to_uniref90[index], match_uniprot_to_uniref90[index + 1]]) 

['P00734', 'UniRef90_P00734', 'P05164', 'UniRef90_P05164', 'P00747', 'UniRef90_P00747', 'P02671', 'UniRef90_P02671', 'P05121', 'UniRef90_P05121', 'Q03405', 'UniRef90_Q03405', 'P05120', 'UniRef90_P05120', 'P05154', 'UniRef90_P05154', 'P14543', 'UniRef90_P14543', 'P39900', 'UniRef90_P39900', 'P98164', 'UniRef90_P98164', 'Q9Y5Y6', 'UniRef90_Q9Y5Y6', 'P05452', 'UniRef90_P05452', 'P05787', 'UniRef90_P05787', 'P07355', 'UniRef90_P07355', 'P27797', 'UniRef90_P27797', 'P27824', 'UniRef90_P27824', 'Q07954', 'UniRef90_Q07954', 'P04004', 'UniRef90_P04004', 'P05106', 'UniRef90_P05106', 'P08514', 'UniRef90_P08514', 'P12318', 'UniRef90_P12318', 'P31994', 'UniRef90_P31994', 'P00451', 'UniRef90_P00451', 'P02776', 'UniRef90_P02776', 'P07204', 'UniRef90_P07204', 'P07225', 'UniRef90_P07225', 'P12259', 'UniRef90_P12259', 'P35237', 'UniRef90_P35237', 'Q9UNN8', 'UniRef90_Q9UNN8', 'P25116', 'UniRef90_P25116', 'P47712', 'UniRef90_P47712', 'P06276', 'UniRef90_P06276', 'P12821', 'UniRef90_P12821', 'P46059', 'Un

In [20]:
# ADD UNIREF 100
url = 'https://www.uniprot.org/uploadlists/'

params = {
'from': 'ACC+ID',
'to': 'NF100',
'format': 'tab',
'query': 'P00734	P05164	P00747	P02671	P05121	Q03405	P05120	P05154	P14543	P39900	P98164	Q9Y5Y6	P05452	P05787	P07355	P27797	P27824	Q07954	P04004	P05106	P08514	P12318	P31994	P00451	P02776	P07204	P07225	P12259	P35237	Q9UNN8	P25116	P47712	P06276	P12821	P46059	P46663	Q16348	O15439	O15440	O76074	P05181	P08183	P08684	P10635	P11712	P18545	P20815	P24462	P33261	Q13956	Q5T3U5	Q9Y6L6	P05177	P10632	P20813	Q9H244	P02768	P55011	Q13621	P43005	P54289	Q01650	P00915	P00918	P22748	Q14432	O00305	O60840	O60939	O95342	P02763	P08588	P25100	P35348	P35368	P35498	P35499	P48050	P54284	P63252	Q01118	Q01668	Q02641	Q07699	Q08289	Q13698	Q13936	Q14500	Q14524	Q15858	Q86VL8	Q8IWT1	Q96FL8	Q99250	Q9NY46	Q9NY72	Q9UI33	Q9UNX9	Q9UQD0	Q9Y5Y9	O15244	P07550	P05178	P08683	P15559	Q08257	Q9BQB6	O95180	Q9NY47	P00813	P04798	P09874	P27815	P29274	P29275	P30542	Q07343	Q15155	Q16678	Q7Z5B4	Q8TCT9	Q92769	Q99829	Q9Y4R7	Q9Y694	P19652	P07451	P43166	Q4U2R8	Q06432	P00797	P22309	P23219	P48048	P17405	Q00975	Q8IZS8	Q9P0X4	A0A024R8I1	P05023	P05108	P46721	Q6ZQN7	Q86UW1	Q86UW2	P08235	P35869	P01008	P05546	O60488	P11509	P19793	P28702	P37231	P48443	Q03181	Q07869	O00555	O43497	O60359	O75469	P04150	P04278	P05093	P06401	P10275	P15538	P18405	P19099	P31213	P62955	Q15878	Q7Z3S7	Q8WXS5	Q92887	Q9BXT2	Q9H8P0	Q9UBN1	Q9UF02	Q9Y698	P04731	P05091	P14902	P33402	P51580	P55017	Q12791	O15245	O75751	P08913	P18089	Q12809	Q9UNQ0	P28335	P30926	P31645	P32297	P61024	Q15822	Q14097	O15530	P35354	H3BUU9	P23141	P42898	O00591	O14764	O60656	P00374	P02708	P06133	P14416	P14867	P16662	P18505	P18507	P19224	P21728	P21918	P22310	P28223	P28472	P31644	P34903	P35367	P35503	P41145	P42261	P46098	P47869	P47870	P48169	P54855	P78334	Q16445	Q8N1C3	Q99928	Q9HAW7	Q9HAW8	Q9HAW9	Q9UN88	O43570	P23280	P35218	Q16790	Q8N1Q1	Q9ULX7	Q9Y2D0	P00742	P08908	P13945	P21397	P28222	Q8TCC7	P00749	P19634	P19801	P37088	P51168	P51170	P51172	P78348	Q16515	Q9H015	Q9TRC7	Q2M3G0	P54710	Q6FHJ7	P08172	P08173	P08912	P0DP23	P11229	P20309	P54750	Q01064	O15427	O76082	O94956	P05543	P40261	P49019	P53985	Q15274	Q8N695	Q8TDS4	P0DMS8	P55263	Q9HAS3	Q08493	Q08499	O15438	P33260	P33527	Q14654	O75795	P30556	Q96S37	Q9NRM0	P05230	P08620	P09038	P52209	Q92959	Q9HC97	Q9NSA0	P16066	P34995	P43116	Q9UIG8	Q9Y5Y4	P21589	P07099	P29972	O75762	Q7Z2W7	Q8NET8	P00176	P05183	P08473	P13569	Q14973	Q9H2X9	Q9UP95	P09210	P09211	Q9UJU2	O75493	P27338	P35219	Q06278	Q9NS85	P63316	Q15842	P18825	O14920	O43741	P01106	P04637	P11021	P11245	P12004	P24385	P25101	P25963	P27361	P28482	P29466	P31152	P42574	P51812	P54619	P54646	P98066	Q04828	Q13131	Q13164	Q16659	Q8TD08	Q9UGI9	Q9UGJ0	Q9Y478	P15144	P35610	Q9UHC9	P11511	Q15166	P53805	Q9NPD5	Q9Y233	P09488	P11766	P47989	P02585	P05412	P23786	P50416	P00750	P43119	O95750	P11362	P14210	P16109	P21802	P22455	Q9Y251	Q9UK17	P10827	P10828	P51589	Q86YN6	Q9H252	Q9NS40	P15104	O95298	P15692	P16581	P16860	P17302	P19320	Q16665	O95255	P12104	P08253	P09960	P14780	P09086	P14859	P23975	P25103	P28221	P28566	P29475	P30939	P35372	P41143	P41595	Q8TCU5	Q693P7	P03886	P23415	P98194	Q96ER9	Q16647	P51787	P13674	Q16853	P27169	P01375	Q13370	Q09013	O95622	P63092	Q08462	P02766	P03372	P05067	P08648	P09917	P11166	P16050	P16083	P16152	P31749	P37840	P48039	P49286	P54577	P68400	Q14994	Q86T13	Q8TCG2	Q92945	Q96EB6	P00378	P05089	P35222	Q13336	Q15849	Q8XB74	P19429	P01584	P05231	P19838	Q00653	P04275	P21964	P48551	P29474	P68871	P69905	O00748	Q9Y5N1	P10646	O75173	P13612	P48061	O75116	P17612	P61925	Q13464	P01100	P35228	Q05586	Q9UHC3	Q9UM01	Q9Y2I1	P31639	P24530	P06576	Q96PD7	O15554	Q16558	Q86W47	Q92952	Q9H2S1	Q9NPA1	Q9UGI6	Q9Y691	P04406	P39023	P40926	P50213	Q02218	Q13423	P53007	Q13183	Q86YT5	Q9BQT8	O60706	O00408	O60658	O76083	O95263	P09619	P16499	P35913	P51160	Q13946	Q14123	Q9HCR9	Q9NP56	P05186	P19827	P35558	P10415	Q07817	Q07820	O00238	O00329	O00444	O00519	O00750	O14578	O14730	O14757	O14936	O14965	O14976	O15075	O15146	O15197	O15264	O43283	O43293	O43353	O60285	O60674	O75385	O75460	O75716	O75747	O75914	O94768	O94804	O95382	O95747	O95819	O95835	O96013	O96017	P00519	P00533	P04049	P04626	P04629	P05129	P06213	P06239	P06241	P06493	P07332	P07333	P07498	P07711	P07947	P07948	P07949	P08581	P08631	P08922	P09769	P0C264	P10721	P11309	P11802	P12931	P14616	P15056	P16234	P16591	P17948	P19525	P19784	P21709	P22607	P22694	P23458	P25774	P27448	P29317	P29320	P29322	P29323	P29376	P29597	P30291	P30530	P33981	P35590	P35916	P35968	P36507	P36888	P36896	P36897	P37173	P41240	P41743	P42345	P42679	P42680	P42681	P42684	P42685	P43403	P43405	P45984	P46734	P48729	P48736	P49759	P49760	P49761	P49840	P49841	P51451	P51617	P51813	P51955	P51956	P51957	P52333	P52564	P53350	P53355	P53667	P53671	P53779	P54756	P54760	P54762	P54764	P57059	P57078	P62344	P78356	P80192	P9WI81	Q00532	Q00536	Q00537	Q02156	Q02763	Q02779	Q04759	Q04771	Q04912	Q05397	Q05655	Q06187	Q06418	Q07912	Q08345	Q08881	Q12851	Q12852	Q12866	Q13153	Q13163	Q13177	Q13188	Q13233	Q13237	Q13470	Q13523	Q13546	Q13554	Q13555	Q13557	Q13627	Q13873	Q13882	Q14012	Q14164	Q14289	Q14680	Q15139	Q15208	Q15303	Q15375	Q15418	Q15569	Q15746	Q16288	Q16512	Q16513	Q16539	Q16584	Q16620	Q16816	Q16832	Q2M2I8	Q32MK0	Q52WX2	Q56UN5	Q59H18	Q5S007	Q6DT37	Q6P3R8	Q6PHR2	Q6ZN16	Q7KZI7	Q7L7X3	Q7Z460	Q86UE8	Q86V86	Q86YV6	Q8IU85	Q8IVH8	Q8IW41	Q8IY84	Q8IYT8	Q8N4C8	Q8N568	Q8N5S9	Q8NEV4	Q8NFD2	Q8NG66	Q8NI60	Q8TBX8	Q8TD19	Q8TDR2	Q8WU08	Q92772	Q92918	Q96BR1	Q96D53	Q96GD4	Q96L34	Q96NX5	Q96PY6	Q96Q40	Q96RR4	Q99640	Q99759	Q99808	Q9BQI3	Q9BRS2	Q9BUB5	Q9BVS4	Q9BXA7	Q9BYT3	Q9C098	Q9H093	Q9H0K1	Q9H1R3	Q9H2G2	Q9H2K8	Q9H2X6	Q9H3Y6	Q9H422	Q9H4B4	Q9HAZ1	Q9HBH9	Q9NQU5	Q9NRH2	Q9NRP7	Q9NSY1	Q9NWZ3	Q9NYL2	Q9NYY3	Q9P0L2	Q9P286	Q9P289	Q9P2K8	Q9UBF8	Q9UEE5	Q9UEW8	Q9UF33	Q9UHD2	Q9UIK4	Q9UK32	Q9UKE5	Q9UKI8	Q9UL54	Q9UM73	Q9UPZ9	Q9UQB9	Q9UQM7	Q9Y2H1	Q9Y2H9	Q9Y2K2	Q9Y2U5	Q9Y463	Q9Y4K4	Q9Y616	Q9Y6E0	Q9Y6R4	Q99870	P00748	P05362	P06870	P07477	P00736	P03952	P09871	P04070	P00488	P00740	P02675	P08709	P00185	P04800	P11711	Q86DI9	P36956	Q5NUL3'
}

data = urllib.parse.urlencode(params)
data = data.encode('utf-8')
req = urllib.request.Request(url, data)
with urllib.request.urlopen(req) as f:
   response = f.read()
match_uniprot_to_uniref100 = (response.decode('utf-8'))
match_uniprot_to_uniref100 = match_uniprot_to_uniref100.split() # Use the split method to get every word
match_uniprot_to_uniref100.pop(0) # Get rid of the first and second entries since they only say "From" and "To"
match_uniprot_to_uniref100.pop(0) # Same line as above since the list's zeroth entry is now the first entry
print(match_uniprot_to_uniref100)
with open ('/Users/josephramirez/Desktop/uniprot_to_uniref100.csv', 'w', newline='') as f: # write to a given csv
    csvwriter = csv.writer(f) # set the csv writer
    csvwriter.writerow(["UNIPROT_ID", "UNIREF100"]) # write the heading columns
    for index in range(len(match_uniprot_to_uniref100) - 1):
        if "UniRef100" in match_uniprot_to_uniref100[index]:
            continue
        else:
            csvwriter.writerow([match_uniprot_to_uniref100[index], match_uniprot_to_uniref100[index + 1]]) 

['P00734', 'UniRef100_P00734', 'P05164', 'UniRef100_P05164', 'P00747', 'UniRef100_P00747', 'P02671', 'UniRef100_P02671', 'P05121', 'UniRef100_P05121', 'Q03405', 'UniRef100_Q03405', 'P05120', 'UniRef100_P05120', 'P05154', 'UniRef100_P05154', 'P14543', 'UniRef100_P14543', 'P39900', 'UniRef100_P39900', 'P98164', 'UniRef100_P98164', 'Q9Y5Y6', 'UniRef100_Q9Y5Y6', 'P05452', 'UniRef100_P05452', 'P05787', 'UniRef100_P05787', 'P07355', 'UniRef100_P07355', 'P27797', 'UniRef100_P27797', 'P27824', 'UniRef100_P27824', 'Q07954', 'UniRef100_Q07954', 'P04004', 'UniRef100_P04004', 'P05106', 'UniRef100_P05106', 'P08514', 'UniRef100_P08514', 'P12318', 'UniRef100_P12318', 'P31994', 'UniRef100_P31994', 'P00451', 'UniRef100_P00451', 'P02776', 'UniRef100_P02776', 'P07204', 'UniRef100_P07204', 'P07225', 'UniRef100_P07225', 'P12259', 'UniRef100_P12259', 'P35237', 'UniRef100_P35237', 'Q9UNN8', 'UniRef100_Q9UNN8', 'P25116', 'UniRef100_P25116', 'P47712', 'UniRef100_P47712', 'P06276', 'UniRef100_P06276', 'P12821',

In [28]:
def link_uniprot_to_uniref100(tx):
    tx.run('''
    LOAD CSV WITH HEADERS FROM 'file:///uniprot_to_uniref100.csv' AS row
    MERGE (u:Uniref100 {name: row.UNIREF100})
    WITH u, row
    MATCH (p:Protein {UniProtID: row.UNIPROT_ID})
    MERGE (p)-[:HAS_UNIREF_ID]->(u)
    ''')

In [29]:
def link_uniprot_to_uniref90(tx):
    tx.run('''
    LOAD CSV WITH HEADERS FROM 'file:///uniprot_to_uniref90.csv' AS row
    MERGE (u:Uniref90 {name: row.UNIREF90})
    WITH u, row
    MATCH (p:Protein {UniProtID: row.UNIPROT_ID})
    MERGE (p)-[:HAS_UNIREF_ID]->(u)
    ''')

In [30]:
def link_uniprot_to_uniref50(tx):
    tx.run('''
    LOAD CSV WITH HEADERS FROM 'file:///uniprot_to_uniref50.csv' AS row
    MERGE (u:Uniref50 {name: row.UNIREF50})
    WITH u, row
    MATCH (p:Protein {UniProtID: row.UNIPROT_ID})
    MERGE (p)-[:HAS_UNIREF_ID]->(u)
    ''')

In [31]:
with driver.session() as session: 
    session.write_transaction(link_uniprot_to_uniref50)
    session.write_transaction(link_uniprot_to_uniref90)
    session.write_transaction(link_uniprot_to_uniref100)