# Downloading SABIO-RK Data

In [None]:
# querying SABIO-RK database for data in years 2000-2021 (only 2011 kept in notebook)

%%timeit

import requests

QUERY_URL = 'http://sabiork.h-its.org/sabioRestWebServices/kineticlawsExportTsv'

# specify search fields and search terms

query_dict = {"Year":"2011"}
query_string = ' AND '.join(['%s:%s' % (k,v) for k,v in query_dict.items()])


# specify output fields and send request

query = {'fields[]':['EntryID', 'Reaction','Organism', 'ECNumber', 'Parameter', "UniProtKB_AC", 
                     "Enzyme Variant", "Enzymename",  "Publication", "PubMedID", "Substrate", "Product", "Temperature", "pH", "Rate Equation"], 'q':query_string}

request = requests.post(QUERY_URL, params = query)
request.raise_for_status()

# results

print(request.text)

with open("2011.xls", "wb") as file:
    file.write(request.content)
    
print("done")

In [110]:
# assembling dataframe

import os
import pandas as pd
import numpy as np

data_2000 = os.path.join("DBData/SABIO-RK/2000.xls")
data_2000_df = pd.read_csv(data_2000, sep="\t")
print(len(data_2000_df))

data_2001 = os.path.join("DBData/SABIO-RK/2001.xls")
data_2001_df = pd.read_csv(data_2001, sep="\t")
print(len(data_2001_df))

data_2002 = os.path.join("DBData/SABIO-RK/2002.xls")
data_2002_df = pd.read_csv(data_2002, sep="\t")
print(len(data_2002_df))

data_2003 = os.path.join("DBData/SABIO-RK/2003.xls")
data_2003_df = pd.read_csv(data_2003, sep="\t")
print(len(data_2003_df))

data_2004 = os.path.join("DBData/SABIO-RK/2004.xls")
data_2004_df = pd.read_csv(data_2004, sep="\t")
print(len(data_2004_df))

data_2005 = os.path.join("DBData/SABIO-RK/2005.xls")
data_2005_df = pd.read_csv(data_2005, sep="\t")
print(len(data_2005_df))

data_2006 = os.path.join("DBData/SABIO-RK/2006.xls")
data_2006_df = pd.read_csv(data_2006, sep="\t")
print(len(data_2006_df))

data_2007 = os.path.join("DBData/SABIO-RK/2007.xls")
data_2007_df = pd.read_csv(data_2007, sep="\t")
print(len(data_2007_df))

data_2008 = os.path.join("DBData/SABIO-RK/2008.xls")
data_2008_df = pd.read_csv(data_2008, sep="\t")
print(len(data_2008_df))

data_2009 = os.path.join("DBData/SABIO-RK/2009.xls")
data_2009_df = pd.read_csv(data_2009, sep="\t")
print(len(data_2009_df))

data_2010 = os.path.join("DBData/SABIO-RK/2010.xls")
data_2010_df = pd.read_csv(data_2010, sep="\t")
print(len(data_2010_df))

data_2011 = os.path.join("DBData/SABIO-RK/2011.xls")
data_2011_df = pd.read_csv(data_2011, sep="\t")
print(len(data_2011_df))

data_2012 = os.path.join("DBData/SABIO-RK/2012.xls")
data_2012_df = pd.read_csv(data_2012, sep="\t")
print(len(data_2011_df))

data_2013 = os.path.join("DBData/SABIO-RK/2013.xls")
data_2013_df = pd.read_csv(data_2013, sep="\t")

data_2014 = os.path.join("DBData/SABIO-RK/2014.xls")
data_2014_df = pd.read_csv(data_2014, sep="\t")

data_2015 = os.path.join("DBData/SABIO-RK/2015.xls")
data_2015_df = pd.read_csv(data_2015, sep="\t")

data_2016 = os.path.join("DBData/SABIO-RK/2016.xls")
data_2016_df = pd.read_csv(data_2016, sep="\t")

data_2017 = os.path.join("DBData/SABIO-RK/2017.xls")
data_2017_df = pd.read_csv(data_2017, sep="\t")

data_2018 = os.path.join("DBData/SABIO-RK/2018.xls")
data_2018_df = pd.read_csv(data_2018, sep="\t")

data_2019 = os.path.join("DBData/SABIO-RK/2019.xls")
data_2019_df = pd.read_csv(data_2019, sep="\t")

data_2020 = os.path.join("DBData/SABIO-RK/2020.xls")
data_2020_df = pd.read_csv(data_2020, sep="\t")

data_2021 = os.path.join("DBData/SABIO-RK/2021.xls")
data_2021_df = pd.read_csv(data_2021, sep="\t")


14171
12250
14897
23791
24325
18828
15336
11329
9977
6553
4218
3718
3718


In [112]:
df_list = [data_2000_df, data_2001_df, data_2002_df, data_2003_df, data_2004_df, data_2005_df, data_2006_df,
           data_2007_df, data_2008_df, data_2009_df, data_2010_df, data_2011_df, data_2012_df, data_2013_df, data_2014_df,
           data_2015_df, data_2016_df, data_2017_df, data_2018_df, data_2019_df, data_2020_df, data_2021_df]

rxn_df = pd.concat(df_list, ignore_index=True)
print(len(rxn_df))


rxn_df.drop_duplicates(inplace=True)
print(len(rxn_df))


print(rxn_df.head())

rxn_df.to_csv("DBData/sabio-rk_raw.csv", header=True, index=False, index_label=False)

179773
179639
   EntryID                                           Reaction  \
0      664  Acetyl-CoA + Histone = Coenzyme A + Acetylhistone   
1      664  Acetyl-CoA + Histone = Coenzyme A + Acetylhistone   
2      664  Acetyl-CoA + Histone = Coenzyme A + Acetylhistone   
3      664  Acetyl-CoA + Histone = Coenzyme A + Acetylhistone   
4      664  Acetyl-CoA + Histone = Coenzyme A + Acetylhistone   

                   Organism  ECNumber parameter.type  \
0  Saccharomyces cerevisiae  2.3.1.48             Km   
1  Saccharomyces cerevisiae  2.3.1.48  concentration   
2  Saccharomyces cerevisiae  2.3.1.48             Km   
3  Saccharomyces cerevisiae  2.3.1.48        kcat/Km   
4  Saccharomyces cerevisiae  2.3.1.48        kcat/Km   

  parameter.associatedSpecies  parameter.startValue  parameter.endValue  \
0                  Acetyl-CoA          2.500000e-06                 NaN   
1                      Enzyme          7.000000e-08                 NaN   
2                     Histone    