# ImpactDB Experimental Data Upload Demo

In [1]:
import json
import requests
import pandas as pd

from IPython.core.display import HTML

# Page-wide CSS: caps every <div> at 1080px wide and lets overflowing content
# scroll horizontally (presumably so wide table outputs stay readable — the
# selector is a bare `div`, so it is not limited to output areas).
custom_css = """
<style>
    div {
        max-width: 1080px;
        overflow-x: auto;
        display: block;
    }
</style>
"""

# Emit the <style> element as cell output so the rule above is applied.
display(HTML(custom_css))



### Load experimental data from a .json file
This example data is from a paper in [ImpactDB](https://impact-database.com/paper/rhodosporidium/production-of-galactitol-from-galactose-by-the-oleaginous-yeast-rhodosporidium-toruloides-ifo0880)

In [2]:
# Parse the template straight from the file handle. The encoding is pinned to
# UTF-8 so the result does not depend on the platform's locale default, and
# `experimental_data` is only ever bound to the parsed object (never to the
# raw text).
with open('json_template.json', 'r', encoding='utf-8') as f:
    experimental_data = json.load(f)

# Last expression of the cell: show the parsed content.
experimental_data

[{'geneticNotes': '',
  'heterologousGenes': '',
  'sourceEmail': 'jondhudson.rpi@gmail.com',
  'integrationSites': '',
  'nitrogenLevel': 'sufficient',
  'media': 'YPD',
  'substrateConc1': '20',
  'substrateConc2': '',
  'vessel': 'shake flask',
  'yield': '0.16',
  'titer': '3.2',
  'pH': '',
  'maximumRate': '0.0267',
  'temperature': '30',
  'substrate1': 'galactose',
  'substrate2': '',
  'geneIds': '',
  'promoters': '',
  'parentStrain': 'IFO0880',
  'strain': '',
  'averageRate': '0.0267',
  'product': 'galactitol',
  'optimizedCodons': '',
  'geneNames': '',
  'directedEvolution': False,
  'originSpecies': '',
  'volume': '0.050',
  'engineeredStrain': '',
  'knockedOutGenes': '',
  'oxygenLevel': 'sufficient',
  'species': 'Rhodosporidium',
  'overexpressedGenes': '',
  'time': '120',
  'bioprocessNotes': ''},
 {'geneticNotes': '',
  'heterologousGenes': '',
  'sourceEmail': 'jondhudson.rpi@gmail.com',
  'integrationSites': '',
  'nitrogenLevel': 'sufficient',
  'substrateCo

### Manually define bioprocess data

In [3]:
# A single hand-written record, wrapped in a list. Every value is a string
# except `directedEvolution`, which is a bool; fields with nothing to report
# are left as empty strings. Key order is kept as originally written.
DataGPT = [
    {
        'geneticNotes': '',
        'heterologousGenes': '',
        'sourceEmail': 'zhengyang.xiao99@gmail.com',
        'integrationSites': '',
        'nitrogenLevel': 'sufficient',
        'media': 'rich',
        'substrateConc1': '0',
        'substrateConc2': '',
        'vessel': 'batch reactor',
        'yield': '',
        'titer': '0.02764',
        'pH': '7.4',
        'maximumRate': '',
        'temperature': '30',
        'substrate1': 'Waste hydrolysate',
        'substrate2': '',
        'geneIds': '',
        'promoters': '',
        # NOTE(review): 'temp name' looks like a placeholder — confirm.
        'parentStrain': 'temp name',
        'strain': 'Rhodosporidium toruloides',
        'averageRate': '',
        'product': '1,8-cineole',
        'optimizedCodons': '',
        'geneNames': '',
        'directedEvolution': False,
        'originSpecies': '',
        'volume': '2',
        'engineeredStrain': '',
        'knockedOutGenes': '',
        'oxygenLevel': 'sufficient',
        'species': 'Rhodosporidium',
        'overexpressedGenes': '',
        'time': '168',
        'bioprocessNotes': '',
    },
]

### Load data from an Excel file

In [4]:
# Read the spreadsheet and blank out missing cells (NaN -> '') in one chain,
# then preview the first rows.
excel_file = pd.read_excel('excel_template.xlsx').fillna('')
display(excel_file.head())

# Serialise the frame to a JSON string, one object per row.
json_str = excel_file.to_json(orient='records')
print(type(json_str), json_str)

# Parse that string back into plain Python objects.
json_obj = json.loads(json_str)
print(type(json_obj), json_obj)

Unnamed: 0,species,strain,engineeredStrain,parentStrain,product,titer,averageRate,maximumRate,yield,volume,...,overexpressedGenes,heterologousGenes,promoters,integrationSites,originSpecies,optimizedCodons,directedEvolution,sourceEmail,geneticNotes,bioprocessNotes
0,Rhodosporidium toruloides,,,ABC,lipid,7.769,,,,100,...,,,,,,,,zhengyang.xiao99@gmail.com,,
1,Rhodosporidium toruloides,,,ABC,lipid,6.769,,,,100,...,,,,,,,,zhengyang.xiao99@gmail.com,,
2,Rhodosporidium toruloides,,,ABC,lipid,4.638,,,,100,...,,,,,,,,zhengyang.xiao99@gmail.com,,
3,Rhodosporidium toruloides,,,ABC,lipid,2.436,,,,100,...,,,,,,,,zhengyang.xiao99@gmail.com,,
4,Rhodosporidium toruloides,,,ABC,lipid,1.761,,,,100,...,,,,,,,,zhengyang.xiao99@gmail.com,,


<class 'str'> [{"species":"Rhodosporidium toruloides","strain":"","engineeredStrain":"","parentStrain":"ABC","product":"lipid","titer":7.769,"averageRate":"","maximumRate":"","yield":"","volume":100,"vessel":"shake flask","substrate1":"sucrose","substrateConc1":40,"substrate2":"","substrateConc2":"","media":"defined","time":24,"oxygenLevel":"sufficient","nitrogenLevel":"sufficient","pH":"","temperature":28,"geneIds":"","geneNames":"","knockedOutGenes":"","overexpressedGenes":"","heterologousGenes":"","promoters":"","integrationSites":"","originSpecies":"","optimizedCodons":"","directedEvolution":"","sourceEmail":"zhengyang.xiao99@gmail.com","geneticNotes":"","bioprocessNotes":""},{"species":"Rhodosporidium toruloides","strain":"","engineeredStrain":"","parentStrain":"ABC","product":"lipid","titer":6.769,"averageRate":"","maximumRate":"","yield":"","volume":100,"vessel":"shake flask","substrate1":"sucrose","substrateConc1":40,"substrate2":"","substrateConc2":"","media":"defined","time":

In [5]:
# Endpoint the experimental data is POSTed to.
url = 'https://us-central1-impact-db.cloudfunctions.net/uploadExperimentalData'

# local endpoint for testing
# url = 'http://localhost:5001/impact-db/us-central1/uploadExperimentalData'

# Request payload. Exactly one 'experimentalData' entry should be active;
# swap the comments to upload a different data source.
# NOTE(review): species/slug point at a yarrowia paper while DataGPT describes
# a Rhodosporidium record — confirm this pairing is intended.
data = {
    'species': 'yarrowia',
    'slug': '-ketoglutaric-acid-production-by-yarrowia-lipolytica-and-its-regulation',
    # 'experimentalData': json_obj, # data from excel file
    'experimentalData': DataGPT, # data from python object
    # 'experimentalData': experimental_data, # data from json file
}

headers = {
    "Content-Type": "application/json"
}

# requests applies no timeout by default, so without one a stalled endpoint
# would block this cell indefinitely. 30 s bounds the wait.
response = requests.post(url, data=json.dumps(data), headers=headers, timeout=30)

# Print the response body verbatim; the status code is not checked here, so
# error messages returned by the server are shown as-is.
print(response.text)

{"message":"Bad request. Papers with existing experimental data cannot be updated for now."}
