# ELISA - Explore the Data

## Python Environment

In [2]:
import os
import pandas as pd
import seaborn as sns
import mysql.connector
from sqlalchemy import create_engine
from sqlalchemy import (Column, DateTime, Integer, Numeric, Float, String, Text)

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

from pathlib import Path
from dotenv import load_dotenv

## User Environment

In [39]:
env_path = Path('.')/'.env'
load_dotenv(".env")
DB_USER = os.environ.get('DB_USER')
DB_PASSWORD = os.environ.get('DB_PASSWORD')
DB_SERVER = os.environ.get('DB_SERVER')
DB_DATABASE = os.environ.get('DB_DATABASE')
DATA_DIRECTORY = os.environ.get('DATA_DIRECTORY')
RUN_SQL = False 

## Database Connection

In [40]:
conn = f"mysql+mysqlconnector://{DB_USER}:{DB_PASSWORD}@{DB_SERVER}:3306/{DB_DATABASE}"
db_conn = create_engine(conn, echo=False)

# Retrieve ELISA Results

In [41]:
file_name = DATA_DIRECTORY + "/elisa_result.txt"

if RUN_SQL:
    sql = '''
SELECT l1.immunology_symbol,
       e1.analyte_reported,
       e1.unit_reported,
       e1.unit_preferred,
       e1.value_reported,
       e1.value_preferred,
       e1.study_accession,
       e1.arm_accession,
       e1.subject_accession,
       e1.biosample_accession,
       e1.experiment_accession,
       e1.expsample_accession,
       e1.study_time_collected,
       e1.study_time_collected_unit,
       b1.type as biosample_type,
       b1.planned_visit_accession,
       p1.name as planned_visit_name,
       p1.min_start_day,
       p1.max_start_day
  FROM elisa_result e1
    JOIN biosample b1
      ON e1.biosample_accession = b1.biosample_accession
    JOIN planned_visit p1
      ON b1.planned_visit_accession = p1.planned_visit_accession
    LEFT OUTER JOIN lk_analyte l1
      ON e1.analyte_preferred = l1.analyte_accession
LIMIT 10
'''
    elisa_result = pd.read_sql(sql, db_conn)
    elisa_result.immunology_symbol.fillna('')
    file_name = DATA_DIRECTORY + "/elisa_result.txt"
    elisa_result.to_csv(file_name, sep='\t')

else:
    elisa_result = pd.read_csv(file_name, sep="\t")
    
#elisa_result.immunology_symbol.fillna('')
    
print(elisa_result.dtypes)

Unnamed: 0                   int64
immunology_symbol          float64
analyte_reported            object
unit_reported               object
unit_preferred              object
                            ...   
biosample_type              object
planned_visit_accession     object
planned_visit_name          object
min_start_day              float64
max_start_day              float64
Length: 20, dtype: object


In [42]:
print(elisa_result.shape)
number_of_rows = len(elisa_result)
pd.set_option("display.max_rows",number_of_rows)
elisa_result.head(10)

(10, 20)


Unnamed: 0.1,Unnamed: 0,immunology_symbol,analyte_reported,unit_reported,unit_preferred,value_reported,value_preferred,study_accession,arm_accession,subject_accession,biosample_accession,experiment_accession,expsample_accession,study_time_collected,study_time_collected_unit,biosample_type,planned_visit_accession,planned_visit_name,min_start_day,max_start_day
0,0,,IgG-a Amb a,U/ml,,114.8,114.8,SDY1,ARM2,SUB73370,BS121576,EXP14863,ES96611,-7.0,Days,Whole blood,PV66,SDY1.Visit_-1.Screening.Week_-10.Protocol Day_...,-18.0,-1.0
1,1,,IgG-a Ragweed,ng/ml,ng/ml,100.0,100.0,SDY1,ARM2,SUB73370,BS121576,EXP14861,ES96612,-7.0,Days,Whole blood,PV66,SDY1.Visit_-1.Screening.Week_-10.Protocol Day_...,-18.0,-1.0
2,2,,Free IgE Concentration,ng/ml,,,,SDY1,ARM2,SUB73370,BS121577,EXP14861,ES96613,129.0,Days,Whole blood,PV81,SDY1.Visit_13.Omalizumab/Placebo+Immunotherapy...,123.0,129.0
3,3,,IgE-a Amb a,U/ml,,5.0,5.0,SDY1,ARM2,SUB73370,BS121577,EXP14863,ES96614,129.0,Days,Whole blood,PV81,SDY1.Visit_13.Omalizumab/Placebo+Immunotherapy...,123.0,129.0
4,4,,IgE-a Ragweed,kIUa/ml,,2.49,2.49,SDY1,ARM2,SUB73370,BS121577,EXP14864,ES96615,129.0,Days,Whole blood,PV81,SDY1.Visit_13.Omalizumab/Placebo+Immunotherapy...,123.0,129.0
5,5,,IgG-a Amb a,U/ml,,2112.0,2112.0,SDY1,ARM2,SUB73370,BS121577,EXP14863,ES96616,129.0,Days,Whole blood,PV81,SDY1.Visit_13.Omalizumab/Placebo+Immunotherapy...,123.0,129.0
6,6,,IgG-a Ragweed,ng/ml,ng/ml,2606.0,2606.0,SDY1,ARM2,SUB73370,BS121577,EXP14861,ES96617,129.0,Days,Whole blood,PV81,SDY1.Visit_13.Omalizumab/Placebo+Immunotherapy...,123.0,129.0
7,7,,Free IgE Concentration,ng/ml,,,,SDY1,ARM2,SUB73370,BS121578,EXP14861,ES96618,165.0,Days,Whole blood,PV85,SDY1.Visit_17.Follow-up (2003 ragweed season)....,151.0,159.0
8,8,,IgE-a Amb a,U/ml,,5.0,5.0,SDY1,ARM2,SUB73370,BS121578,EXP14863,ES96619,165.0,Days,Whole blood,PV85,SDY1.Visit_17.Follow-up (2003 ragweed season)....,151.0,159.0
9,9,,IgE-a Ragweed,kIUa/ml,,2.22,2.22,SDY1,ARM2,SUB73370,BS121578,EXP14864,ES96620,165.0,Days,Whole blood,PV85,SDY1.Visit_17.Follow-up (2003 ragweed season)....,151.0,159.0
