In [1]:
# Import Dependencies
##################################################
import numpy as np
import pandas as pd
from flask import Flask, render_template, jsonify, request, redirect


In [2]:
# Load the three source CSV tables into DataFrames
##################################################
# The files are read in the order listed: samples, metadata, OTU descriptions.
samples_db, metadata_db, otu_db = (
    pd.read_csv(csv_path)
    for csv_path in (
        "data/belly_button_biodiversity_samples.csv",
        "data/Belly_Button_Biodiversity_Metadata.csv",
        "data/belly_button_biodiversity_otu_id.csv",
    )
)


In [3]:
# Sample names are every samples_db column header after the first column
sample_names = samples_db.columns[1:].tolist()
sample_names[:10]


['BB_940',
 'BB_941',
 'BB_943',
 'BB_944',
 'BB_945',
 'BB_946',
 'BB_947',
 'BB_948',
 'BB_949',
 'BB_950']

In [4]:
# def otu_descriptions():
# Collect the taxonomy label of every OTU row as a plain Python list
otu_desc = otu_db['lowest_taxonomic_unit_found'].tolist()
otu_desc[:5]


['Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Halococcus',
 'Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Halococcus',
 'Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Natronorubrum',
 'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
 'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter']

In [8]:
sample = 'BB_940'
# The numeric id is whatever follows the 3-character prefix of the sample name
meta_id = int(sample[3:])

# Row label of the first metadata record whose SAMPLEID matches meta_id
i = metadata_db[metadata_db["SAMPLEID"] == meta_id].index[0]

# Start with the sample name, then add one entry per (display label, column)
# pair, read as a scalar from row i.
meta_dict = {"Sample ID": sample}
for label, column in (
    ("BB Type", "BBTYPE"),
    ("Age", "AGE"),
    ("Gender", "GENDER"),
    ("Ethnicity", "ETHNICITY"),
    ("Wash Frequency", "WFREQ"),
    ("Location", "LOCATION"),
):
    meta_dict[label] = metadata_db.at[i, column]
meta_dict


{'Age': 24.0,
 'BB Type': 'I',
 'Ethnicity': 'Caucasian',
 'Gender': 'F',
 'Location': 'Beaufort/NC',
 'Sample ID': 'BB_940',
 'Wash Frequency': 2.0}

In [9]:
#def washfreq():
# Numeric id follows the 3-character prefix of the sample name
meta_id = int(sample[3:])
# Row label of the first metadata record matching that id
i = metadata_db[metadata_db["SAMPLEID"] == meta_id].index[0]
# Read the WFREQ value for that row and truncate it to an int
wash_freq = int(metadata_db.at[i, 'WFREQ'])
wash_freq


2

In [10]:
# Preview the first rows of the samples table
samples_db.head()

Unnamed: 0,otu_id,BB_940,BB_941,BB_943,BB_944,BB_945,BB_946,BB_947,BB_948,BB_949,...,BB_1562,BB_1563,BB_1564,BB_1572,BB_1573,BB_1574,BB_1576,BB_1577,BB_1581,BB_1601
0,1,0.0,0,0,0,0,0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0.0,0,0,0,0,0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,0.0,0,0,0,0,0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0.0,0,0,0,0,0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,0.0,0,0,0,0,0,0,0.0,0,...,0,0,0,0,0,0,0,0,0,0


In [11]:
# Name of the samples_db column (one sample) used by the cells below
sample = "BB_940"

In [12]:
# OTU ids and the selected sample's values stay as pandas Series;
# the taxonomy labels are materialised as a plain list.
otu_id = samples_db.loc[:, "otu_id"]
sample_value = samples_db.loc[:, sample]
otu_desc = otu_db['lowest_taxonomic_unit_found'].tolist()

In [13]:
# Report the length of each of the three sequences, one line per sequence
for label, seq in (
    ("Length of otu_desc variable: ", otu_desc),
    ("Length of otu_id variable:   ", otu_id),
    ("Length of sample_values var: ", sample_value),
):
    print(label + str(len(seq)))

Length of otu_desc variable: 3674
Length of otu_id variable:   3674
Length of sample_values var: 3674


In [14]:
# Build one record per OTU that has a non-zero, non-null value in the selected sample.
# Result: a list of dictionaries with keys `otu_ids`, `sample_values` and `otu_desc`.
# NOTE: records are emitted in samples_db row order; no sorting is applied here.
# For bubble chart: X = otu_id | Y = Sample (a.k.a. Column) Value | Size = Y???

otu_id = list(samples_db["otu_id"])
sample_value = list(samples_db[sample])
# Assumes otu_db rows line up one-to-one with samples_db rows -- TODO confirm
otu_desc = list(otu_db['lowest_taxonomic_unit_found'])

n = len(otu_desc)

sample_list = []

# range(n), not range(n - 1): the final row (index n - 1) must be visited too
for i in range(n):

    # NaN != 0 evaluates to True, so missing values need their own check
    if sample_value[i] != 0 and not pd.isnull(sample_value[i]):

        samples_dict = {"otu_ids": otu_id[i],
                         "sample_values": int(sample_value[i]),
                         "otu_desc": otu_desc[i]
                        }

        sample_list.append(samples_dict)

sample_list

[{'otu_desc': 'Bacteria', 'otu_ids': 11, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 19, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 22, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 24, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 39, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 41, 'sample_values': 71},
 {'otu_desc': 'Bacteria', 'otu_ids': 42, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 79, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 81, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 84, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 93, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 102, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 121, 'sample_values': 2},
 {'otu_desc': 'Bacteria', 'otu_ids': 122, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 133, 'sample_values': 1},
 {'otu_desc': 'Bacteria', 'otu_ids': 154, 'sample_values': 1},
 {

In [15]:
# Build a single dictionary of three parallel lists (`otu_ids`, `sample_values`,
# `otu_desc`) covering every OTU with a non-zero, non-null value in the selected
# sample. Entries keep samples_db row order.
otu_id = list(samples_db["otu_id"])
sample_value = list(samples_db[sample])
# Assumes otu_db rows line up one-to-one with samples_db rows -- TODO confirm
otu_desc = list(otu_db['lowest_taxonomic_unit_found'])

n = len(otu_desc)

otu_list = []
value_list = []
desc_list = []

# range(n), not range(n - 1): the final row (index n - 1) must be visited too
for i in range(n):

    # NaN != 0 evaluates to True, so missing values need their own check
    if sample_value[i] != 0 and not pd.isnull(sample_value[i]):

        otu_list.append(int(otu_id[i]))
        value_list.append(int(sample_value[i]))
        desc_list.append(otu_desc[i])

samples_dict = {"otu_ids": otu_list,
                "sample_values": value_list,
                "otu_desc": desc_list
                }
samples_dict

{'otu_desc': ['Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria;Actinobacteria;Actinobacteria;Actinomycetales',
  'Bacteria;Actinobacteria;Actinobacteria;Actinomycetales',
  'Bacteria;Actinobacteria;Actinobacteria;Actinomycetales',
  'Bacteria;Actinobacteria;Actinobacteria;Actinomycetales',
  'Bacteria;Actinobacteria;Actinobacteria;Actino