# Project Goals
* Access the experiment JSON files hosted on the Amazon (S3) server and build a DataFrame from them
* Data Augmentation using Aleatoric uncertainty.

* Anomaly / Outlier Detection 
* Predict Peak Heat Release Rate
    * Regression analysis with tabular data (ensemble methods)
    
* Predict Fuel Type
    * Classification with tabular data (ensemble methods)
    
* Shapley Value


In [38]:
import json
import urllib.request
import pandas as pd
import numpy as np

import matplotlib
import matplotlib.pyplot as plt

import seaborn as sns

In [39]:
# Download a single experiment record as an example of the JSON layout.
json_url = "https://nist-el-nfrlhrr.s3.amazonaws.com/HRR/JSON_FILES/EXPERIMENTS/1528807400.json"

# Read and decode the whole response body inside the `with` so the connection
# is closed before parsing.
with urllib.request.urlopen(json_url) as url:
    raw_text = url.read().decode()

json_data = json.loads(raw_text)

In [40]:
# Pretty-print the parsed record with a 4-space indent to inspect its structure.
pretty_json = json.dumps(json_data, indent=4)
print(pretty_json)

{
    "id": "1528807400",
    "publish": 1,
    "created": 1528807400,
    "updated": 1621023884,
    "name": "B6-1",
    "date": "6/12/2018 8:43:06 AM",
    "description": "Fire Barrier: B6, Cover Fabric: Polypropylene (3kg), Filling Material: Polyethalene (6kg) and Polyester (4 kg)",
    "specimen": "upholstered chair mockup",
    "ignition": "18 kW propane square burner",
    "project_ids": [
        {
            "name": "BarrierFabricMockup"
        },
        {
            "name": "null"
        }
    ],
    "test_director": "Zammarano",
    "timelapse_video_with_heat_release_rate_plot": "https://nist-el-nfrlhrr.s3.amazonaws.com/HRR/ASSET_FILES/Chair_Mockup/video/1528807400.mp4",
    "csv_data_file": "https://nist-el-nfrlhrr.s3.amazonaws.com/HRR/ASSET_FILES/Chair_Mockup/data/1528807400_B6-1.csv",
    "peak_heat_release_rate": {
        "value": 1080.02,
        "combined_expanded_uncertainty": 65.06385841722715,
        "unit": "kW",
        "label": "Peak Heat Release Rate, PHRR

In [41]:
# CSV files that list the experiment JSON URLs in a "url_name" column.
SMALL_JSON_URL_LIST = "S3_AWS_JSON_LIST.csv"  # Small Dataset 197 experiments
LARGE_JSON_URL_LIST = "HTTP_NFRL1_JSON_FileList.csv"  # Large Dataset with over 1000 experiments

# Choose the list explicitly instead of relying on which of two consecutive
# assignments happens to come last. True keeps the previous behaviour
# (the large dataset).
use_large_dataset = True
json_url_filename = LARGE_JSON_URL_LIST if use_large_dataset else SMALL_JSON_URL_LIST

# Array of URL strings, one per experiment JSON file.
json_urls = pd.read_csv(json_url_filename)["url_name"].values

In [42]:
# Column layout of the summary table: two identifier columns followed by a
# (value, "<name> - Uncertainty") pair for every summary quantity, in order.
cols = ["id", "url_name"] + [
    label
    for quantity in (
        "Peak_Heat_Release_Rate",
        "time_to_peak_heat_release_rate",
        "total_heat_released",
        "total_fuel_mass_burned",
        "net_effective_heat_of_combustion",
        "net_heat_of_combustion_per_mass_fuel",
        "net_heat_of_combustion_per_mass_o2",
        "o2_yield",
        "co2_yield",
        "co_yield",
        "soot_yield",
        "test_duration",
        "fuel_type",
        "net_fuel_mass",
    )
    for label in (quantity, f"{quantity} - Uncertainty")
]

# Empty frame carrying only the column headers.
FCDSummaryValues = pd.DataFrame(columns=cols)

In [43]:
# Set to True to read only the first 10 experiments (quick test run);
# set to False to read every URL in `json_urls`.
read_short_dataset = False

# JSON keys of the summary quantities. Their order must match the order of the
# (value, uncertainty) column pairs in `cols`; note the JSON key is lower-case
# "peak_heat_release_rate" while the column is "Peak_Heat_Release_Rate".
summary_keys = [
    "peak_heat_release_rate",
    "time_to_peak_heat_release_rate",
    "total_heat_released",
    "total_fuel_mass_burned",
    "net_effective_heat_of_combustion",
    "net_heat_of_combustion_per_mass_fuel",
    "net_heat_of_combustion_per_mass_o2",
    "o2_yield",
    "co2_yield",
    "co_yield",
    "soot_yield",
    "test_duration",
    "fuel_type",
    "net_fuel_mass",
]

# Rows are collected in a plain list and turned into a DataFrame once at the
# end: concatenating onto the frame inside the loop copies it on every
# iteration, and appending to the existing frame made the cell duplicate rows
# when it was re-run.
summary_rows = []
num_of_datasets = 0
for num_of_datasets, json_url in enumerate(json_urls, start=1):
    if num_of_datasets % 100 == 0:
        print(f'Reading json file : {num_of_datasets} : {json_url}')

    if read_short_dataset and num_of_datasets > 10:
        break

    # Keep the connection open only for the download itself.
    with urllib.request.urlopen(json_url) as url:
        json_data = json.loads(url.read().decode())

    row = [json_data["id"], json_url]
    for key in summary_keys:
        row.append(json_data[key]["value"])
        row.append(json_data[key]["combined_expanded_uncertainty"])
    summary_rows.append(row)

# Index labels stay the string positions "0", "1", ... as before.
FCDSummaryValues = pd.DataFrame(
    summary_rows,
    columns=cols,
    index=[str(position) for position in range(len(summary_rows))],
)

Reading json file : 100 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1639582032.json
Reading json file : 200 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1631716945.json
Reading json file : 300 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1628188870.json
Reading json file : 400 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1433355203.json
Reading json file : 500 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1617632578.json
Reading json file : 600 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1616509830.json
Reading json file : 700 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1614872787.json
Reading json file : 800 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1580496324.json
Reading json file : 900 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1534358181.json
Reading json file : 1000 : http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERIMENTS/1575397738.json


In [44]:
# Preview the first rows of the assembled summary table.
FCDSummaryValues.head()

Unnamed: 0,id,url_name,Peak_Heat_Release_Rate,Peak_Heat_Release_Rate - Uncertainty,time_to_peak_heat_release_rate,time_to_peak_heat_release_rate - Uncertainty,total_heat_released,total_heat_released - Uncertainty,total_fuel_mass_burned,total_fuel_mass_burned - Uncertainty,...,co_yield,co_yield - Uncertainty,soot_yield,soot_yield - Uncertainty,test_duration,test_duration - Uncertainty,fuel_type,fuel_type - Uncertainty,net_fuel_mass,net_fuel_mass - Uncertainty
0,1647008482,http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERI...,96.46,6.237851,21.45,0.166667,173.80297,12.23537,10.284199,0.786984,...,0.112912,0.009549,0.00976,0.003991,55.816667,0.033333,"PVC, Natural Gas",[],,
1,1646936290,http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERI...,127.17,7.774227,11.38,0.166667,126.85369,9.133598,7.506136,0.585486,...,0.087455,0.007513,0.018994,0.004511,39.666667,0.033333,"PVC, Natural Gas",[],,
2,1646410394,http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERI...,44.72,3.272653,15.55,0.166667,36.46506,3.146284,2.157696,0.197103,...,0.026711,0.002623,0.011638,0.001902,26.15,0.033333,"PVC, Natural Gas",[],,
3,1646317604,http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERI...,279.98,15.619808,9.73,0.166667,199.53617,12.05316,4.988404,0.336445,...,0.094687,0.007239,0.03949,0.008568,33.75,0.033333,"Plastics, Natural Gas",[],,
4,1646236342,http://nfrl1.el.nist.gov/HRR/JSON_FILES/EXPERI...,476.98,25.109751,18.9,0.166667,260.05008,15.068432,6.501252,0.424206,...,0.020452,0.001524,0.052682,0.008195,34.866667,0.033333,"Plastics, Natural Gas",[],,


In [45]:
# Report the index range, column names, non-null counts and dtypes.
FCDSummaryValues.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1021 entries, 0 to 1020
Data columns (total 30 columns):
 #   Column                                              Non-Null Count  Dtype  
---  ------                                              --------------  -----  
 0   id                                                  1021 non-null   object 
 1   url_name                                            1021 non-null   object 
 2   Peak_Heat_Release_Rate                              1021 non-null   float64
 3   Peak_Heat_Release_Rate - Uncertainty                1021 non-null   float64
 4   time_to_peak_heat_release_rate                      1021 non-null   float64
 5   time_to_peak_heat_release_rate - Uncertainty        1021 non-null   float64
 6   total_heat_released                                 1021 non-null   float64
 7   total_heat_released - Uncertainty                   1021 non-null   float64
 8   total_fuel_mass_burned                              1021 non-null   float64
 9   to

In [46]:
#sns.pairplot(FCDSummaryValues)

In [47]:
# Persist the summary table to disk, then load it back as `df`.
# NOTE(review): the index is written as an unnamed first column, so the
# reloaded frame gains an "Unnamed: 0" column — confirm whether later cells
# rely on it before changing this.
summary_csv_path = "FCDSummaryValues.csv"
FCDSummaryValues.to_csv(summary_csv_path)
df = pd.read_csv(summary_csv_path)
# df.head()