# Nitrogen Oxide Simulated data

The data is generated using some assumed parameters per state.
The assumptions take into account the level of agricultural, industrial (including mining), and domestic activity in each state. For demonstration purposes, we use only 20 states.

In [1]:
import pandas as pd
import numpy as np

The list of dictionaries below holds, for each state, an assumed average monthly nitrogen oxide emission in parts per billion, a guessed standard deviation, and a random seed (so that each state's data is reproducible).

In [2]:
# Per-state simulation inputs, one dict per state:
#   rnd_seed            - seed used when generating that state's samples
#   state_code          - two-letter state abbreviation
#   assumed_std         - guessed standard deviation of the monthly values
#   assumed_monthly_avg - assumed monthly average NOx (parts/billion)
assumed_data = [
    dict(rnd_seed=244, state_code="AL", assumed_std=4.0, assumed_monthly_avg=280.0),
    dict(rnd_seed=246, state_code="AR", assumed_std=3.0, assumed_monthly_avg=180.0),
    dict(rnd_seed=245, state_code="AZ", assumed_std=6.0, assumed_monthly_avg=400.0),
    dict(rnd_seed=247, state_code="CA", assumed_std=6.0, assumed_monthly_avg=290.0),
    dict(rnd_seed=248, state_code="CO", assumed_std=8.0, assumed_monthly_avg=230.0),
    dict(rnd_seed=252, state_code="FL", assumed_std=5.0, assumed_monthly_avg=240.0),
    dict(rnd_seed=253, state_code="GA", assumed_std=6.0, assumed_monthly_avg=410.0),
    dict(rnd_seed=259, state_code="KS", assumed_std=5.0, assumed_monthly_avg=130.0),
    dict(rnd_seed=261, state_code="LA", assumed_std=4.0, assumed_monthly_avg=150.0),
    dict(rnd_seed=265, state_code="MI", assumed_std=5.0, assumed_monthly_avg=290.0),
    dict(rnd_seed=276, state_code="ND", assumed_std=2.0, assumed_monthly_avg=70.0),
    dict(rnd_seed=273, state_code="NM", assumed_std=4.0, assumed_monthly_avg=180.0),
    dict(rnd_seed=270, state_code="NV", assumed_std=8.0, assumed_monthly_avg=400.0),
    dict(rnd_seed=274, state_code="NY", assumed_std=7.0, assumed_monthly_avg=330.0),
    dict(rnd_seed=277, state_code="OH", assumed_std=6.0, assumed_monthly_avg=270.0),
    dict(rnd_seed=278, state_code="OK", assumed_std=5.0, assumed_monthly_avg=230.0),
    dict(rnd_seed=280, state_code="PA", assumed_std=5.0, assumed_monthly_avg=180.0),
    dict(rnd_seed=285, state_code="TX", assumed_std=6.0, assumed_monthly_avg=190.0),
    dict(rnd_seed=286, state_code="UT", assumed_std=7.0, assumed_monthly_avg=220.0),
    dict(rnd_seed=292, state_code="WV", assumed_std=6.0, assumed_monthly_avg=200.0),
]


The function below simulates monthly average nitrogen oxide values for a single state over a given range of years (the end year itself is excluded).

In [3]:
def simulated_nitrogen_oxide(state_assumed, start_year=2019, end_year=2022, samples=12):
    '''
    Simulate normally distributed NOx values for one state, year by year.

    For every year in ``range(start_year, end_year)`` the function draws
    ``samples`` values from a normal distribution parameterised by the state's
    assumed average and standard deviation.

    :param state_assumed: Mapping describing one state. The keys read are
        ``rnd_seed``, ``state_code``, ``assumed_monthly_avg`` and ``assumed_std``.
    :param start_year: First year to generate data for (inclusive).
    :param end_year: Year at which generation stops (exclusive); with the
        defaults, data is produced for 2019, 2020 and 2021.
    :param samples: Number of values drawn per year. The default is 12
        (12 months in a year).
    :return: A NumPy array of shape ``(3, n_years * samples)`` whose rows are
        the years, the simulated values (parts/billion) and the state codes.
        Because the rows are stacked into one array, every element is a string.
        When the year range is empty the shape is ``(3, 0)``.
    '''
    # A private legacy RandomState yields the same stream as
    # np.random.seed(seed) followed by np.random.normal(...), but leaves the
    # process-wide NumPy random state untouched.
    rng = np.random.RandomState(state_assumed['rnd_seed'])

    yrs, m_avg, s_code = [], [], []
    for year in range(start_year, end_year):
        # randomly generate the state monthly average NOx
        m_avg.append(
            rng.normal(state_assumed['assumed_monthly_avg'], state_assumed['assumed_std'], samples)
        )
        # label every sample with its year and state code
        yrs.append(np.full(samples, year))
        s_code.append(np.full(samples, state_assumed['state_code']))

    if not yrs:
        # Empty year range: np.concatenate would raise on an empty list, so
        # return an empty result with the usual three rows instead.
        return np.empty((3, 0), dtype=str)

    # Numbers are converted to strings explicitly so the three rows share one
    # dtype; this is the same result implicit promotion in vstack would give.
    return np.vstack((
        np.concatenate(yrs).astype(str),
        np.concatenate(m_avg).astype(str),
        np.concatenate(s_code),
    ))

Generate data for all 20 US states we considered.

In [4]:
# Each call returns rows of (years, monthly averages, state codes) using the
# default samples=12; transposing gives one record per month, and the
# per-state record tables are then joined end to end.
all_data = np.concatenate(
    [simulated_nitrogen_oxide(state_assumed).T for state_assumed in assumed_data]
)

Save the NumPy array to a CSV file for future analysis and dashboard display.

In [5]:
# Write all_data as comma-separated text, formatting every element with "%s".
np.savetxt(fname="nitrogen_oxide.csv", X=all_data, delimiter=",", fmt="%s")