---
Eli Schwat

elilouis@uw.edu

Created for Professor Michael Brett's CEWA547 Course, Winter 2021

---

# Analyze Salish Sea Model Point Source Inputs

This notebook walks you through an analysis of non-river point sources to to the Salish Sea model.

User input is required when you see this...

**<span style="color:red">USER INPUT REQUIRED</span>**

In [1]:
import pandas as pd
import altair as alt
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

# Input Variables:

**<span style="color:red">USER INPUT REQUIRED</span>**

Put in the path to the file `ssm_pnt_wq.dat` (or similar) that should come with your packaging of the SSM model.

In [2]:
input_file = "/Users/elischwat/Google Drive/UW/Classes Winter 2021/Watershed MGMT/salish sea model/SSM_WQM_model_inputs/inputs/ssm_pnt_wq.dat"
# input_file = "/Users/elischwat/Google Drive/UW/Classes Winter 2021/Watershed MGMT/salish sea model/SSM_WQM_model_inputs/inputs/ssm_pnt_wqMODIFIED.dat"

In [3]:
variable_name_dict = {
    0: "Flow (OFF, b/c from FVCOM)",
    1: "Temperature (OFF, b/c from FVCOM)",
    2: "Salinity (OFF, b/c from FVCOM)",
    3: "TSS",
    4: "Algal 1 (Algal group 1)",
    5: "Algal 2 (Algal group 2)",
    6: "Algal 3 (Algal group 3) (unused)",
    7: "Zooplankton 1 (Zooplankton – species 1)",
    8: "Zooplankton 2 (Zooplankton species 2)",
    9: "Labile DOC (Labile dissolved organic carbon)",
    10: "Refractory DOC (Refractory dissolved organic carbon)",
    11: "Labile POC (Labile particulate organic carbon)",
    12: "Refractory POC (Refractory particulate organic carbon)",
    13: "Ammonium (NH4)",
    14: "Nitrate + Nitrite (NO3+NO2)",
    15: "Urea",
    16: "Labile DON (Labile dissolved organic nitrogen)",
    17: "Refractory DON (Refractory dissolved organic nitrogen)",
    18: "Labile PON (Labile particular organic nitrogen)",
    19: "Refractory PON (Refractory particulate organic nitrogen)",
    20: "Total PO4 (Total phosphate)",
    21: "Labile DOP (Labile dissolved organic phosphate)",
    22: "Refractory DOP (Refractory dissolved organic phosphate)",
    23: "Labile POP (Labile particulate organic phosphate)",
    24: "Refractory POP (Refractory particulate organic phosphate)",
    25: "Particulate inorganic P (Particulate inorganic phosphate)",
    26: "COD (Chemical oxygen demand)",
    27: "DO (Dissolved Oxygen)",
    28: "Particulate Silica",
    29: "Dissolved Silica",
    30: "internal P group for Alga 1, Droop model (currently off)",
    31: "internal P group for Alga 2, Droop model (currently off)",
    32: "internal P group for Alga 3, Droop model (currently off)",
    33: "DIC",
    34: "Alkalinity"
}

In [4]:
def read_data(input_file, num_params):
    """
    Params:
    input_file (str): path to input file
    num_params (int): number of parameters contained in the file. Usually 35.
    
    Returns:
    (df, header_lines): df is a dataframe containing the data separated by parameter, point source, 
        and date. header_lines is a list of strings containing all the header data that must be 
        written to the new file.
    """
    with open(input_file) as f:
        lines = [line.rstrip() for line in f]
    num_point_sources = int(lines[1])
    print(f"Found {num_point_sources} point sources")
    header_lines = lines[:num_point_sources*2+3]
    data_lines =  lines[num_point_sources*2+3:]
    num_daily_data = int(header_lines[-1])
    print(f"Found {num_daily_data} days of data")
    df_list = []
    for n_day in range(0, num_daily_data):
        day_num = data_lines[n_day*(num_params+1)]
        lines = data_lines[n_day*(num_params+1) + 1: n_day*(num_params+1) + 1 + num_params]
        df_list.append(__extract_daily_data(lines, day_num))
    df = pd.concat(df_list)
    df = df.reset_index(drop=True)
    df['hour'] = df['hour'].astype('float')
    return df, header_lines
          
def write_data(output_file, df, header_lines):
    """
    Params:
    output_file (str): path to output file.
    df (pandas.DataFrame): a dataframe containing data. such as is returned by the read_data function
                defined above.
    header_lines: list of strings, such as is returned by the read_data function defined above.
    """
    writer = open(output_file, "w")
    writer.write("\n".join(header_lines))
    for day, day_df in df.groupby('hour'):
        #generate a days worth of data which is composed of:
        #1. a first single line with the julian day
        writer.write("\n")
        day_line_string = "     {:.2f}".format(day)
        writer.write(day_line_string)
        print(f"Writing data for day: {day_line_string}")
        #2. num_params lines of data, each line is a series of single-space-separated floats (formatted in sci notation),
        #    each line is num_point_sources floats long. make sure lines are written in the order of the parameter number.    
        for index, row in day_df.iloc[:,2:].iterrows():
            line_string = ' ' + ' '.join([ #add a space here because that's how the original file is
                '{:.3E}'.format(single_param_vals) for single_param_vals in row
            ])
            writer.write("\n")
            writer.write(line_string)
    writer.write("\n") #to put an empty line at the beginning, as the original files have
    writer.close()
          
def __extract_daily_data(lines, day_num):
    assert len(lines)==num_params, f"Expecting {num_params} lines of data"
    arr_list = []
    for i in range(0, len(lines)):
        line = lines[i]
        param_index = i
        arr_list.append(
            [float(x) for x in line.strip().split(' ')]
        )
    df = pd.DataFrame(arr_list)
    df.insert(0, 'hour', day_num)
    df.insert(0, 'param', df.index)
    return df

## Read Data 

In [5]:
num_params = 35

In [6]:
df, header_lines = read_data(input_file, num_params)
source_lines = header_lines[2:195]
source_names_series = pd.Series(source_lines).apply(lambda x: x.split(',')[1].split('---')[0].strip())
point_source_types = source_names_series.apply(lambda x: x.split(' - ')[1].split(' (')[0].strip())

Found 193 point sources
Found 365 days of data


In [7]:
len(source_names_series), len(point_source_types)

(193, 193)

## Create human-usable dataframe

In [8]:
hr_df = df.copy().rename(mapper=source_names_series.to_dict(), axis=1)

hr_df = pd.melt(hr_df, id_vars=['hour', 'param']).rename({'variable': 'source'}, axis=1)
params_of_interest = [0,1,2,13,14,15,16,17,18,19]
hr_df = hr_df[hr_df.param.isin(params_of_interest)]
hr_df.param = hr_df.param.apply(lambda x: variable_name_dict.get(x).split('(')[0].strip())

In [9]:
hr_df['source_name'] = hr_df.source.apply(lambda x: x.split('-')[0].strip())
hr_df['type'] = hr_df.source.apply(lambda x: x.split(' - ')[1].split('(')[0].strip())
hr_df['ECY ID'] = hr_df.source.apply(lambda x: x.split(' - ')[1].split('(')[1].split(':')[1].split(')')[0].strip())

In [10]:
hr_df.head(3)

Unnamed: 0,hour,param,source,value,source_name,type,ECY ID
0,0.0,Flow,Fraser - River (ECY ID: 258),640.0,Fraser,River,258
1,0.0,Temperature,Fraser - River (ECY ID: 258),3.73,Fraser,River,258
2,0.0,Salinity,Fraser - River (ECY ID: 258),0.0,Fraser,River,258


Pivot the table...

In [11]:
hr_df = hr_df.pivot_table(
    index=['hour','source','source_name','type','ECY ID'], 
    columns='param', 
    values='value'
).rename_axis(None, axis=1).reset_index()

In [12]:
hr_df.head(3)

Unnamed: 0,hour,source,source_name,type,ECY ID,Ammonium,Flow,Labile DON,Labile PON,Nitrate + Nitrite,Refractory DON,Refractory PON,Salinity,Temperature,Urea
0,0.0,Alderbrook - Point Source (ECY ID: 234),Alderbrook,Point Source,234,4.261,0.000832,0.4684,0.2431,6.148,0.0,0.0,0.0,15.06,0.0
1,0.0,Alderwood - Point Source (ECY ID: 235),Alderwood,Point Source,235,25.52,0.4004,0.3158,0.1224,6.852,0.0,0.0,0.0,15.06,0.0
2,0.0,Anacortes - Point Source (ECY ID: 236),Anacortes,Point Source,236,25.52,0.1147,0.3158,0.1224,6.852,0.0,0.0,0.0,15.06,0.0


In [13]:
hr_df = hr_df[hr_df.type=='Point Source']
hr_df['day'] = hr_df['hour']/24

## Plot Effluent Concentrations of top 20 point sources by Flow

In [14]:
top_sources = hr_df.groupby('source_name').Flow.mean().sort_values(ascending=False).head(20)
alt.Chart(
    hr_df[hr_df.source_name.isin(top_sources.index)]
).transform_fold(
    ['Ammonium', 'Nitrate + Nitrite', 'Urea', 'Labile DON', 'Refractory DON', 'Labile PON', 'Refractory PON'],
).mark_line().encode(
    x = 'day:Q',
    y = alt.Y('value:Q', title='N Concentrations (mg N/L)'),
    color='key:N'
).properties(
    width = 300,
    height = 150
).facet(
    'source_name',
    columns=3
).resolve_scale(
    y = 'independent'
)

## Convert Effluent Concentrations to Effluent Mass Fluxes

#### First convert units of the concentration values (mg/L -> mg/m^3)

In [15]:
hr_flux_df = hr_df.copy()

In [16]:
def convert_per_liter_to_per_cubic_meter(mg_per_liter_value):
    cubic_meter_in_1_liter = 0.001
    return mg_per_liter_value * (1 / cubic_meter_in_1_liter)

for col in [
    'Ammonium', 'Nitrate + Nitrite', 'Urea', 'Labile DON', 
    'Refractory DON', 'Labile PON', 'Refractory PON']:
    hr_flux_df[col] = hr_flux_df[col].apply(convert_per_liter_to_per_cubic_meter)

#### Then multiply the concentration values by flow (mg/m^3 * m^3/s = mg/s)

In [17]:
def convert_concentrations_to_mass_flux(df):
    flow_values = df.loc[df.param == 'Flow', 'value']
    assert len(flow_values) == 1, "More than 1 flow value per location and hour...that's a problem"
    flow_value = flow_values.iloc[0]
    df.loc[df.param != 'Flow', 'value'] = df.loc[df.param != 'Flow', 'value'] * flow_value
    return df

In [18]:
for col in [
    'Ammonium', 'Nitrate + Nitrite', 'Urea', 'Labile DON', 
    'Refractory DON', 'Labile PON', 'Refractory PON']:
    hr_flux_df[col] = hr_flux_df[col] * hr_flux_df['Flow']

In [19]:
hr_flux_df.head(3)

Unnamed: 0,hour,source,source_name,type,ECY ID,Ammonium,Flow,Labile DON,Labile PON,Nitrate + Nitrite,Refractory DON,Refractory PON,Salinity,Temperature,Urea,day
0,0.0,Alderbrook - Point Source (ECY ID: 234),Alderbrook,Point Source,234,3.546004,0.000832,0.389802,0.202308,5.116366,0.0,0.0,0.0,15.06,0.0,0.0
1,0.0,Alderwood - Point Source (ECY ID: 235),Alderwood,Point Source,235,10218.208,0.4004,126.44632,49.00896,2743.5408,0.0,0.0,0.0,15.06,0.0,0.0
2,0.0,Anacortes - Point Source (ECY ID: 236),Anacortes,Point Source,236,2927.144,0.1147,36.22226,14.03928,785.9244,0.0,0.0,0.0,15.06,0.0,0.0


## Plot Effluent Mass Fluxes of top 20 point sources by Flow

In [20]:
top_sources = hr_flux_df.groupby('source_name').Flow.mean().sort_values(ascending=False).head(20)
alt.Chart(
    hr_flux_df[hr_flux_df.source_name.isin(top_sources.index)]
).transform_fold(
    ['Ammonium', 'Nitrate + Nitrite', 'Urea', 'Labile DON', 'Refractory DON', 'Labile PON', 'Refractory PON'],
).mark_line().encode(
    x = 'day:Q',
    y = alt.Y('value:Q', title='N Concentrations (mg N/second)'),
    color='key:N'
).properties(
    width = 300,
    height = 150
).facet(
    'source_name',
    columns=3
).resolve_scale(
    y = 'independent'
)

## Calculate Total Nitrogen Output per Source & Find Top Polluters

In [21]:
hr_flux_df['Total Nitrogen'] = hr_flux_df[
    ['Ammonium', 'Nitrate + Nitrite', 'Urea', 'Labile DON', 'Refractory DON', 'Labile PON', 'Refractory PON']
].sum(axis='columns')

In [22]:
hr_flux_df.head(3)

Unnamed: 0,hour,source,source_name,type,ECY ID,Ammonium,Flow,Labile DON,Labile PON,Nitrate + Nitrite,Refractory DON,Refractory PON,Salinity,Temperature,Urea,day,Total Nitrogen
0,0.0,Alderbrook - Point Source (ECY ID: 234),Alderbrook,Point Source,234,3.546004,0.000832,0.389802,0.202308,5.116366,0.0,0.0,0.0,15.06,0.0,0.0,9.25448
1,0.0,Alderwood - Point Source (ECY ID: 235),Alderwood,Point Source,235,10218.208,0.4004,126.44632,49.00896,2743.5408,0.0,0.0,0.0,15.06,0.0,0.0,13137.20408
2,0.0,Anacortes - Point Source (ECY ID: 236),Anacortes,Point Source,236,2927.144,0.1147,36.22226,14.03928,785.9244,0.0,0.0,0.0,15.06,0.0,0.0,3763.32994


## Find top polluters by average total N mass flux (mg N/second)

In [37]:
top_polluters = hr_flux_df.groupby('source_name')['Total Nitrogen'].mean().sort_values(ascending=False).head(20)

In [24]:
top_polluters

source_name
Annacis                 162260.783233
Iona                    140610.901315
South King              116980.716537
West Point              111711.313268
Chambers Creek           32179.255196
Clover Point             27298.658820
Lions Gate               26140.490477
Lulu                     25126.127393
Brightwater              24524.558886
Tacoma Central           24239.568656
Macaulay                 23930.775800
Bellingham               17155.535960
Everett Snohomish        15841.723694
OF100                    15195.041196
Lakota                    9388.072164
Bremerton                 6009.885951
Edmonds                   5948.571243
Marysville Snohomish      5827.215547
Puyallup                  5737.058525
Lynnwood                  5655.577917
Name: Total Nitrogen, dtype: float64

## Find top polluters by total N mass output annually (kg/year)

In [25]:
(top_polluters * 365*24*60*60 * 1e-6).astype('int')

source_name
Annacis                 5117056
Iona                    4434305
South King              3689103
West Point              3522927
Chambers Creek          1014804
Clover Point             860890
Lions Gate               824366
Lulu                     792377
Brightwater              773406
Tacoma Central           764419
Macaulay                 754680
Bellingham               541016
Everett Snohomish        499584
OF100                    479190
Lakota                   296062
Bremerton                189527
Edmonds                  187594
Marysville Snohomish     183767
Puyallup                 180923
Lynnwood                 178354
Name: Total Nitrogen, dtype: int64

## Calculate percentage contribution of each non-river point source to total non-river point source N contributions

In [26]:
def convert_mg_per_second_to_kg_per_year(x):
    return x*365*24*60*60 * 1e-6
sources_and_annual_n_output = hr_flux_df.groupby('source_name')['Total Nitrogen'].mean().apply(convert_mg_per_second_to_kg_per_year).astype('int')

In [27]:
sources_and_percent_contribution_n = sources_and_annual_n_output / sources_and_annual_n_output.sum()

In [28]:
sources_and_percent_contribution_n.sum()

1.0

In [29]:
top_20_sources_and_percent_contribution_n = sources_and_percent_contribution_n.sort_values(ascending=False).head(20)
top_20_sources_and_percent_contribution_n

source_name
Annacis                 0.186893
Iona                    0.161956
South King              0.134739
West Point              0.128669
Chambers Creek          0.037064
Clover Point            0.031443
Lions Gate              0.030109
Lulu                    0.028940
Brightwater             0.028247
Tacoma Central          0.027919
Macaulay                0.027564
Bellingham              0.019760
Everett Snohomish       0.018247
OF100                   0.017502
Lakota                  0.010813
Bremerton               0.006922
Edmonds                 0.006852
Marysville Snohomish    0.006712
Puyallup                0.006608
Lynnwood                0.006514
Name: Total Nitrogen, dtype: float64

In [30]:
top_20_sources_and_percent_contribution_n =pd.DataFrame(top_20_sources_and_percent_contribution_n).reset_index().rename({'Total Nitrogen': 'Percent N Contributed'}, axis='columns')
top_20_sources_and_percent_contribution_n

Unnamed: 0,source_name,Percent N Contributed
0,Annacis,0.186893
1,Iona,0.161956
2,South King,0.134739
3,West Point,0.128669
4,Chambers Creek,0.037064
5,Clover Point,0.031443
6,Lions Gate,0.030109
7,Lulu,0.02894
8,Brightwater,0.028247
9,Tacoma Central,0.027919


In [31]:
top_20_sources_and_percent_contribution_n['Percent N Contributed'] = top_20_sources_and_percent_contribution_n['Percent N Contributed'] * 100

In [32]:
total_percent_us_contributes = pd.DataFrame(
    top_20_sources_and_percent_contribution_n[top_20_sources_and_percent_contribution_n.source_name.isin(us_p)]
).reset_index().rename({'Total Nitrogen': 'Percent N Contributed'}, axis='columns')['Percent N Contributed'].sum()

NameError: name 'top_7_us_polluters' is not defined

In [None]:
top_15_sources_and_percent_contribution_n = top_20_sources_and_percent_contribution_n.head(15)
top_15_sources_and_percent_contribution_n['Country'] = [
    "Canada",
    "Canada",
    "US",
    "US",
    "US",
    "Canada",
    "Canada",
    "Canada",
    "US",
    "US",
    "Canada",
    "US",
    "US",
    "US",
    "US",
]
top_15_sources_and_percent_contribution_n

In [None]:
total_percent_canada_contributes  = top_15_sources_and_percent_contribution_n["Percent N Contributed"].sum() - total_percent_us_contributes
total_percent_canada_contributes

In [None]:
top_15_sources_and_percent_contribution_n
alt.Chart(
    pd.DataFrame(
        top_15_sources_and_percent_contribution_n
    )
).mark_bar().encode(
    x = alt.X('source_name', title='Point Source', sort=alt.EncodingSortField("Percent N Contributed", order='descending'), axis=alt.Axis(labelAngle=30)),
    y = alt.Y('Percent N Contributed'),
    color='Country'
).properties(
    width=500,
    height = 250,
    title=f'Top 15 Point Sources and Percent N Contributed to Salish Sea (Canada: {round(total_percent_canada_contributes)}%, US: {round(total_percent_us_contributes,1)}%)'
)

In [None]:
top_7_us_polluters = [
    "South King",
    "West Point",
    "Chambers Creek",
    "Brightwater",
    "Tacoma Central",
    "Bellingham",
    "Everett Snohomish",
    "Lakota",
]
alt.Chart(
    pd.DataFrame(
        top_20_sources_and_percent_contribution_n[top_20_sources_and_percent_contribution_n.source_name.isin(top_7_us_polluters)]
    ).reset_index().rename({'Total Nitrogen': 'Percent N Contributed'}, axis='columns')
).mark_bar(color='darkorange').encode(
    x = alt.X('source_name', title='Point Source', sort=alt.EncodingSortField("Percent N Contributed", order='descending'), axis=alt.Axis(labelAngle=30)),
    y = alt.Y('Percent N Contributed')
).properties(
    width=500,
    height = 250,
    title=f'US Point Sources and Percent N Contributed to Salish Sea ({round(total_percent_us_contributes,1)}%)'
)

In [None]:
sources_and_percent_contribution_n.sort_values(ascending=False).head(20).sum(), \
sources_and_percent_contribution_n.sort_values(ascending=False).head(10).sum(), \
sources_and_percent_contribution_n.sort_values(ascending=False).head(7).sum()

## Plot Effluent Mass Fluxes of top 20 point sources by Total Nitrogen Output

In [None]:
alt.Chart(
    hr_flux_df[hr_flux_df.source_name.isin(top_polluters.index)]
).transform_fold(
    ['Ammonium', 'Nitrate + Nitrite', 'Urea', 'Labile DON', 'Refractory DON', 'Labile PON', 'Refractory PON'],
).mark_line().encode(
    x = 'day:Q',
    y = alt.Y('value:Q', title='N Concentrations (N mg/L)'),
    color='key:N'
).properties(
    width = 300,
    height = 150
).facet(
    'source_name',
    columns=3
).resolve_scale(
    y = 'independent'
)

## Calculate and Plot the Component Breakdowns for selected plants

In [None]:
avg_df = hr_df.groupby(['source', 'source_name', 'type', 'ECY ID']).mean()
avg_df = avg_df.reset_index()

In [None]:
alt.Chart(
    avg_df[avg_df.source_name.isin(top_polluters.index)]
).mark_bar().transform_fold(
    ['Ammonium', 'Labile DON', 'Labile PON',
       'Nitrate + Nitrite', 'Refractory DON', 'Refractory PON', 'Urea']
).encode(
    x = alt.X('key:N', axis=alt.Axis(labelAngle=30)),
    y = alt.Y('value:Q', title='Effluent Concentration (mg N/L)')
).properties(
    width=250,
    height = 125,
    title='Average Effluent Measurement, N Component Concentrations'
).facet(
    alt.Facet(
    'source_name',
    title=None),
        columns=4,
    
).resolve_scale(y='independent')

In [None]:
two_plants = avg_df[avg_df.source_name.isin(['West Point', 'South King', "Chambers Creek", "Brightwater"])]
two_plants

In [None]:
two_plants['Total Nitrogen'] = two_plants[
    ['Ammonium', 'Nitrate + Nitrite', 'Urea', 'Labile DON', 'Refractory DON', 'Labile PON', 'Refractory PON']
].sum(axis='columns')

In [None]:
two_plants

In [None]:
two_plants_percents = two_plants.copy()

for col in [
    'Ammonium', 'Nitrate + Nitrite', 'Urea', 'Labile DON', 
    'Refractory DON', 'Labile PON', 'Refractory PON']:
    two_plants_percents[col] = two_plants_percents[col] / two_plants_percents['Total Nitrogen']

In [None]:
conc_chart = alt.Chart(
    avg_df[avg_df.source_name.isin(['West Point', 'South King', "Chambers Creek", "Brightwater"])],
).mark_bar().transform_fold(
    ['Ammonium', 'Labile DON', 'Labile PON',
       'Nitrate + Nitrite', 'Refractory DON', 'Refractory PON', 'Urea']
).encode(
    alt.X('key:N', axis=alt.Axis(labelAngle=30), title=None),
    alt.Y('value:Q', title='Effluent Concentration (mg N/L)')
#     color = 'key:N'
).properties(
    width=300,
    height = 200
).facet(
    alt.Facet('source_name', title=None),
    title='Average Effluent Measurement, N Component Concentrations'
)
conc_chart

In [None]:
percent_chart = alt.Chart(
    two_plants_percents[two_plants_percents.source_name.isin(['West Point', 'South King', "Chambers Creek", "Brightwater"])],
).mark_bar().transform_fold(
    ['Ammonium', 'Labile DON', 'Labile PON',
       'Nitrate + Nitrite', 'Refractory DON', 'Refractory PON', 'Urea']
).encode(
    x = alt.X('key:N', axis=alt.Axis(labelAngle=30), title=None),
    y = alt.Y('value:Q', title='Percent of Total N'),
#     color = 'key:N'
).properties(
    width=300,
    height = 200
).facet(
    alt.Facet('source_name', title=None),
    title='Average Effluent Measurement, N Component Percentages of Total N'
)
percent_chart

In [None]:
two_plants_percents.columns

## Find High End Member Component Percentages

In [None]:
top_2_us_contributors = [
	'South King',
	'West Point',
]
average_breakdown_df = avg_df[avg_df.source_name.isin(top_2_us_contributors)]
average_breakdown_df = pd.DataFrame(average_breakdown_df[ 
        ['Ammonium', 'Labile DON', 'Labile PON',
           'Nitrate + Nitrite', 'Refractory DON', 'Refractory PON', 'Urea']].mean()).reset_index().rename({0:'value'}, axis='columns')

In [None]:
average_breakdown_df['percent'] = average_breakdown_df.value / average_breakdown_df.value.sum()
average_breakdown_df

In [None]:
average_breakdown_df.value.sum()

In [None]:
alt.Chart(
    average_breakdown_df
).mark_bar().encode(
    x = alt.X('index:N', axis=alt.Axis(labelAngle=30), title=None),
    y = alt.Y('value:Q', title='Effluent Concentration (mg N/L)'),
).properties(
    width=300,
    height = 200,
    title='High End N Component Breakdown'
) | alt.Chart(
    average_breakdown_df
).mark_bar().encode(
    x = alt.X('index:N', axis=alt.Axis(labelAngle=30), title=None),
    y = alt.Y('percent:Q', title='Percent of Total N'),
).properties(
    width=300,
    height = 200,
    title='High End N Component Breakdown'
)

# Examine Component Breakdown for the low end member data

This data was provided by Professor Brett.

In [None]:
low_end_df = pd.DataFrame({
    'Total Nitrogen': [2.45],
    'Labile DON': [0.31],
    'Ammonium': [0.31],
    'Nitrate + Nitrite': [1.73],
    'Labile PON': [0.1],
    'Urea': [0],
    'Refractory DON': [0],
    'Refractory PON': [0],
})
low_end_df

In [None]:
low_end_percent_df = pd.DataFrame({
    'Total Nitrogen': [1.0],
    'Labile DON': [.12],
    'Ammonium': [.13],
    'Nitrate + Nitrite': [.70],
    'Labile PON': [.04],
    'Urea': [0],
    'Refractory DON': [0],
    'Refractory PON': [0],
})
low_end_percent_df.transpose()

In [None]:
conc_plot = alt.Chart(
    low_end_df
).mark_bar().transform_fold(
    ['Ammonium', 'Labile DON', 'Labile PON',
       'Nitrate + Nitrite', 'Refractory DON', 'Refractory PON', 'Urea']
).encode(
    x = alt.X('key:N', axis=alt.Axis(labelAngle=30), title=None),
    y = alt.Y('value:Q', title='Effluent Concentration (mg N/L)'),
#     color = 'key:N'
).properties(
    width=300,
    height = 200,
    title='Low End N Component Breakdown'
)

In [None]:
percent_chart = alt.Chart(
    low_end_percent_df
).mark_bar().transform_fold(
    ['Ammonium', 'Labile DON', 'Labile PON',
       'Nitrate + Nitrite', 'Refractory DON', 'Refractory PON', 'Urea']
).encode(
    x = alt.X('key:N', axis=alt.Axis(labelAngle=30), title=None),
    y = alt.Y('value:Q', title='Percent of Total N'),
#     color = 'key:N'
).properties(
    width=300,
    height = 200,
    title='Low End N Component Breakdown'
)

In [None]:
percent_chart | conc_plot