In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv("wyoming.csv")

In [3]:
analyte_data = data[data.columns[1:]]

melted_analytes = pd.melt(analyte_data, id_vars = ['TT or TH Sample ID', 'Date  Sampled'],
                          var_name = ["analyte"], value_name = "concentration")

melted_analytes["concentration"] = melted_analytes["concentration"].replace("ND", 0.0)

melted_analytes["concentration"] = [float(x.replace(" J", "")) if isinstance(x, str) else float(x) 
                                    for x in list(melted_analytes["concentration"])]

melted_analytes["analyte"] = (melted_analytes["analyte"].str.replace(", ng/L", "")
                              .str.replace("Total ", "").str.replace(",  ng/L", "")
                             )

melted_analytes["sampling_date"] = pd.to_datetime(melted_analytes['Date  Sampled'], format='%b-%y')
melted_analytes.drop(columns = ["Date  Sampled"], inplace = True)

num_rows = melted_analytes.shape[0]

melted_analytes["units"] = ["ng/L"]*num_rows
melted_analytes["is_detected"] = melted_analytes["concentration"].apply(lambda x: x > 0.0)
melted_analytes["source_type"] = ["groundwater"] * num_rows
melted_analytes["state"] = ["Wyoming"]*num_rows
melted_analytes["sampling_location"] = ["Jackson Hole Airport"]*num_rows
melted_analytes["latitude"] = [43.6088]*num_rows
melted_analytes["longitude"] = [110.7376]*num_rows
melted_analytes["data_source"] = ["Jackson Hole Airport"]*num_rows

melted_analytes.rename(columns = {'TT or TH Sample ID': "sample_id"}, inplace = True)
melted_analytes

Unnamed: 0,sample_id,analyte,concentration,sampling_date,units,is_detected,source_type,state,sampling_location,latitude,longitude,data_source
0,JW0121,PFOA,1.1,2020-06-01,ng/L,True,groundwater,Wyoming,Jackson Hole Airport,43.6088,110.7376,Jackson Hole Airport
1,JW0122,PFOA,1.5,2020-06-01,ng/L,True,groundwater,Wyoming,Jackson Hole Airport,43.6088,110.7376,Jackson Hole Airport
2,JW0127,PFOA,0.0,2020-06-01,ng/L,False,groundwater,Wyoming,Jackson Hole Airport,43.6088,110.7376,Jackson Hole Airport
3,JW0128,PFOA,0.0,2020-06-01,ng/L,False,groundwater,Wyoming,Jackson Hole Airport,43.6088,110.7376,Jackson Hole Airport
4,JW0129,PFOA,0.0,2020-06-01,ng/L,False,groundwater,Wyoming,Jackson Hole Airport,43.6088,110.7376,Jackson Hole Airport
...,...,...,...,...,...,...,...,...,...,...,...,...
550,WW-7,PFOA + PFOS,0.0,2020-08-01,ng/L,False,groundwater,Wyoming,Jackson Hole Airport,43.6088,110.7376,Jackson Hole Airport
551,WW-8,PFOA + PFOS,47.0,2020-08-01,ng/L,True,groundwater,Wyoming,Jackson Hole Airport,43.6088,110.7376,Jackson Hole Airport
552,WW-9,PFOA + PFOS,32.0,2020-08-01,ng/L,True,groundwater,Wyoming,Jackson Hole Airport,43.6088,110.7376,Jackson Hole Airport
553,WW-9,PFOA + PFOS,25.0,2021-08-01,ng/L,True,groundwater,Wyoming,Jackson Hole Airport,43.6088,110.7376,Jackson Hole Airport


In [4]:
melted_analytes.to_csv("wyoming_cleaned.csv")