Import libraries

In [None]:
import math
import matplotlib
import numpy as np
import json
import pandas as pd
import netCDF4
from netCDF4 import Dataset
import csv
import datetime, time
from datetime import date, timedelta, datetime
import os.path

Formatting TOAR ship data

In [None]:
# Skipping the first 89 rows makes row 90 the header row, so the data starts on row 91
skip_rows = 89

# Location of the TOAR ship/buoy CSV file
file_path = '/home/ajp255/nethome/Data/PartIII_23/toar2_oceans_ship_buoy_data_v0_94r.csv'

# Short names used in the rest of this cell, keyed by the raw CSV header they replace
column_name_mapping = {
    'time_UTC[yyyy/m/d h:mm:ss]': 'time',
    'lat[deg]': 'latitude',
    'lon2[0-360degE]': 'longitude',
    'O3_final[ppb]': 'ozone',
    'CO_final[ppb]': 'CO',
}

# The renamed columns are exactly the ones that are kept, in this order
selected_columns = list(column_name_mapping.values())

# Read the file, rename the headers, keep the wanted columns and attach a
# constant altitude of 300 to every row
flight_data = (
    pd.read_csv(file_path, skiprows=skip_rows, low_memory=False)
    .rename(columns=column_name_mapping)
    .loc[:, selected_columns]
    .assign(altitude=300)
)

# Define a function to convert the time column into Unix epoch format
def convert_to_unix_timestamp(date_string):
    """Convert a date/time string into fractional days since the Unix epoch.

    Args:
        date_string: Text in the form ``YYYY/MM/DD HH:MM`` or
            ``YYYY/MM/DD HH:MM:SS`` (e.g. ``"2015/03/07 04:05"``).

    Returns:
        float: ``datetime.timestamp()`` of the parsed value divided by the
        number of seconds in a day.

    Raises:
        ValueError: If ``date_string`` matches neither accepted format.

    NOTE(review): the parsed datetime is naive, so ``timestamp()`` interprets
    it in the host's local time zone. The source column is labelled UTC, so
    the result is only a true UTC epoch value when the host runs in UTC.
    The day-splitting code in this cell uses the same local-time convention
    (``datetime.fromtimestamp`` / ``time.mktime``), so any change here must
    be made there as well -- confirm the intended behaviour.
    """
    # The column header documents seconds ("h:mm:ss"), so accept them when
    # present instead of failing with "unconverted data remains".
    accepted_formats = ("%Y/%m/%d %H:%M", "%Y/%m/%d %H:%M:%S")

    for date_format in accepted_formats:
        try:
            datetime_obj = datetime.strptime(date_string, date_format)
        except ValueError:
            continue
        break
    else:
        raise ValueError(
            "time data %r does not match any of the formats %r"
            % (date_string, accepted_formats)
        )

    # Seconds since January 1, 1970 (see the time zone note above)
    unix_timestamp_sec = datetime_obj.timestamp()

    # Convert to days
    sec_per_day = 24*60*60
    return unix_timestamp_sec/sec_per_day

# Swap every time string for its value in days since the Unix epoch, then
# put the rows into chronological order
flight_data = (
    flight_data
    .assign(time=flight_data['time'].map(convert_to_unix_timestamp))
    .sort_values('time')
)

# Define a function to loop over each day in the file range (assuming chronological order)
def daterange(start_date, end_date):
    """Yield ``start_date`` advanced by 0, 1, 2, ... whole days.

    The number of values produced is the ``days`` component of
    ``end_date - start_date``, so ``end_date`` itself is never yielded and
    nothing is produced when that component is zero or negative.
    """
    whole_days = (end_date - start_date).days
    yield from (start_date + timedelta(days=offset) for offset in range(whole_days))
        
# NOTE(review): datetime.fromtimestamp and time.mktime below both work in the
# host's local time zone. That matches convert_to_unix_timestamp in this cell,
# which parses the strings as naive datetimes, so days are split on the calendar
# date written in the CSV. The stored 'time' values equal UTC epoch days only
# when the host runs in UTC -- confirm.
sec_per_day = 24*60*60

# Define start date from the beginning of the day of the first observation
# (the rows were sorted by time above, so values[0] is the earliest)
start_date = datetime.fromtimestamp(flight_data['time'].values[0]*sec_per_day)
starting_point = start_date.hour*60*60 + start_date.minute*60 + start_date.second
start_date = start_date - timedelta(0, starting_point)

# Define end date as the midnight that follows the last observation.
# The last observation's own seconds-of-day (end_point) must be subtracted here:
# subtracting starting_point instead made the loop skip the final day whenever
# the last observation was earlier in the day than the first one.
end_date = datetime.fromtimestamp(flight_data['time'].values[-1]*sec_per_day)
end_point = end_date.hour*60*60 + end_date.minute*60 + end_date.second
end_date = end_date - timedelta(0, end_point) + timedelta(1)

# Iterate through each day and create a daily netCDF file with desired variables
output_prefix = '/home/ajp255/nethome/Data/Output/TOAR_ship/TOAR_ship_noalt_data_'

# Observation times in seconds; computed once instead of on every iteration
obs_seconds = flight_data['time']*sec_per_day

for single_date in daterange(start_date, end_date):
    # Half-open window [day_start, day_end) covering this calendar day
    day_start = time.mktime(single_date.timetuple())
    day_end = time.mktime((single_date + timedelta(1)).timetuple())
    daily_data = flight_data.loc[(obs_seconds >= day_start) & (obs_seconds < day_end)]

    # Days without observations produce no file
    if daily_data.empty:
        continue

    obs = len(daily_data)
    date_string = single_date.strftime('%Y%m%d')

    # Save the netCDF file to the remote server. The context manager closes the
    # file even if a write fails, and format= is passed by keyword because the
    # third positional argument of Dataset is 'clobber', not the file format.
    with Dataset(output_prefix + date_string + '.nc', 'w', format='NETCDF4') as ncout:
        ncout.createDimension('obs', obs)

        timevar = ncout.createVariable('time', 'float64', ('obs',))
        timevar[:] = daily_data['time'].values
        timevar.setncattr('units', 'days since 1970-01-01')

        latitude = ncout.createVariable('latitude', 'float32', ('obs',))
        latitude[:] = daily_data['latitude'].values
        latitude.setncattr('units', 'degrees north')

        # Source column is 'lon2[0-360degE]'; units added to match latitude
        longitude = ncout.createVariable('longitude', 'float32', ('obs',))
        longitude[:] = daily_data['longitude'].values
        longitude.setncattr('units', 'degrees east')

        altitude = ncout.createVariable('altitude', 'float32', ('obs',))
        altitude[:] = daily_data['altitude'].values
        altitude.setncattr('units', 'm asl')

        ozone = ncout.createVariable('mole_fraction_of_ozone_in_air', 'float32', ('obs',))
        ozone[:] = daily_data['ozone'].values
        ozone.setncattr('units', 'ppbv')

        # Create a variable to store a tag identifying the data source of each observation
        tag_variable = ncout.createVariable('tag', 'str', ('obs',))
        tag_variable[:] = np.array(['TOAR_ship'] * obs)

Formatting TOAR flight data

In [None]:
# Skipping the first 60 rows makes row 61 the header row, so the data starts on row 62
skip_rows = 60

# Location of the TOAR airborne CSV file
file_path = '/home/ajp255/nethome/Data/PartIII_23/toar2_oceans_airborne_data_5000m_v0_91r.csv'

# Short names used in the rest of this cell, keyed by the raw CSV header they replace
column_name_mapping = {
    'time_UTC[yyyy/m/d h:mm:ss]': 'time',
    'lat[deg]': 'latitude',
    'lon2[0-360degE]': 'longitude',
    'alt[m]': 'altitude',
    'Ozone[ppbv]': 'ozone',
}

# The renamed columns are exactly the ones that are kept, in this order
selected_columns = list(column_name_mapping.values())

# Read the file, rename the headers and keep only the wanted columns
flight_data = (
    pd.read_csv(file_path, skiprows=skip_rows, low_memory=False)
    .rename(columns=column_name_mapping)
    .loc[:, selected_columns]
)

# Define a function to convert the time column into Unix epoch format
def convert_to_unix_timestamp(date_string):
    """Convert a date/time string into fractional days since the Unix epoch.

    Args:
        date_string: Text in the form ``YYYY/MM/DD HH:MM`` or
            ``YYYY/MM/DD HH:MM:SS`` (e.g. ``"2015/03/07 04:05"``).

    Returns:
        float: ``datetime.timestamp()`` of the parsed value divided by the
        number of seconds in a day.

    Raises:
        ValueError: If ``date_string`` matches neither accepted format.

    NOTE(review): the parsed datetime is naive, so ``timestamp()`` interprets
    it in the host's local time zone. The source column is labelled UTC, so
    the result is only a true UTC epoch value when the host runs in UTC.
    The day-splitting code in this cell uses the same local-time convention
    (``datetime.fromtimestamp`` / ``time.mktime``), so any change here must
    be made there as well -- confirm the intended behaviour.
    """
    # The column header documents seconds ("h:mm:ss"), so accept them when
    # present instead of failing with "unconverted data remains".
    accepted_formats = ("%Y/%m/%d %H:%M", "%Y/%m/%d %H:%M:%S")

    for date_format in accepted_formats:
        try:
            datetime_obj = datetime.strptime(date_string, date_format)
        except ValueError:
            continue
        break
    else:
        raise ValueError(
            "time data %r does not match any of the formats %r"
            % (date_string, accepted_formats)
        )

    # Seconds since January 1, 1970 (see the time zone note above)
    unix_timestamp_sec = datetime_obj.timestamp()

    # Convert to days
    sec_per_day = 24*60*60
    return unix_timestamp_sec/sec_per_day

# Swap every time string for its value in days since the Unix epoch, then
# put the rows into chronological order
flight_data = (
    flight_data
    .assign(time=flight_data['time'].map(convert_to_unix_timestamp))
    .sort_values('time')
)

# Define a function to loop over each day in the file range (assuming chronological order)
def daterange(start_date, end_date):
    """Yield ``start_date`` advanced by 0, 1, 2, ... whole days.

    The number of values produced is the ``days`` component of
    ``end_date - start_date``, so ``end_date`` itself is never yielded and
    nothing is produced when that component is zero or negative.
    """
    whole_days = (end_date - start_date).days
    yield from (start_date + timedelta(days=offset) for offset in range(whole_days))
        
# NOTE(review): datetime.fromtimestamp and time.mktime below both work in the
# host's local time zone. That matches convert_to_unix_timestamp in this cell,
# which parses the strings as naive datetimes, so days are split on the calendar
# date written in the CSV. The stored 'time' values equal UTC epoch days only
# when the host runs in UTC -- confirm.
sec_per_day = 24*60*60

# Define start date from the beginning of the day of the first observation
# (the rows were sorted by time above, so values[0] is the earliest)
start_date = datetime.fromtimestamp(flight_data['time'].values[0]*sec_per_day)
starting_point = start_date.hour*60*60 + start_date.minute*60 + start_date.second
start_date = start_date - timedelta(0, starting_point)

# Define end date as the midnight that follows the last observation.
# The last observation's own seconds-of-day (end_point) must be subtracted here:
# subtracting starting_point instead made the loop skip the final day whenever
# the last observation was earlier in the day than the first one.
end_date = datetime.fromtimestamp(flight_data['time'].values[-1]*sec_per_day)
end_point = end_date.hour*60*60 + end_date.minute*60 + end_date.second
end_date = end_date - timedelta(0, end_point) + timedelta(1)

# Iterate through each day and create a daily netCDF file with desired variables
output_prefix = '/home/ajp255/nethome/Data/Output/TOAR_airborne/TOAR_airborne_data_'

# Observation times in seconds; computed once instead of on every iteration
obs_seconds = flight_data['time']*sec_per_day

for single_date in daterange(start_date, end_date):
    # Half-open window [day_start, day_end) covering this calendar day
    day_start = time.mktime(single_date.timetuple())
    day_end = time.mktime((single_date + timedelta(1)).timetuple())
    daily_data = flight_data.loc[(obs_seconds >= day_start) & (obs_seconds < day_end)]

    # Days without observations produce no file
    if daily_data.empty:
        continue

    obs = len(daily_data)
    date_string = single_date.strftime('%Y%m%d')

    # The netCDF file is saved on the remote server. The context manager closes
    # the file even if a write fails, and format= is passed by keyword because the
    # third positional argument of Dataset is 'clobber', not the file format.
    with Dataset(output_prefix + date_string + '.nc', 'w', format='NETCDF4') as ncout:
        ncout.createDimension('obs', obs)

        timevar = ncout.createVariable('time', 'float64', ('obs',))
        timevar[:] = daily_data['time'].values
        timevar.setncattr('units', 'days since 1970-01-01')

        latitude = ncout.createVariable('latitude', 'float32', ('obs',))
        latitude[:] = daily_data['latitude'].values
        latitude.setncattr('units', 'degrees north')

        # Source column is 'lon2[0-360degE]'; units added to match latitude
        longitude = ncout.createVariable('longitude', 'float32', ('obs',))
        longitude[:] = daily_data['longitude'].values
        longitude.setncattr('units', 'degrees east')

        altitude = ncout.createVariable('altitude', 'float32', ('obs',))
        altitude[:] = daily_data['altitude'].values
        altitude.setncattr('units', 'm asl')

        ozone = ncout.createVariable('mole_fraction_of_ozone_in_air', 'float32', ('obs',))
        ozone[:] = daily_data['ozone'].values
        ozone.setncattr('units', 'ppbv')

        # Create a variable to store a tag identifying the data source of each observation
        tag_variable = ncout.createVariable('tag', 'str', ('obs',))
        tag_variable[:] = np.array(['TOAR_airborne'] * obs)