In [1]:
from matplotlib import cm
import torch
import gpytorch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


def data_import(path, delimiter):
    """Load a CSV file and discard every row that has a missing value.

    Parameters
    ----------
    path : str or file-like
        Location of the CSV, forwarded unchanged to ``pd.read_csv``.
    delimiter : str
        Field separator. The sentinel string ``'none'`` means "do not pass a
        delimiter", so ``pd.read_csv`` falls back to its default.

    Returns
    -------
    pandas.DataFrame
        The parsed table without any row containing a NaN. The surviving
        rows keep their original index labels.
    """
    # Blank/whitespace cells and the usual textual markers all count as missing.
    read_options = {"na_values": ["", " ", "nan", "NaN", "NA"]}
    if delimiter != 'none':
        read_options["delimiter"] = delimiter

    table = pd.read_csv(path, **read_options)
    return table.dropna(axis=0, how='any')



def data_frequency(file1, desired_frequency: str):
    """Resample a time-stamped table to a coarser frequency using bin means.

    The input frame is left untouched: all work happens on a copy, so the
    function can be called repeatedly on the same frame (for example when a
    notebook cell is re-run).

    Parameters
    ----------
    file1 : pandas.DataFrame
        Table with a ``'Time(UTC)'`` column that ``pd.to_datetime`` can parse.
        Every other column is averaged with ``.mean()`` when resampling.
    desired_frequency : str
        One of ``'ten_minute'``, ``'hourly'``, ``'4_hourly'``, ``'8_hourly'``,
        ``'twelve_hourly'`` or ``'daily'``. Any other value skips resampling;
        the data is then only parsed, NaN-filtered and re-indexed.

    Returns
    -------
    pandas.DataFrame
        New frame with ``'Time(UTC)'`` as its first column (bin start times
        when resampled), a fresh 0..n-1 index, and no row containing NaN
        (which removes the empty bins created by resampling).
    """
    # Non-deprecated pandas offset aliases ('min'/'h' replace the older 'T'/'H').
    resample_rules = {
        'ten_minute': '10min',
        'hourly': 'h',
        '4_hourly': '4h',
        '8_hourly': '8h',
        'twelve_hourly': '12h',
        'daily': 'D',
    }

    # Copy first so the caller's DataFrame keeps its 'Time(UTC)' column.
    frame = file1.copy()
    frame['Time(UTC)'] = pd.to_datetime(frame['Time(UTC)'])
    frame = frame.set_index('Time(UTC)')

    rule = resample_rules.get(desired_frequency)
    if rule is not None:
        frame = frame.resample(rule).mean()

    # Bins with no observations come back as all-NaN rows; drop them together
    # with any row that is missing a value.
    frame = frame.dropna(axis=0, how='any')

    # The index is still named 'Time(UTC)', so reset_index() restores it as a
    # regular column under that name.
    return frame.reset_index()

# Adds a running sequence number (used here instead of sin/cos time values).
def add_sequence(df):
    """Attach a 1-based ``'Sequence'`` counter column to ``df``.

    The column is written onto the frame that was passed in (the caller's
    object is modified) and that same frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to number; row ``i`` (0-based position) gets the value ``i + 1``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the ``'Sequence'`` column.
    """
    row_count = len(df)
    df['Sequence'] = np.arange(start=1, stop=row_count + 1)
    return df

# Load the two cleaned site CSVs; 'none' makes data_import use read_csv's default delimiter.
# NOTE(review): these are absolute, machine-specific Windows paths — the cell only runs
# where these exact files exist.
s2_site_data = data_import("C:\\Users\\396760\\lanl\\data\\ARMSAILS2_cleaned.csv", 'none')
m1_site_data = data_import("C:\\Users\\396760\\lanl\\data\\ARMSAILM1_cleaned.csv", 'none')

# Resample both sites to 12-hour bin means.
s2_site_data = data_frequency(s2_site_data, 'twelve_hourly')
m1_site_data = data_frequency(m1_site_data, 'twelve_hourly') 

# Keep the timestamps separately: 'Time(UTC)' is not among the columns that are
# converted to tensors below.
time_s2 = s2_site_data['Time(UTC)']
time_m1 = m1_site_data['Time(UTC)']

# Remove the pressure column from both frames (in place).
s2_site_data.drop(columns=['sample_pres_mmHg'], inplace=True)
m1_site_data.drop(columns=['sample_pres_mmHg'], inplace=True)

# Add sequence numbers to the data
s2_site_data = add_sequence(s2_site_data)
m1_site_data = add_sequence(m1_site_data)

# Column order here fixes the column order of the tensors built below.
collumns = ['Sequence', 'sample_rh_pct', 'sample_temp_C', 'pm_1_ug_per_m3']
# NOTE(review): from here on s2_site_data / m1_site_data are float32 tensors, no longer
# DataFrames; re-running only part of this cell will fail on the rebound names.
s2_site_data = torch.tensor(s2_site_data[collumns].values, dtype=torch.float32)
m1_site_data = torch.tensor(m1_site_data[collumns].values, dtype=torch.float32)

In [2]:
import pandas as pd

def count_points_in_season(time_series, season, year):
    """Count the timestamps that fall inside one meteorological season.

    Parameters
    ----------
    time_series : pandas.Series, pandas.DatetimeIndex, list or array-like
        Timestamps, or anything ``pd.to_datetime`` can parse into timestamps.
    season : str
        ``"winter"``, ``"spring"``, ``"summer"`` or ``"fall"`` (lower case).
    year : int
        Calendar year of the season. Winter spans two calendar years and is
        labelled by the year of its January/February: ``("winter", 2023)``
        means December 2022 plus January and February 2023.

    Returns
    -------
    int
        Number of timestamps inside the requested season. Unparseable or
        missing timestamps (NaT) are never counted.

    Raises
    ------
    KeyError
        If ``season`` is not one of the four supported names.
    """
    # Months belonging to each season.
    season_months = {
        "winter": [12, 1, 2],
        "spring": [3, 4, 5],
        "summer": [6, 7, 8],
        "fall": [9, 10, 11]
    }
    if season not in season_months:
        raise KeyError(
            f"unknown season {season!r}; expected one of {sorted(season_months)}"
        )

    # pd.to_datetime returns a DatetimeIndex for list/array/index input, which
    # has no `.dt` accessor; wrapping in a Series makes every input type work.
    stamps = pd.Series(pd.to_datetime(time_series))
    months = stamps.dt.month
    years = stamps.dt.year

    if season == "winter":
        # December belongs to the previous calendar year.
        in_season = ((months == 12) & (years == year - 1)) | \
                    (months.isin([1, 2]) & (years == year))
    else:
        in_season = months.isin(season_months[season]) & (years == year)

    # Summing the boolean mask counts the matches; int() keeps a plain Python int.
    return int(in_season.sum())

# Example usage
# Count the number of data points for each season in s2 data
# (time_s2 holds the S2 'Time(UTC)' column captured in the previous cell).


points_in_summer_2022 = count_points_in_season(time_s2, "summer", 2022)
print(f'Number of data points in Summer 2022: {points_in_summer_2022}')

points_in_fall_2022 = count_points_in_season(time_s2, "fall", 2022)
print(f'Number of data points in Fall 2022: {points_in_fall_2022}')

# "winter" with year 2023 covers December 2022 plus January-February 2023.
points_in_winter_2023 = count_points_in_season(time_s2, "winter", 2023)
print(f'Number of data points in Winter 2023: {points_in_winter_2023}')

points_in_spring_2023 = count_points_in_season(time_s2, "spring", 2023)
print(f'Number of data points in Spring 2023: {points_in_spring_2023}')


Number of data points in Summer 2022: 146
Number of data points in Fall 2022: 152
Number of data points in Winter 2023: 159
Number of data points in Spring 2023: 184
