<a href="https://colab.research.google.com/github/drfperez/openair/blob/main/timeVariarion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:

# Install required packages if not already installed
if (!require("openair")) install.packages("openair")
if (!require("tidyr")) install.packages("tidyr")
if (!require("dplyr")) install.packages("dplyr")
if (!require("reticulate")) install.packages("reticulate")

# Load libraries
library(openair)
library(tidyr)
library(dplyr)
library(reticulate)

# Assume data is in a CSV file with columns: date, pollutant, value
# Upload your 'data.csv' to the Colab session via the Files panel
data_long <- read.csv("data.csv")

# Convert value to numeric (handles factors/characters; introduces NA for non-numerics)
data_long$value <- as.numeric(as.character(data_long$value))

# Convert date to POSIXct (adjust format if needed)
data_long$date <- as.POSIXct(data_long$date)  # Auto-detects; specify format="%Y-%m-%d %H:%M:%S" if issues

# Optional: Inspect data for debugging
print(str(data_long))
print(head(data_long))
print(sum(is.na(data_long$value)))  # Count any NAs introduced

# Pivot to wide format for openair (handle duplicates with mean, ignoring NAs)
data_wide <- data_long %>%
  pivot_wider(names_from = pollutant, values_from = value, values_fn = list(value = function(x) mean(x, na.rm = TRUE)))

# Run timeVariation (suppress initial plotting)
pollutants <- setdiff(names(data_wide), "date")
tv <- timeVariation(data_wide, pollutant = pollutants, plot = FALSE)

# Save each plot as PNG
for (i in seq_along(tv$plot)) {
  png(filename = paste0("time_variation_plot_", i, ".png"), width = 800, height = 600)
  print(tv$plot[[i]])
  dev.off()
}

# Use reticulate to call Python for downloading files
use_python("/usr/bin/python3")
py_run_string("from google.colab import files")

# Download each PNG
for (i in seq_along(tv$plot)) {
  py_run_string(paste0("files.download('time_variation_plot_", i, ".png')"))
}

Loading required package: openair

Loading required package: tidyr

Loading required package: dplyr


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: reticulate

“NAs introduced by coercion”


'data.frame':	1703544 obs. of  3 variables:
 $ date     : POSIXct, format: "1991-11-20 00:00:00" "1991-11-20 00:00:00" ...
 $ pollutant: chr  "co" "h2s" "hcnm" "no" ...
 $ value    : num  NA 2 NA 6 10 NA 10 NA 2 NA ...
NULL
        date pollutant value
1 1991-11-20        co    NA
2 1991-11-20       h2s     2
3 1991-11-20      hcnm    NA
4 1991-11-20        no     6
5 1991-11-20       no2    10
6 1991-11-20        o3    NA
[1] 495699
[1] "day.hour" "hour"     "day"      "month"   


ERROR: 'NoneType' object has no attribute 'kernel'