In [None]:
"""
Data source:
https://mesonet.agron.iastate.edu/request/download.phtml?network=AU__ASOS
Hourly temperature data from Perth Airport for 2024.
"""
import polars as pl

first_week = pl.read_csv("YPPH.csv", try_parse_dates=True).with_columns(
    pl.col("valid").alias("Datetime"), pl.col("tmpc").alias("Temperature")
).select("Datetime", "Temperature")


first_week.head(10)

Datetime,Temperature
datetime[μs],f64
2024-01-01 00:00:00,23.0
2024-01-01 01:00:00,24.0
2024-01-01 02:00:00,25.0
2024-01-01 03:00:00,28.0
2024-01-01 04:00:00,30.0
2024-01-01 05:00:00,27.0
2024-01-01 06:00:00,28.0
2024-01-01 07:00:00,29.0
2024-01-01 08:00:00,27.0
2024-01-01 09:00:00,27.0


In [None]:
first_week.select("Temperature").describe()

statistic,Temperature
str,f64
"""count""",8758.0
"""null_count""",7.0
"""mean""",19.881251
"""std""",6.843347
"""min""",2.0
"""25%""",15.0
"""50%""",19.0
"""75%""",24.0
"""max""",43.0


In [None]:
import altair as alt
first_week.head(24*7).plot.line("Datetime", "Temperature").encode(
    x='Datetime:T',
    y=alt.Y('Temperature:Q', scale=alt.Scale(domain=[10, 40]))
).properties(
        title='Hourly Temperature at Perth Airport (YPPH) - First 7 Days of 2024',
        width=600,
        height=400
    )

In [None]:
n_obs_week = 24 * 7
first_week = first_week.head(n_obs_week)
first_week_mean = first_week.select(pl.col("Temperature").mean()).item()
first_week_std = first_week.select(pl.col("Temperature").std()).item()
print(f"First week mean: {first_week_mean:.2f} °C")
print(f"First week std: {first_week_std:.2f} °C")
# Standard error of the mean
first_week_sem = first_week_std / (n_obs_week) ** 0.5
print(f"First week SEM: {first_week_sem:.2f} °C")

First week mean: 24.92 °C
First week std. dev: 4.85 °C
First week SEM: 0.37 °C


In [None]:
# Upsample to ten minute intervals
df_10min = first_week.upsample("Datetime", every= "10m").with_columns(
    pl.col("Temperature").interpolate().alias("Interpolated Temperature")
)
df_10min.head(10)

Datetime,Temperature,Interpolated Temperature
datetime[μs],f64,f64
2024-01-01 00:00:00,23.0,23.0
2024-01-01 00:10:00,,23.166667
2024-01-01 00:20:00,,23.333333
2024-01-01 00:30:00,,23.5
2024-01-01 00:40:00,,23.666667
2024-01-01 00:50:00,,23.833333
2024-01-01 01:00:00,24.0,24.0
2024-01-01 01:10:00,,24.166667
2024-01-01 01:20:00,,24.333333
2024-01-01 01:30:00,,24.5


In [None]:
hourly = first_week.head(n_obs_week).plot.line("Datetime", "Temperature").encode(
    x='Datetime:T',
    y=alt.Y('Temperature:Q', scale=alt.Scale(domain=[10, 40])),
    color=alt.value('blue'),
)

_10min = df_10min.head(n_obs_week * 6).plot.line("Datetime", "Interpolated Temperature").encode(
    x='Datetime:T',
    y=alt.Y('Interpolated Temperature:Q', scale=alt.Scale(domain=[10 , 40])),
    color=alt.value('red') ,
    strokeDash=alt.value([5,5])
)

# Layer them together
combined_chart = (hourly + _10min).properties(
    title='Temperature at Perth Airport (YPPH) - First 7 Days of 2024: Hourly vs 10-min Interpolated',
    width=600,
    height=400
).resolve_scale(color='independent')

combined_chart

In [45]:
# Now get statistics for the 10-min data
first_week_10min = df_10min.head(n_obs_week * 6)
first_week_10min_mean = first_week_10min.select(pl.col("Interpolated Temperature").mean()).item()
first_week_10min_std = first_week_10min.select(pl.col("Interpolated Temperature").std()).item()
print(f"First week (10-min) mean: {first_week_10min_mean:.2f} °C")
print(f"First week (10-min) std: {first_week_10min_std:.2f} °C")
# Standard error of the mean
first_week_10min_sem = first_week_10min_std / (n_obs_week * 6) ** 0.5
print(f"First week (10-min) SEM: {first_week_10min_sem:.2f} °C")

First week (10-min) mean: 24.94 °C
First week (10-min) std: 4.78 °C
First week (10-min) SEM: 0.15 °C


In [76]:
first_week.remove(
    pl.col("Temperature").is_nan()
).with_columns( 
    pl.col("Temperature").shift(1).alias("Temperature (t-1)")
).select(
    pl.corr("Temperature", "Temperature (t-1)")
).to_series(
).alias("Hourly autocorrelation")



Hourly autocorrelation
f64
0.946466


In [None]:
first_week_10min.remove(
    pl.col("Interpolated Temperature").is_nan()
).with_columns(
     pl.col("Interpolated Temperature").shift(1).alias("Interpolated Temperature (t-1)")
).select(
    pl.corr("Interpolated Temperature", "Interpolated Temperature (t-1)")
).to_series(
).alias("10-min autocorrelation")


10-min autocorrelation
f64
0.998547


In [82]:
autocorr_hourly = 0.946466
autocorr_10min = 0.998305

n_hourly = n_obs_week * (1 - autocorr_hourly) / (1 + autocorr_hourly)
n_10min = n_obs_week * 6 * (1 - autocorr_10min) / (1 + autocorr_10min)

print(f"Effective sample size (hourly): {n_hourly:.2f}")
print(f"Effective sample size (10-min): {n_10min:.2f}")

sem_hourly = first_week_std / (n_hourly) ** 0.5
sem_10min = first_week_10min_std / (n_10min) ** 0.5
print(f"SEM (hourly): {sem_hourly:.2f} °C")
print(f"SEM (10-min): {sem_10min:.2f} °C")

Effective sample size (hourly): 4.62
Effective sample size (10-min): 0.86
SEM (hourly): 2.26 °C
SEM (10-min): 5.17 °C
