In [None]:
# Import required libraries
import pandas as pd
import glob
import os

# For plotting
import matplotlib.pyplot as plt


Matplotlib is building the font cache; this may take a moment.


In [None]:
# Load all CSV files
folder_path = "temperatures"  # Folder where all stations_group_YYYY.csv are kept
# glob returns paths in file-system order, which varies between machines;
# sorting makes the load order (and every preview below) reproducible.
all_files = sorted(glob.glob(os.path.join(folder_path, "*.csv")))

all_files[:5]  # preview first few file names


['temperatures/stations_group_2004.csv',
 'temperatures/stations_group_2005.csv',
 'temperatures/stations_group_2002.csv',
 'temperatures/stations_group_2003.csv',
 'temperatures/stations_group_2001.csv']

In [None]:
# Read each yearly CSV, tag its rows with the year taken from the file name,
# then stack everything into one frame with a fresh 0..n-1 index.
df_list = []
for csv_path in all_files:
    yearly = pd.read_csv(csv_path)
    # File names end in "_<YYYY>.csv": keep the text after the last "_" and before the first "."
    file_name = os.path.basename(csv_path)
    year_text = file_name.rpartition("_")[2].partition(".")[0]
    df_list.append(yearly.assign(Year=int(year_text)))

data = pd.concat(df_list, ignore_index=True)
data.head()


Unnamed: 0,STATION_NAME,STN_ID,LAT,LON,January,February,March,April,May,June,July,August,September,October,November,December,Year
0,ADELAIDE-KENT-TOWN,23090,-34.92,138.62,30.04,29.68,26.92,24.07,19.83,16.63,16.2,17.31,20.01,22.73,25.54,27.97,2004
1,ALBANY-AIRPORT-COMPARISON,9741,-34.94,117.8,25.55,26.91,26.09,24.41,20.68,18.74,17.22,17.82,19.45,20.41,22.36,24.48,2004
2,ALICE-SPRINGS-AIRPORT,15590,-23.8,133.89,39.04,38.58,35.31,30.35,25.28,21.89,21.45,25.95,32.31,34.02,36.62,38.15,2004
3,AMBERLEY-AMO,40004,-27.63,152.71,33.02,32.16,30.93,28.58,25.72,23.15,23.36,24.21,28.36,30.11,31.05,32.01,2004
4,BARCALDINE-POST-OFFICE,36007,-23.55,145.29,38.0,37.34,36.28,32.77,29.16,26.36,26.03,27.18,30.87,34.11,35.83,37.92,2004


In [None]:
# Basic checks: distinct years present, overall size, and a per-column summary
years_loaded = data["Year"].unique()
print("Years in dataset:", years_loaded)
n_rows_cols = data.shape
print("Shape of data:", n_rows_cols)
# include="all" summarises text columns too; transpose so each column becomes a row
column_summary = data.describe(include="all")
column_summary.T.head(15)


Years in dataset: [2004 2005 2002 2003 2001 2000 1988 1989 1999 1998 1995 1994 1996 1997
 1993 1987 1986 1992 1990 1991]
Shape of data: (2240, 17)


Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
STATION_NAME,2240.0,112.0,ADELAIDE-KENT-TOWN,20.0,,,,,,,
STN_ID,2240.0,,,,40784.669643,28283.71852,1019.0,15476.25,36520.5,65318.0,96003.0
LAT,2240.0,,,,-29.541071,7.403726,-43.49,-34.925,-30.94,-24.6175,-10.58
LON,2240.0,,,,139.137143,11.886118,113.67,133.0275,143.52,148.3575,153.47
January,2240.0,,,,32.725308,5.652049,18.84,28.6375,33.485,37.09,46.15
February,2240.0,,,,32.115089,5.054605,19.16,28.53,33.025,35.92,45.03
March,2240.0,,,,30.419683,5.238562,16.74,26.755,30.93,34.4625,44.37
April,2240.0,,,,27.570513,5.318702,12.76,24.07,27.34,31.265,42.61
May,2240.0,,,,23.970616,5.374964,9.42,20.23,23.13,27.7025,38.07
June,2240.0,,,,21.031862,5.480244,5.11,17.19,20.06,24.5875,35.77


In [None]:
# Handle missing values
# NOTE: dropna() with default arguments removes every row that contains a NaN in
# any column, so one missing month discards that station's whole row for that year.
data = data.dropna()
print("After dropping NaNs:", data.shape)
# Check if ANY NaN exists at all: isna() is element-wise, so reduce it over
# columns and then over the resulting Series to get a single True/False.
print("Any NaN present?", bool(data.isna().any().any()))


After dropping NaNs: (2240, 17)
Any NaN present?       STATION_NAME  STN_ID    LAT    LON  January  February  March  April  \
0            False   False  False  False    False     False  False  False   
1            False   False  False  False    False     False  False  False   
2            False   False  False  False    False     False  False  False   
3            False   False  False  False    False     False  False  False   
4            False   False  False  False    False     False  False  False   
...            ...     ...    ...    ...      ...       ...    ...    ...   
2235         False   False  False  False    False     False  False  False   
2236         False   False  False  False    False     False  False  False   
2237         False   False  False  False    False     False  False  False   
2238         False   False  False  False    False     False  False  False   
2239         False   False  False  False    False     False  False  False   

        May   June   July 

In [None]:
# Australian seasons mapped to their month column names.
# Insertion order (Summer, Autumn, Winter, Spring) is the order used when reporting.
# The values are lists because they are later used to select several columns at once.
seasons = dict([
    ("Summer", ["December", "January", "February"]),
    ("Autumn", ["March", "April", "May"]),
    ("Winter", ["June", "July", "August"]),
    ("Spring", ["September", "October", "November"]),
])

In [None]:
# Seasonal averages across ALL stations and ALL years
# Each season's value is the mean of the per-month column means for its months.
seasonal_means = {
    name: data[month_cols].mean().mean()
    for name, month_cols in seasons.items()
}

seasonal_means


{'Summer': np.float64(32.10375148809524),
 'Autumn': np.float64(27.320270833333336),
 'Winter': np.float64(21.067285714285713),
 'Spring': np.float64(27.433477678571425)}

In [None]:
# Save seasonal averages to file
# Format each line once so the file and the on-screen check cannot drift apart.
season_lines = [f"{season}: {avg:.1f}°C" for season, avg in seasonal_means.items()]

# Explicit UTF-8: the degree sign is non-ASCII, and without an encoding argument
# open() falls back to the platform default, which differs between systems.
with open("average_temp.txt", "w", encoding="utf-8") as f:
    f.writelines(line + "\n" for line in season_lines)

# Also print here for sanity check
for line in season_lines:
    print(line)


Summer: 32.1°C
Autumn: 27.3°C
Winter: 21.1°C
Spring: 27.4°C


In [None]:
# Temperature range per station: highest minus lowest monthly value over all years
# Flatten the season -> months mapping into a single list of the twelve month columns
months = [month for season_months in seasons.values() for month in season_months]

# Long format: one row per (station, month column) value
melted = pd.melt(
    data,
    id_vars=["STATION_NAME"],
    value_vars=months,
    var_name="Month",
    value_name="Temp",
)

temps_by_station = melted.groupby("STATION_NAME")["Temp"]
station_stats = temps_by_station.agg(["min", "max"])
station_stats["range"] = station_stats["max"].sub(station_stats["min"])

# Keep every station that ties for the largest range, not just the first one
largest_range = station_stats["range"].max()
has_largest_range = station_stats["range"].eq(largest_range)
largest_range_stations = station_stats.loc[has_largest_range]
largest_range_stations


Unnamed: 0_level_0,min,max,range
STATION_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BOURKE-AIRPORT-AWS,18.53,42.98,24.45


In [None]:
# Save largest range result (one line per station tied for the largest range)
# Explicit UTF-8: the degree sign is non-ASCII, and without an encoding argument
# open() falls back to the platform default, which differs between systems.
with open("largest_temp_range_station.txt", "w", encoding="utf-8") as f:
    # The frame is indexed by station name, so iterrows() yields (name, stats row)
    for station, row in largest_range_stations.iterrows():
        f.write(f"{station}: Range {row['range']:.1f}°C "
                f"(Max: {row['max']:.1f}°C, Min: {row['min']:.1f}°C)\n")

largest_range_stations


Unnamed: 0_level_0,min,max,range
STATION_NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BOURKE-AIRPORT-AWS,18.53,42.98,24.45


In [None]:
# Temperature stability: standard deviation of every monthly value per station
station_std = melted.groupby("STATION_NAME")["Temp"].std()

min_std, max_std = station_std.min(), station_std.max()

# Select by equality with the extreme value so tied stations are all reported
most_stable = station_std.loc[station_std.eq(min_std)]
most_variable = station_std.loc[station_std.eq(max_std)]

print("Most Stable:\n", most_stable)
print("\nMost Variable:\n", most_variable)

Most Stable:
 STATION_NAME
DARWIN-AIRPORT    1.22119
Name: Temp, dtype: float64

Most Variable:
 STATION_NAME
WAGGA-WAGGA-AMO    6.907227
Name: Temp, dtype: float64


In [None]:
# Save stability results (one line per station; ties produce several lines)
# Explicit UTF-8: the degree sign is non-ASCII, and without an encoding argument
# open() falls back to the platform default, which differs between systems.
with open("temperature_stability_stations.txt", "w", encoding="utf-8") as f:
    for station, val in most_stable.items():
        f.write(f"Most Stable: {station}: StdDev {val:.1f}°C\n")
    for station, val in most_variable.items():
        f.write(f"Most Variable: {station}: StdDev {val:.1f}°C\n")
