# Transformation of map data for proportional symbol map

The following code aggregates the 2023 weather data per cluster, attaches each cluster's state and coordinates, and exports the result in the form needed for the proportional symbol map.

In [68]:
import pandas as pd
import numpy as np
import os

In [69]:
# Load the suburb/cluster table (path is relative to the working directory).
clusters = pd.read_csv(filepath_or_buffer="data/SuburbClustered.csv")

In [35]:
# Keep only the suburb name, state, coordinates and cluster id.
# ('lattitude' / 'longtitude' are the column names as spelled in the data.)
cluster_columns = ['officialnamesuburb', 'officialnamestate', 'lattitude', 'longtitude', 'clusterid']
clusters = clusters.loc[:, cluster_columns]
clusters.head(n=5)

Unnamed: 0,officialnamesuburb,officialnamestate,lattitude,longtitude,clusterid
0,Adaminaby,New South Wales,-36.011932,148.78632,100412
1,Albury,New South Wales,-36.073698,146.913468,101407
2,Alectown,New South Wales,-32.914255,148.268021,92411
3,Alpine,New South Wales,-34.408694,150.536771,96417
4,Argoon,New South Wales,-34.917979,145.657248,97404


In [36]:
# Keep one row per cluster: the first row encountered for each clusterid.
is_repeat = clusters.duplicated(subset="clusterid", keep="first")
clusters = clusters[~is_repeat]
clusters.head(n=5)

Unnamed: 0,officialnamesuburb,officialnamestate,lattitude,longtitude,clusterid
0,Adaminaby,New South Wales,-36.011932,148.78632,100412
1,Albury,New South Wales,-36.073698,146.913468,101407
2,Alectown,New South Wales,-32.914255,148.268021,92411
3,Alpine,New South Wales,-34.408694,150.536771,96417
4,Argoon,New South Wales,-34.917979,145.657248,97404


In [37]:
# Load the weather observations (path is relative to the working directory).
weather_clustered = pd.read_parquet(path="data/WeatherData.parquet")

In [38]:
# Parse the timestamp column so the .dt accessor can be used on it.
weather_clustered['Datetime'] = pd.to_datetime(weather_clustered['Datetime'])

# Restrict the observations to calendar year 2023.
in_2023 = weather_clustered['Datetime'].dt.year.eq(2023)
weather_clustered = weather_clustered.loc[in_2023]

In [39]:
# Summarise the weather of each cluster: mean of the mean temperature,
# highest maximum temperature, lowest minimum temperature and total rainfall.
# Named aggregation produces flat, final column names directly, so no
# follow-up rename or positional column overwrite is needed.
weather_clustered_agg = (
    weather_clustered
    .groupby(['ClusterID'])
    .agg(
        AverageTemperature=('TemperatureMean', 'mean'),
        MaxTemperature=('TemperatureMax', 'max'),
        MinTemperature=('TemperatureMin', 'min'),
        TotalRainfall=('RainSum', 'sum'),
    )
    .reset_index()  # turn the ClusterID group key back into a regular column
)

In [40]:
# Preview the first five per-cluster summary rows.
weather_clustered_agg.head(n=5)

Unnamed: 0,ClusterID,AverageTemperature,MaxTemperature,MinTemperature,TotalRainfall
0,26394,26.620479,31.9,23.1,2020.600002
1,27395,26.843721,31.05,23.3,2061.800005
2,27398,26.939732,30.5,23.8,2015.00001
3,28394,26.807192,30.15,23.1,2090.700012
4,28396,26.864424,30.1,23.1,1936.000002


In [41]:
# Attach suburb, state, latitude and longitude to each cluster's weather summary.
# Inner join: only cluster ids present in both tables are kept.
weather_clustered_coords = pd.merge(
    weather_clustered_agg,
    clusters,
    how="inner",
    left_on="ClusterID",
    right_on="clusterid",
)

In [42]:
# Export the per-cluster weather table with coordinates.
# index=False: after the merge the row index is only a positional counter;
# writing it would add an unnamed leading column that comes back as
# "Unnamed: 0" when the file is read again with pd.read_csv.
weather_clustered_coords.to_csv(os.path.join("..", "data/weather_coord_data.csv"), index=False)

In [43]:
# Largest per-cluster average temperature.
weather_clustered_coords.loc[:, 'AverageTemperature'].max()

29.14945369589041

In [59]:
# Reload the exported per-cluster table from disk.
weather_coord_final = pd.read_csv(filepath_or_buffer="../data/weather_coord_data.csv")

In [60]:
# Summary statistics of the per-cluster total rainfall.
weather_coord_final.loc[:, 'TotalRainfall'].describe()

count    1798.000000
mean      625.829089
std       426.561239
min        35.500000
25%       327.400001
50%       537.550000
75%       781.300003
max      3506.100008
Name: TotalRainfall, dtype: float64

In [61]:
# Lookup from every raw `officialnamestate` label to a single state/territory.
# Labels that already name exactly one state or territory map to themselves.
mapping = {
    state: state
    for state in (
        'Queensland',
        'New South Wales',
        'Western Australia',
        'South Australia',
        'Victoria',
        'Northern Territory',
        'Tasmania',
        'Australian Capital Territory',
        'Other Territories',
    )
}
# Comma-separated labels list several states; each is assigned to one of them.
mapping.update({
    'New South Wales,Queensland': 'New South Wales',
    'New South Wales,Victoria': 'New South Wales',
    'New South Wales,South Australia': 'New South Wales',
    'Northern Territory,Queensland': 'Queensland',
    'Northern Territory,Western Australia': 'Western Australia',
    'South Australia,Victoria': 'South Australia',
    'South Australia,Western Australia': 'South Australia',
    'Northern Territory,South Australia,Western Australia': 'South Australia',
})


In [62]:
# Tally the raw state labels as loaded from the exported file.
weather_coord_final.loc[:, 'officialnamestate'].value_counts()

Queensland                                              472
New South Wales                                         425
Western Australia                                       318
South Australia                                         223
Victoria                                                182
Northern Territory                                       94
Tasmania                                                 63
Other Territories                                         3
New South Wales,Queensland                                3
New South Wales,Victoria                                  3
Northern Territory,Western Australia                      2
Northern Territory,Queensland                             2
New South Wales,South Australia                           2
Australian Capital Territory                              2
South Australia,Victoria                                  2
Northern Territory,South Australia,Western Australia      1
South Australia,Western Australia       

In [63]:
# Apply the mapping so every row carries a single state/territory label.
# Series.map yields NaN for any label that is not a key of `mapping`; fall back
# to the original label in that case so an unexpected value stays visible
# instead of silently turning into NaN.
weather_coord_final['officialnamestate'] = (
    weather_coord_final['officialnamestate']
    .map(mapping)
    .fillna(weather_coord_final['officialnamestate'])
)

# Set Mungindi to Queensland: the suburb named 'Mungindi (Qld)' is overridden
# explicitly, regardless of what the mapping assigned to its label.
condition = weather_coord_final['officialnamesuburb'] == 'Mungindi (Qld)'
weather_coord_final.loc[condition, 'officialnamestate'] = 'Queensland'

In [64]:
# Drop territories outside of the main Australian land mass:
# rows labelled "Other Territories" and the suburb "Lord Howe Island".
is_other_territory = weather_coord_final['officialnamestate'] == "Other Territories"
is_lord_howe = weather_coord_final['officialnamesuburb'] == "Lord Howe Island"
weather_coord_final = weather_coord_final[~(is_other_territory | is_lord_howe)]

In [66]:
# Export the final table used by the map.
# index=False: after the row filter the index is a leftover counter with gaps,
# not data, so it is omitted and the CSV holds only the real columns.
weather_coord_final.to_csv(os.path.join("..", "data/weather_final_map_data.csv"), index=False)

In [67]:
# Row counts per state after the mapping and the offshore filter.
weather_coord_final.loc[:, 'officialnamestate'].value_counts()

Queensland                      475
New South Wales                 431
Western Australia               320
South Australia                 227
Victoria                        182
Northern Territory               94
Tasmania                         63
Australian Capital Territory      2
Name: officialnamestate, dtype: int64