In [2]:
#import pydytuesday
import pandas as pd
#pydytuesday.get_date('2025-05-20')

# Option 2: pull both TidyTuesday CSV files straight from the GitHub raw URLs
# and keep each one as a DataFrame.
water_quality = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-05-20/water_quality.csv',
)
weather = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-05-20/weather.csv',
)

In [3]:
# Parse the `date` column of both tables into pandas datetimes.
water_quality['date'], weather['date'] = (
    pd.to_datetime(water_quality['date']),
    pd.to_datetime(weather['date']),
)

# Write a local CSV copy of each table (without the row index),
# then preview the first rows of the water-quality table.
water_quality.to_csv(path_or_buf='water_quality.csv', index=False)
weather.to_csv(path_or_buf='weather.csv', index=False)
water_quality.head()

Unnamed: 0,region,council,swim_site,date,time,enterococci_cfu_100ml,water_temperature_c,conductivity_ms_cm,latitude,longitude
0,Western Sydney,Hawkesbury City Council,Windsor Beach,2025-04-28,11:00:00,620.0,20.0,248.0,-33.604483,150.817039
1,Sydney Harbour,North Sydney Council,Hayes Street Beach,2025-04-28,11:40:00,64.0,21.0,45250.0,-33.841715,151.219382
2,Sydney Harbour,Willoughby City Council,Northbridge Baths,2025-04-28,10:54:00,160.0,21.0,48930.0,-33.806043,151.222754
3,Sydney Harbour,Northern Beaches Council,Fairlight Beach,2025-04-28,09:28:00,54.0,21.0,52700.0,-33.800731,151.274778
4,Western Sydney,Hawkesbury City Council,Yarramundi Reserve,2025-04-28,10:35:00,720.0,18.0,64.0,-33.609123,150.697886


In [4]:
# Preview 10 randomly chosen water-quality rows; the fixed seed makes the
# same rows appear on every Restart & Run All.
water_quality.sample(10, random_state=42)

Unnamed: 0,region,council,swim_site,date,time,enterococci_cfu_100ml,water_temperature_c,conductivity_ms_cm,latitude,longitude
51639,Sydney City,Randwick City Council,Maroubra Beach,2010-04-06,08:08:00,3.0,21.0,53700.0,-33.948896,151.257459
78000,Sydney Harbour,Mosman Municipal Council,Clifton Gardens,2003-06-19,09:15:00,4.0,,,-33.839154,151.253349
65834,Southern Sydney,Sutherland Shire Council,Shelly Beach (Sutherland),2006-07-25,08:10:00,6.0,,,-34.06412,151.15542
14501,Northern Sydney,Northern Beaches Council,Palm Beach,2021-04-29,08:18:00,1.0,19.0,54700.0,-33.598779,151.325824
59290,Sydney City,Randwick City Council,Clovelly Beach,2008-03-16,08:08:00,16.0,,,-33.914578,151.267659
29455,Sydney Harbour,City of Canada Bay Council,Chiswick Baths,2017-01-13,06:51:00,0.0,25.0,51600.0,-33.847168,151.142892
69836,Sydney Harbour,Mosman Municipal Council,Little Sirius Cove,2005-07-20,13:05:00,4.0,,,-33.84167,151.23581
41378,Northern Sydney,Northern Beaches Council,Avalon Beach,2013-05-31,10:45:00,0.0,,,-33.635658,151.332569
77824,Sydney Harbour,Northern Beaches Council,Fairlight Beach,2003-07-08,10:25:00,0.0,,,-33.800731,151.274778
17936,Northern Sydney,Northern Beaches Council,Turimetta Beach,2020-05-07,11:20:00,0.0,20.0,54600.0,-33.698084,151.310937


In [5]:
# Preview 10 randomly chosen weather rows; the fixed seed makes the
# same rows appear on every Restart & Run All.
weather.sample(10, random_state=42)

Unnamed: 0,date,max_temp_C,min_temp_C,precipitation_mm,latitude,longitude
6940,2010-01-01,25.7,21.0,7.0,-33.848858,151.19551
7946,2012-10-03,25.0,12.4,0.0,-33.848858,151.19551
919,1993-07-08,17.4,14.2,20.7,-33.848858,151.19551
8574,2014-06-23,17.7,10.5,0.0,-33.848858,151.19551
5027,2004-10-06,18.1,10.9,0.2,-33.848858,151.19551
1740,1995-10-07,22.8,13.4,0.2,-33.848858,151.19551
4301,2002-10-11,16.9,10.6,0.2,-33.848858,151.19551
12087,2024-02-04,31.2,23.8,0.0,-33.848858,151.19551
8877,2015-04-22,18.3,16.3,35.2,-33.848858,151.19551
4692,2003-11-06,27.6,15.9,0.0,-33.848858,151.19551


In [6]:
# Rename the measurement columns to shorter names for clarity.
# `rename` returns a new frame (no `inplace=True`), which is bound back to the same name.
water_quality = water_quality.rename(columns={
    'swim_site': 'beach',
    'enterococci_cfu_100ml': 'enterococci',
    'water_temperature_c': 'water_temperature',
    'conductivity_ms_cm': 'conductivity',
})

# Which swim sites consistently have high enterococci levels?
# A sample counts as "high" when its enterococci reading is strictly above this value.
# NOTE(review): 104 is the cut-off the analysis already used; confirm the guideline it comes from.
ENTEROCOCCI_THRESHOLD = 104

# Number of high readings per beach, largest count first.
# This is a raw count, so beaches that were sampled more often can rank higher.
high_enterococci_sites = (
    water_quality[water_quality['enterococci'] > ENTEROCOCCI_THRESHOLD]
    .groupby('beach')
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)
high_enterococci_sites.head(10)

Unnamed: 0,beach,count
4,Boat Harbour,513
18,Darling Harbour,359
37,Malabar Beach,339
19,Davidson Reserve,266
28,Greenhills Beach,258
66,Tambourine Bay,227
17,Coogee Beach,216
55,Queenscliff Beach,213
60,South Cronulla Beach,207
76,Woolwich Baths,201


In [7]:
# How does water quality vary by season or month?
# Add a `month` column holding the calendar month number taken from each sample's `date`
# (read through the `.dt` accessor, so `date` must already be a datetime column).
water_quality['month'] = water_quality['date'].dt.month

# Function to map month to season
def month_to_season(month):
    """Map a calendar month number to a season name.

    December-February is 'Summer', March-May is 'Autumn', June-August is
    'Winter' and September-November is 'Spring' (Southern Hemisphere seasons).

    Parameters
    ----------
    month : int
        Month number, 1 (January) through 12 (December).

    Returns
    -------
    str or None
        The season name, or ``None`` when ``month`` is not a valid month
        number (for example 0, 13, or NaN coming from a missing date).
        Such values used to be labelled 'Spring' by a catch-all branch.
    """
    if month in (12, 1, 2):
        return 'Summer'
    if month in (3, 4, 5):
        return 'Autumn'
    if month in (6, 7, 8):
        return 'Winter'
    if month in (9, 10, 11):
        return 'Spring'
    # Not a real month: do not guess a season for it.
    return None

# Label every sample with its season (derived from the `month` column).
water_quality['season'] = water_quality['month'].apply(month_to_season)

# Average enterococci, water temperature and conductivity for each
# (season, beach) pair, ordered from the highest mean enterococci down.
seasonal_quality = (
    water_quality
    .groupby(['season', 'beach'])
    .agg(dict.fromkeys(['enterococci', 'water_temperature', 'conductivity'], 'mean'))
    .reset_index()
    .sort_values(by='enterococci', ascending=False)
)
seasonal_quality.head(10)


Unnamed: 0,season,beach,enterococci,water_temperature,conductivity
66,Autumn,Tambourine Bay,3326.58312,21.188406,42973.584416
76,Autumn,Woolwich Baths,2430.580247,21.306569,44975.732026
75,Autumn,Woodford Bay,2375.720988,21.395349,45903.777778
57,Autumn,Sangrado Baths,1173.320574,20.222222,49050.0
151,Spring,Wentworth Falls Lake - Jetty,855.682927,18.444444,1980.25
129,Spring,Northbridge Baths,750.720812,19.765152,51134.90566
145,Spring,Tambourine Bay,729.519582,19.819549,47416.469136
18,Autumn,Darling Harbour,665.755319,20.889764,45161.006579
35,Autumn,Little Sirius Cove,583.04,20.305556,54505.636364
19,Autumn,Davidson Reserve,573.377551,21.266667,41097.496732


In [10]:
!pip install seaborn



In [23]:
# How does weather affect water quality?
# Attach the weather record for the same date to every water-quality sample.
# `validate='many_to_one'` makes pandas raise a MergeError if `weather` holds more
# than one row for a date; without it such duplicates would silently duplicate
# samples and skew the per-beach means computed below.
# Both inputs carry `latitude`/`longitude`, so the merged frame holds them with
# the default `_x` (water_quality) and `_y` (weather) suffixes.
merged_data = pd.merge(water_quality, weather, on='date', how='left', validate='many_to_one')

# Per-beach means of the water measurements and of the precipitation on the
# sampling dates, ordered from the highest mean enterococci down.
weather_effects = (
    merged_data
    .groupby('beach')
    .agg({
        'enterococci': 'mean',
        'water_temperature': 'mean',
        'precipitation_mm': 'mean',
        'conductivity': 'mean',
    })
    .reset_index()
    .sort_values(by='enterococci', ascending=False)
)
weather_effects.head(30)

Unnamed: 0,beach,enterococci,water_temperature,precipitation_mm,conductivity
66,Tambourine Bay,1197.154309,20.939096,2.229686,45448.380952
76,Woolwich Baths,858.609456,20.988571,2.242275,47331.385841
75,Woodford Bay,812.172211,21.018634,2.248861,47928.934397
57,Sangrado Baths,535.246506,20.645161,1.913469,51150.0
72,Wentworth Falls Lake - Jetty,516.489474,20.556452,3.316754,953.747826
19,Davidson Reserve,439.664204,21.079151,2.261662,43501.304813
18,Darling Harbour,425.973315,20.718615,2.296353,47823.650273
50,Northbridge Baths,346.143133,21.203629,2.219253,49497.065836
35,Little Sirius Cove,313.944215,19.203008,1.827376,53907.423913
40,Megalong Creek,276.777778,19.101695,3.106977,1070.518182


In [24]:
# Are there noticeable improvements or declines in water quality over the years?
# Tag each sample with the calendar year of its `date`.
water_quality['year'] = water_quality['date'].dt.year

# Mean enterococci, water temperature and conductivity for each (year, beach)
# pair, ordered by year (oldest first) and, within a year, by mean enterococci
# (highest first).
yearly_trends = (
    water_quality
    .groupby(['year', 'beach'])
    .agg(dict.fromkeys(['enterococci', 'water_temperature', 'conductivity'], 'mean'))
    .reset_index()
    .sort_values(by=['year', 'enterococci'], ascending=[True, False])
)
yearly_trends.head(10)

Unnamed: 0,year,beach,enterococci,water_temperature,conductivity
0,1991,North Steyne Beach,0.0,,
1,1992,Queenscliff Beach,0.0,,
4,1993,Boat Harbour,125.586402,,
17,1993,Malabar Beach,109.41958,,
10,1993,Coogee Beach,107.992958,,
26,1993,Queenscliff Beach,100.509317,,
14,1993,Greenhills Beach,92.4375,,
32,1993,Wanda Beach,88.676136,,
12,1993,Elouera Beach,82.220963,,
8,1993,Clovelly Beach,81.186335,,
