# Import Libraries

In [1]:
import datetime
import os
import re
import time

import lightningchart as lc
import pandas as pd

# Prefer a license key supplied through the LIGHTNINGCHART_LICENSE_KEY environment
# variable so a real key never has to be saved inside the notebook. When the
# variable is not set, the original "LICENSE_KEY" placeholder is used unchanged.
lc.set_license(os.environ.get("LIGHTNINGCHART_LICENSE_KEY", "LICENSE_KEY"))

# Data Loading and Preprocessing

In [None]:
# Load the dataset
ireland_dataset_path = 'dataset/Water Quality Monitoring Dataset_ Ireland.csv'
ireland_water_data = pd.read_csv(ireland_dataset_path)

# DataFrame.info() prints its report directly and returns None, so it is called
# for its printed output only instead of being stored in a variable.
ireland_water_data.info()

# Number of missing values in each column
ireland_null_values_summary = ireland_water_data.isnull().sum()

# Last expression of the cell: rendered by the notebook as the cell output
ireland_null_values_summary

In [None]:
# Collect the distinct values found in the 'WaterbodyName' column
unique_waterbodies = ireland_water_data['WaterbodyName'].unique()

# Print the resulting array of names for inspection
print(unique_waterbodies)

In [4]:
# Hard-coded list of raw waterbody names used to preview the name-cleaning step.
# NOTE(review): presumably copied from the printed `unique_waterbodies` output —
# confirm it still matches the dataset if the CSV changes.
original_waterbodies = ['ABBEYTOWN_010', 'Allua', 'ASKANAGAP STREAM_010', 'AVONMORE_040',
 'BALLAGHAMUCK_010', 'Ballinhassig East', 'Ballinhassig West',
 'BALLYCREEN BROOK_020', 'Ballyshannon East', 'BEHY (NORTH MAYO)_010',
 'BELLAWADDY_010', 'BELLAWADDY_020', 'Belmullet', 'Bray Lower', 'Bredagh',
 'BURN DAURNETT_010', 'CAMCOR_020', 'CAMCOR_050', 'Carrick on Shannon',
 'CARRIGAHORIG STREAM_010', 'CARRIGANS_010',
 'CARROWKERIBLY LOUGH STREAM_010', 'Castlebar', 'CASTLEBAR_010',
 'CASTLEBAR_020', 'CASTLEBAR_030', 'CASTLEBAR_040', 'Clare-Corrib',
 'CLAREEN STREAM/FUARAWN_020', 'CLOGHAUN_010', 'Clonaslee West',
 'CLOONAGHMORE_010', 'CLOONAGHMORE_020', 'CLOONAGHMORE_030',
 'CLOONAGHMORE_040', 'CLOONAGHMORE_060', 'CLOONLAVIS_010',
 'CLYDAGH (CASTLEBAR)_010', 'CLYDAGH (CASTLEBAR)_020', 'Cong-Robe', 'Conn',
 'CORROY_010', 'CREEVY_010', 'CROSS ROADS STREAM_010', 'Cullin', 'DARGLE_010',
 'DARGLE_020', 'DARGLE_030', 'DARGLE_040', 'DEEL (CROSSMOLINA)_020',
 'DEEL (CROSSMOLINA)_040', 'DEEL (CROSSMOLINA)_050', 'Derg DL', 'DUVOWEN_010',
 'EIGNAGH_010', 'EIGNAGH_030', 'FINN (DONEGAL)_050', 'FINN (DONEGAL)_060',
 'FINN (DONEGAL)_070', 'FINN (DONEGAL)_080', 'Foxford', 'GLENCREE_010',
 'GLENCULLEN_010', 'GLENCULLEN_020', 'GLENFELLY STREAM_010', 'GLENREE_010',
 'GLENREE_020', 'GLENREE_030', 'GLORE (MAYO)_020', 'GRANEY (SHANNON)_050',
 'GWDTE-Caherglassaun Turlough (SAC000238)', 'GWEESTION_010',
 'GWEESTION_020', 'Holan', 'Kilcullen', 'Kilkelly Charlestown',
 'KILL OF THE GRANGE STREAM_010', 'KILMACANOGE_010', 'LEAFFONY_010',
 'LEAFFONY_020', 'LEE (CORK)_020', 'LEE (CORK)_030', 'LEE (CORK)_040',
 'Levally', 'LITTLE (STRADE)_010', 'LOUGHNAMINOO STREAM_010', 'MANULLA_010',
 'MANULLA_020', 'MANULLA_030', 'MANULLA_040', 'Moy Estuary', 'MOY_070',
 'MOY_080', 'MOY_090', 'MOY_100', 'MOY_120', 'MOYNE_010', 'POLLAGH_010',
 'POLLAGH_030', 'POLLAGH_040', 'River Foyle', 'ROUGH BURN_010', 'Scurmore_010',
 'SHANGANAGH_010', 'Shinrone', 'SLIEVECLAUR_010', 'SONNAGH (MOY)_010',
 'SPADDAGH_010', 'ST JOHNSTON_010', 'SWILLY BURN_010', 'SWILLY BURN_020',
 'SWILLY BURN_030', 'Swinford', 'SWINFORD_010', 'Talt', 'TRIMOGE_010',
 'TRIMOGE_020', 'TRIMOGE_030', 'TULLYEGAN_010', 'Tynagh', 'Washpool', 'Wicklow',
 'YELLOW (FOXFORD)_010', 'YELLOW (FOXFORD)_020', 'YELLOW (KNOCK)_010',
 'YELLOW (KNOCK)_020', 'BUTLERSTOWN_010', 'BUTLERSTOWN_020',
 'BUTLERSTOWN_030', 'Cahersiveen', 'Carrigdrohid', 'DRIPSEY_010',
 'DRIPSEY_020', 'DUNGOURNEY_020', 'FOHERISH_020',
 'GLASHABOY (LOUGH MAHON)_010', 'GLASHEEN (Cork City)_010', 'Glenville',
 'Inniscarra', 'KEEL_010', 'Lee (Cork) Estuary Lower',
 'Lee (Cork) Estuary Upper', 'LEE (CORK)_050', 'LEE (CORK)_060',
 'LEE (CORK)_070', 'LEE (CORK)_080', 'LEE (CORK)_090', 'Lee Valley Gravels',
 'Lough Mahon', 'Midleton', 'North Channel Great Island',
 'Owenacurra Estuary', 'OWENNACURRA_040', 'SHOURNAGH_030', 'SHOURNAGH_040',
 'SULLANE_020', 'SULLANE_040', 'SULLANE_050', 'SULLANE_060',
 'TWO POT (Cork City)_010']

In [None]:
def clean_waterbody_name(name):
    """Return a display-friendly version of a raw waterbody name.

    Every run of underscores is replaced by a single space, the result is
    title-cased with ``str.title()`` and surrounding whitespace is stripped.
    Digits are left exactly as they are (e.g. ``'MOY_070'`` -> ``'Moy 070'``).

    Args:
        name: Raw waterbody name. Values that are not strings (for example
            ``None`` or a NaN from a missing cell) are returned unchanged
            instead of raising inside ``re.sub``.

    Returns:
        The cleaned name as a ``str``, or ``name`` itself when it is not a string.
    """
    # Missing values cannot be cleaned; pass them through untouched.
    if not isinstance(name, str):
        return name

    # Collapse each run of underscores into one space.
    cleaned_name = re.sub(r'_+', ' ', name)

    # str.title() upper-cases the first letter of every alphabetic run and
    # lower-cases the rest, so 'SAC000238' becomes 'Sac000238'.
    return cleaned_name.title().strip()

# Preview the cleaning on the hard-coded list of raw names
cleaned_names = list(map(clean_waterbody_name, original_waterbodies))
print("Cleaned names:", cleaned_names)

In [6]:
# Store a cleaned copy of every 'WaterbodyName' value in a new column
ireland_water_data['CleanedWaterbodyName'] = ireland_water_data['WaterbodyName'].map(clean_waterbody_name)

# Exploratory Data Analysis (EDA)

## Line Charts & Area Charts

In [None]:
# Mean pH per value of the 'Years' column, one row per year
grouped_data = (
    ireland_water_data
    .groupby('Years')['pH']
    .mean()
    .reset_index()
)

# x: 1 January of each year as a millisecond UNIX timestamp (time.mktime works in local time)
# y: the matching yearly pH means
x_values = list(map(
    lambda year: time.mktime(datetime.datetime(year, 1, 1).timetuple()) * 1000,
    grouped_data['Years'],
))
y_values = grouped_data['pH'].tolist()

# Light-themed XY chart whose x-axis uses the 'DateTime' tick strategy
chart = lc.ChartXY(title='Average pH Over Years', theme=lc.Themes.Light)
x_axis = chart.get_default_x_axis()
x_axis.set_title("Year")
x_axis.set_tick_strategy('DateTime')

# Area series filled with the yearly means
series = chart.add_area_series(data_pattern="ProgressiveX")
series.add(x_values, y_values)

# Open the chart
chart.open()

In [None]:
# Physical parameters drawn together on one chart
parameters = ['Dissolved Oxygen', 'Temperature', 'pH', 'Conductivity @25°C']

# Yearly mean of every selected parameter, one row per year
grouped_data = (
    ireland_water_data
    .groupby('Years')[parameters]
    .mean()
    .reset_index()
)

# 1 January of each year as a millisecond UNIX timestamp (time.mktime works in local time)
x_values = list(map(
    lambda year: time.mktime(datetime.datetime(year, 1, 1).timetuple()) * 1000,
    grouped_data['Years'],
))

# Light-themed XY chart whose x-axis uses the 'DateTime' tick strategy
chart = lc.ChartXY(title='Mean Physical Parameters Over Years', theme=lc.Themes.Light)
x_axis = chart.get_default_x_axis()
x_axis.set_title("Years")
x_axis.set_tick_strategy('DateTime')

# One spline series per parameter, named after the parameter
for parameter in parameters:
    y_values = grouped_data[parameter].tolist()
    series = chart.add_spline_series(data_pattern="ProgressiveX")
    series = series.append_samples(x_values=x_values, y_values=y_values)
    series.set_name(parameter)
    series.set_line_thickness(2)

# Legend attached to the whole chart
legend = chart.add_legend()
legend.add(chart)

# Open the chart
chart.open()

In [None]:
# Chemical parameters drawn together on one chart
parameters = [
    'Chloride',
    'Ammonia-Total (as N)',
    'BOD - 5 days (Total)',
    'Alkalinity-total (as CaCO3)',
    'ortho-Phosphate (as P) - unspecified',
    'Total Hardness (as CaCO3)',
]

# Yearly mean of every selected parameter, one row per year
grouped_data = (
    ireland_water_data
    .groupby('Years')[parameters]
    .mean()
    .reset_index()
)

# Ammonia and ortho-Phosphate means are multiplied by a common factor before plotting
scaling_factor = 100
scaled_columns = ['Ammonia-Total (as N)', 'ortho-Phosphate (as P) - unspecified']
grouped_data[scaled_columns] = grouped_data[scaled_columns] * scaling_factor

# 1 January of each year as a millisecond UNIX timestamp (time.mktime works in local time)
x_values = list(map(
    lambda year: time.mktime(datetime.datetime(year, 1, 1).timetuple()) * 1000,
    grouped_data['Years'],
))

# Light-themed XY chart whose x-axis uses the 'DateTime' tick strategy
chart = lc.ChartXY(title='Mean Chemical Parameters Over Years', theme=lc.Themes.Light)
x_axis = chart.get_default_x_axis()
x_axis.set_title("Years")
x_axis.set_tick_strategy('DateTime')

# One spline series per parameter; scaled parameters carry the factor in their name
for parameter in parameters:
    y_values = grouped_data[parameter].tolist()
    series = chart.add_spline_series(data_pattern="ProgressiveX")
    series = series.append_samples(x_values=x_values, y_values=y_values)
    if parameter in scaled_columns:
        series.set_name(f'{parameter} (scaled x{scaling_factor})')
    else:
        series.set_name(parameter)
    series.set_line_thickness(2)

# Legend attached to the whole chart
legend = chart.add_legend()
legend.add(chart)

# Open the chart
chart.open()

In [None]:
# Eutrophication-related parameters drawn together on one chart
parameters = [
    'ortho-Phosphate (as P) - unspecified',
    'Ammonia-Total (as N)',
    'BOD - 5 days (Total)',
]

# Yearly mean of every selected parameter, one row per year
grouped_data = (
    ireland_water_data
    .groupby('Years')[parameters]
    .mean()
    .reset_index()
)

# 1 January of each year as a millisecond UNIX timestamp (time.mktime works in local time)
x_values = list(map(
    lambda year: time.mktime(datetime.datetime(year, 1, 1).timetuple()) * 1000,
    grouped_data['Years'],
))

# Light-themed XY chart whose x-axis uses the 'DateTime' tick strategy
chart = lc.ChartXY(
    title='Mean Eutrophication-Related Parameters Over Years',
    theme=lc.Themes.Light,
)
x_axis = chart.get_default_x_axis()
x_axis.set_title("Years")
x_axis.set_tick_strategy('DateTime')

# One spline series per parameter, named after the parameter
for parameter in parameters:
    y_values = grouped_data[parameter].tolist()
    series = chart.add_spline_series(data_pattern="ProgressiveX")
    series = series.append_samples(x_values=x_values, y_values=y_values)
    series.set_name(parameter)
    series.set_line_thickness(2)

# Legend attached to the whole chart
legend = chart.add_legend()
legend.add(chart)

# Open the chart
chart.open()

In [None]:
# Function to create a chart for specific parameters
def create_chart(dashboard, row_index, column_index, title, parameters, data, scaling=None):
    """Add a spline chart of yearly parameter means to one dashboard cell.

    Args:
        dashboard: Object exposing ``ChartXY(row_index=..., column_index=..., title=...)``.
        row_index: Row of the dashboard cell that receives the chart.
        column_index: Column of the dashboard cell that receives the chart.
        title: Chart title.
        parameters: Column names of ``data`` to average per year and plot.
        data: DataFrame with a 'Years' column and one column per parameter.
        scaling: Optional mapping of parameter name to a multiplier. A scaled
            parameter is plotted multiplied by its factor and its series name
            gets a "(scaled x<factor>)" suffix.

    Returns:
        None. The chart is created on ``dashboard`` as a side effect.
    """
    yearly_means = data.groupby('Years')[parameters].mean().reset_index()

    # 1 January of each year as a millisecond UNIX timestamp (time.mktime works in local time)
    timestamps_ms = []
    for year in yearly_means['Years']:
        new_year = datetime.datetime(year, 1, 1)
        timestamps_ms.append(time.mktime(new_year.timetuple()) * 1000)

    chart = dashboard.ChartXY(row_index=row_index, column_index=column_index, title=title)

    # Date-time x-axis
    x_axis = chart.get_default_x_axis()
    x_axis.set_title("Years")
    x_axis.set_tick_strategy('DateTime')

    # The legend is created first; each series is attached as it is built
    legend = chart.add_legend()

    # A missing or empty scaling mapping means no parameter is scaled
    factors = scaling if scaling else {}

    for parameter in parameters:
        y_values = yearly_means[parameter].tolist()
        series_name = parameter

        if parameter in factors:
            y_values = [y * scaling[parameter] for y in y_values]
            series_name = f"{parameter} (scaled x{scaling[parameter]})"

        series = chart.add_spline_series(data_pattern="ProgressiveX")
        series = series.append_samples(x_values=timestamps_ms, y_values=y_values)
        series.set_name(series_name)
        series.set_line_thickness(2)

        legend.add(series)

# Function to create a bar chart for the highest values of each parameter
def create_bar_chart(dashboard, row_index, column_index, parameters, data):
    """Add a horizontal bar chart of each parameter's maximum to one dashboard cell.

    Each bar is labelled "<parameter> (<CleanedWaterbodyName>, <Years>)" using the
    row of ``data`` in which the parameter reaches its maximum.

    Args:
        dashboard: Object exposing ``BarChart(row_index=..., column_index=..., vertical=...)``.
        row_index: Row of the dashboard cell that receives the chart.
        column_index: Column of the dashboard cell that receives the chart.
        parameters: Column names of ``data`` whose maxima are shown, in display order.
        data: DataFrame with 'CleanedWaterbodyName', 'Years' and the parameter columns.

    Returns:
        None. The chart is created on ``dashboard`` as a side effect.
    """
    def _peak_entry(parameter):
        # Row where this parameter is largest, turned into one bar record
        max_value_row = data.loc[data[parameter].idxmax()]
        return {
            'category': f"{parameter} ({max_value_row['CleanedWaterbodyName']}, {max_value_row['Years']})",
            'value': max_value_row[parameter],
        }

    data_list = [_peak_entry(parameter) for parameter in parameters]

    bar_chart = dashboard.BarChart(row_index=row_index, column_index=column_index, vertical=False)
    # Sorting is disabled so the bars keep the order of `parameters`
    bar_chart.set_sorting('disabled')
    bar_chart.set_data(data_list)
    bar_chart.set_title('Highest Values for Each Parameter')

# 2x2 dashboard grid with the light theme
dashboard = lc.Dashboard(rows=2, columns=2, theme=lc.Themes.Light)

# Parameter groups shown by the individual charts
physical_params = ['Dissolved Oxygen', 'Temperature', 'pH', 'Conductivity @25°C']
chemical_params = [
    'Chloride',
    'Ammonia-Total (as N)',
    'BOD - 5 days (Total)',
    'Alkalinity-total (as CaCO3)',
    'ortho-Phosphate (as P) - unspecified',
    'Total Hardness (as CaCO3)',
]
eutrophication_params = [
    'ortho-Phosphate (as P) - unspecified',
    'Ammonia-Total (as N)',
    'BOD - 5 days (Total)',
]
highest_value_params = [
    'Alkalinity-total (as CaCO3)',
    'Ammonia-Total (as N)',
    'BOD - 5 days (Total)',
    'Chloride',
    'Conductivity @25°C',
    'Dissolved Oxygen',
    'ortho-Phosphate (as P) - unspecified',
    'pH',
    'Temperature',
    'Total Hardness (as CaCO3)',
    'True Colour',
]

# Multipliers applied to specific parameters (used by the chemical chart only)
scaling_factors = {
    'Ammonia-Total (as N)': 100,
    'ortho-Phosphate (as P) - unspecified': 100,
}

# The three line charts, described as keyword arguments for create_chart
line_chart_specs = [
    dict(row_index=0, column_index=0, title='Mean Physical Parameters Over Years',
         parameters=physical_params),
    dict(row_index=0, column_index=1, title='Mean Chemical Parameters Over Years',
         parameters=chemical_params, scaling=scaling_factors),
    dict(row_index=1, column_index=0, title='Mean Eutrophication-Related Parameters Over Years',
         parameters=eutrophication_params),
]
for line_chart_spec in line_chart_specs:
    create_chart(dashboard, data=ireland_water_data, **line_chart_spec)

# Bottom-right cell: bar chart of the highest value of each parameter
create_bar_chart(
    dashboard,
    row_index=1,
    column_index=1,
    parameters=highest_value_params,
    data=ireland_water_data,
)

# Open the dashboard
dashboard.open()

## Scatter Chart

In [None]:
# Rows holding the overall maximum and minimum 'True Colour' readings
true_colour_column = ireland_water_data['True Colour']
max_true_color_row = ireland_water_data.loc[true_colour_column.idxmax()]
min_true_color_row = ireland_water_data.loc[true_colour_column.idxmin()]

# Cleaned names of the waterbodies those two rows belong to
max_true_color_waterbody = max_true_color_row['CleanedWaterbodyName']
min_true_color_waterbody = min_true_color_row['CleanedWaterbodyName']

# All rows recorded for each of the two waterbodies
waterbody_names = ireland_water_data['CleanedWaterbodyName']
high_true_color_data = ireland_water_data[waterbody_names == max_true_color_waterbody]
low_true_color_data = ireland_water_data[waterbody_names == min_true_color_waterbody]

# 1 January of each row's year as a millisecond UNIX timestamp (time.mktime works in local time)
high_x_values = list(map(
    lambda year: time.mktime(datetime.datetime(year, 1, 1).timetuple()) * 1000,
    high_true_color_data['Years'],
))
low_x_values = list(map(
    lambda year: time.mktime(datetime.datetime(year, 1, 1).timetuple()) * 1000,
    low_true_color_data['Years'],
))

# Light-themed XY chart whose x-axis uses the 'DateTime' tick strategy
chart = lc.ChartXY(
    title='True Colour Trends for Waterbodies with Highest and Lowest Values',
    theme=lc.Themes.Light,
)
x_axis = chart.get_default_x_axis()
x_axis.set_title("Years")
x_axis.set_tick_strategy('DateTime')

# Points for the waterbody with the highest True Colour reading
high_series = chart.add_point_series()
high_series = high_series.append_samples(
    x_values=high_x_values,
    y_values=high_true_color_data['True Colour'].tolist(),
)
high_series.set_name(f'Highest True Colour: {max_true_color_waterbody}')

# Points for the waterbody with the lowest True Colour reading
low_series = chart.add_point_series()
low_series = low_series.append_samples(
    x_values=low_x_values,
    y_values=low_true_color_data['True Colour'].tolist(),
)
low_series.set_name(f'Lowest True Colour: {min_true_color_waterbody}')

# Same point size for both series
high_series.set_point_size(5)
low_series.set_point_size(5)

# Legend attached to the whole chart and placed explicitly
legend = chart.add_legend()
legend.add(chart)
legend.set_position(x=98.5, y=70)

# Open the chart
chart.open()

## Bar Charts

In [None]:
# Water quality parameters whose overall mean is shown, in display order
parameters = [
    'Alkalinity-total (as CaCO3)',
    'Ammonia-Total (as N)',
    'BOD - 5 days (Total)',
    'Chloride',
    'Conductivity @25°C',
    'Dissolved Oxygen',
    'ortho-Phosphate (as P) - unspecified',
    'pH',
    'Temperature',
    'Total Hardness (as CaCO3)',
    'True Colour',
]

# One {'category', 'value'} record per parameter holding its mean over the whole dataset
average_values = []
for parameter in parameters:
    average_value = ireland_water_data[parameter].mean()
    average_values.append(dict(category=parameter, value=average_value))

# Independent copies of those records, passed to the chart
data = [dict(item) for item in average_values]

# Horizontal, light-themed bar chart
chart = lc.BarChart(
    title='Average Values Across All Parameters',
    theme=lc.Themes.Light,
    vertical=False,
)

# Sorting is disabled so the bars keep the order of `parameters`
chart.set_sorting('disabled')

chart.set_data(data)

# Open the chart
chart.open()

In [None]:
# Requires the 'CleanedWaterbodyName' column to exist in the dataset.
# Mean 'Dissolved Oxygen' per cleaned waterbody name.
parameter = ['Dissolved Oxygen']
grouped_by_waterbody = (
    ireland_water_data
    .groupby('CleanedWaterbodyName')[parameter]
    .mean()
    .reset_index()
)

# One {'category', 'value'} record per waterbody
data = [
    {'category': waterbody, 'value': mean_value}
    for waterbody, mean_value in zip(
        grouped_by_waterbody['CleanedWaterbodyName'],
        grouped_by_waterbody['Dissolved Oxygen'],
    )
]

# Vertical, light-themed bar chart
chart = lc.BarChart(
    title='Mean Dissolved Oxygen by Waterbody',
    theme=lc.Themes.Light,
    vertical=True,
)

# Largest bars first, category labels rotated, value labels hidden
chart.set_sorting('descending')
chart.set_label_rotation(90)
chart.set_value_label_display_mode('hidden')

chart.set_data(data)

# Colour stops given as (absolute value, RGB)
oxygen_colour_stops = [
    (0, (255, 0, 0)),      # red: low values (insufficient oxygen)
    (30, (255, 255, 0)),   # yellow: moderate values
    (70, (0, 255, 0)),     # green: healthy levels
    (122, (0, 0, 255)),    # blue: maximum values
]
chart.set_palette_colors(
    steps=[
        {'value': threshold, 'color': lc.Color(*rgb)}
        for threshold, rgb in oxygen_colour_stops
    ],
    percentage_values=False,  # thresholds are actual values, not percentages
)

# Open the chart
chart.open()

In [None]:
# Mean 'Temperature' per cleaned waterbody name
parameter = ['Temperature']
grouped_by_waterbody = (
    ireland_water_data
    .groupby('CleanedWaterbodyName')[parameter]
    .mean()
    .reset_index()
)

# One {'category', 'value'} record per waterbody
data = [
    {'category': waterbody, 'value': mean_value}
    for waterbody, mean_value in zip(
        grouped_by_waterbody['CleanedWaterbodyName'],
        grouped_by_waterbody['Temperature'],
    )
]

# Vertical, light-themed bar chart
chart = lc.BarChart(
    title='Mean Temperature by Waterbody',
    theme=lc.Themes.Light,
    vertical=True,
)

# Largest bars first, category labels rotated, value labels hidden
chart.set_sorting('descending')
chart.set_label_rotation(90)
chart.set_value_label_display_mode('hidden')

chart.set_data(data)

# Colour stops given as (absolute value, RGB)
temperature_colour_stops = [
    (0, (0, 0, 255)),      # blue: cold temperatures
    (10, (0, 255, 0)),     # green: moderate temperatures
    (20, (255, 255, 0)),   # yellow: warmer temperatures
    (30, (255, 0, 0)),     # red: hot temperatures
]
chart.set_palette_colors(
    steps=[
        {'value': threshold, 'color': lc.Color(*rgb)}
        for threshold, rgb in temperature_colour_stops
    ],
    percentage_values=False,
)

# Open the chart
chart.open()

In [None]:
# Mean 'BOD - 5 days (Total)' per cleaned waterbody name
parameter = ['BOD - 5 days (Total)']
grouped_by_waterbody = (
    ireland_water_data
    .groupby('CleanedWaterbodyName')[parameter]
    .mean()
    .reset_index()
)

# One record per waterbody: 'category' is the name, 'value' its mean BOD
data = [
    {'category': waterbody, 'value': mean_value}
    for waterbody, mean_value in zip(
        grouped_by_waterbody['CleanedWaterbodyName'],
        grouped_by_waterbody['BOD - 5 days (Total)'],
    )
]

# Vertical, light-themed bar chart
chart = lc.BarChart(
    title='Mean BOD - 5 days (Total) by Waterbody',
    theme=lc.Themes.Light,
    vertical=True,
)

# Largest bars first, category labels rotated, value labels hidden
chart.set_sorting('descending')
chart.set_label_rotation(90)
chart.set_value_label_display_mode('hidden')

chart.set_data(data)

# Colour stops given as (absolute BOD value, RGB)
bod_colour_stops = [
    (0, (0, 0, 255)),        # blue: low BOD
    (1.5, (0, 255, 0)),      # green: moderate BOD
    (2.5, (255, 255, 0)),    # yellow: higher BOD
    (3.5, (255, 165, 0)),    # orange: concerning levels
    (5, (255, 0, 0)),        # red: high BOD (poor water quality)
]
chart.set_palette_colors(
    steps=[
        {'value': threshold, 'color': lc.Color(*rgb)}
        for threshold, rgb in bod_colour_stops
    ],
    percentage_values=False,  # thresholds are absolute BOD values
)

# Open the chart
chart.open()

In [None]:
# Mean 'ortho-Phosphate (as P) - unspecified' per cleaned waterbody name
parameter = ['ortho-Phosphate (as P) - unspecified']
grouped_by_waterbody = (
    ireland_water_data
    .groupby('CleanedWaterbodyName')[parameter]
    .mean()
    .reset_index()
)

# One record per waterbody: 'category' is the name, 'value' its mean ortho-Phosphate level
data = [
    {'category': waterbody, 'value': mean_value}
    for waterbody, mean_value in zip(
        grouped_by_waterbody['CleanedWaterbodyName'],
        grouped_by_waterbody['ortho-Phosphate (as P) - unspecified'],
    )
]

# Vertical, light-themed bar chart
chart = lc.BarChart(
    title='Mean ortho-Phosphate by Waterbody',
    theme=lc.Themes.Light,
    vertical=True,
)

# Largest bars first, category labels rotated, value labels hidden
chart.set_sorting('descending')
chart.set_label_rotation(90)
chart.set_value_label_display_mode('hidden')

chart.set_data(data)

# Colour stops given as (absolute ortho-Phosphate value, RGB).
# NOTE(review): these thresholds are identical to the ones used for the BOD chart,
# while the line charts multiply this parameter by 100 before plotting — confirm
# the thresholds suit the actual magnitude of the ortho-Phosphate values.
phosphate_colour_stops = [
    (0, (0, 0, 255)),        # blue: low ortho-Phosphate
    (1.5, (0, 255, 0)),      # green: moderate ortho-Phosphate
    (2.5, (255, 255, 0)),    # yellow: higher ortho-Phosphate
    (3.5, (255, 165, 0)),    # orange: concerning levels
    (5, (255, 0, 0)),        # red: high ortho-Phosphate (poor water quality)
]
chart.set_palette_colors(
    steps=[
        {'value': threshold, 'color': lc.Color(*rgb)}
        for threshold, rgb in phosphate_colour_stops
    ],
    percentage_values=False,
)

# Open the chart
chart.open()

In [None]:
# Mean 'Total Hardness (as CaCO3)' per cleaned waterbody name
parameter = ['Total Hardness (as CaCO3)']
grouped_by_waterbody = (
    ireland_water_data
    .groupby('CleanedWaterbodyName')[parameter]
    .mean()
    .reset_index()
)

# One record per waterbody: 'category' is the name, 'value' its mean hardness (as CaCO3)
data = [
    {'category': waterbody, 'value': mean_value}
    for waterbody, mean_value in zip(
        grouped_by_waterbody['CleanedWaterbodyName'],
        grouped_by_waterbody['Total Hardness (as CaCO3)'],
    )
]

# Vertical, light-themed bar chart
chart = lc.BarChart(
    title='Mean Hardness by Waterbody',
    theme=lc.Themes.Light,
    vertical=True,
)

# Largest bars first, category labels rotated, value labels hidden
chart.set_sorting('descending')
chart.set_label_rotation(90)
chart.set_value_label_display_mode('hidden')

chart.set_data(data)

# Colour stops given as (absolute hardness value as CaCO3, RGB)
hardness_colour_stops = [
    (0, (0, 0, 255)),        # blue: soft water
    (60, (0, 255, 0)),       # green: moderately soft water
    (120, (255, 255, 0)),    # yellow: moderately hard water
    (180, (255, 165, 0)),    # orange: hard water
    (400, (255, 0, 0)),      # red: very hard water
]
chart.set_palette_colors(
    steps=[
        {'value': threshold, 'color': lc.Color(*rgb)}
        for threshold, rgb in hardness_colour_stops
    ],
    percentage_values=False,  # thresholds are absolute hardness values
)

# Open the chart
chart.open()

## Spider Chart

In [None]:
# Years compared against each other on the spider chart
time_periods = [2007, 2011, 2015, 2019, 2023]

# Light-themed spider chart drawn with a circular web
chart = lc.SpiderChart(
    title='Water Quality Parameters Comparison Over Years',
    theme=lc.Themes.Light,
)
chart.set_web_mode('circle')

# Water quality parameters, one spider-chart axis each
parameters = [
    'Dissolved Oxygen',
    'Temperature',
    'pH',
    'Conductivity @25°C',
    'Chloride',
    'Total Hardness (as CaCO3)',
    'True Colour',
]

def get_average_values_for_year(year):
    """Return the mean of every entry of the global `parameters` for one year.

    Rows of the global `ireland_water_data` whose 'Years' value equals `year`
    are selected, and the column mean of each parameter is taken over them.

    Args:
        year: Value compared against the 'Years' column.

    Returns:
        A list of means in the same order as `parameters`.
    """
    year_mask = ireland_water_data['Years'] == year
    data_for_year = ireland_water_data[year_mask]

    averages = []
    for param in parameters:
        averages.append(data_for_year[param].mean())
    return averages

# One spider-chart series per year, built from that year's parameter means
for year in time_periods:
    avg_values = get_average_values_for_year(year)

    # Pair every axis (parameter) with its mean for this year
    points = [
        dict(axis=param, value=value)
        for param, value in zip(parameters, avg_values)
    ]

    # The series is named after the year
    spider_series = chart.add_series()
    spider_series.add_points(points).set_name(f'{year}')

# Open the chart
chart.open()