In [1]:
import pandas as pd
import altair as alt

Station number for Goldstream: 08HA039,
Station number for Cowichan: 08HA011,
Station number for Nanaimo: 08HB005,
Station number for Englishman: 08HB002,
Station number for Puntledge: 08HB006,
Station number for Quinsam: 08HD005,
Station number for Millstream: 08HA034.

In [2]:
# Read the wide-format flow table: one row per station/year/month, with the
# individual readings spread across the FLOW1 ... FLOW31 columns.
flow_csv_path = 'data/data1.csv'
df = pd.read_csv(flow_csv_path)

# Preview the first rows to confirm the file parsed as expected.
df.head()

Unnamed: 0,STATION_NUMBER,YEAR,MONTH,FLOW1,FLOW2,FLOW3,FLOW4,FLOW5,FLOW6,FLOW7,...,FLOW22,FLOW23,FLOW24,FLOW25,FLOW26,FLOW27,FLOW28,FLOW29,FLOW30,FLOW31
0,08HD005,2019,1,18.5,16.700001,25.299999,51.299999,62.0,52.0,37.200001,...,19.299999,24.700001,21.1,18.1,15.8,13.9,12.4,11.1,10.0,9.14
1,08HD005,2019,2,8.83,8.57,8.3,7.86,7.47,7.17,6.96,...,5.08,5.11,5.22,5.14,4.95,4.83,4.85,,,
2,08HD005,2019,3,4.83,4.77,4.77,4.72,4.7,4.67,4.62,...,6.2,6.51,6.67,6.52,6.33,6.21,6.17,6.07,5.96,5.92
3,08HD005,2019,4,5.91,5.93,6.79,7.43,7.79,10.1,14.6,...,10.1,10.5,9.8,8.78,7.89,7.31,6.73,6.32,6.03,
4,08HD005,2019,5,5.82,5.69,5.4,5.27,5.4,5.55,5.47,...,3.53,3.44,3.34,3.18,3.06,2.92,2.81,2.73,2.65,2.56


In [3]:
# Select only the numbered flow columns (FLOW1, FLOW2, ...). Matching on the
# exact "FLOW<digits>" shape, rather than on any name that merely contains
# "FLOW", keeps unrelated columns (for example a FLOW_SYMBOL-style flag column)
# out of the reshape, where they would later break the integer conversion of
# FlowType.
flow_columns = [
    col for col in df.columns
    if col.startswith('FLOW') and col[len('FLOW'):].isdigit()
]

# Reshape wide -> long: one row per (station, year, month, FLOWn column), with
# the column name stored in FlowType and the reading in FlowValue.
df_long = pd.melt(
    df,
    id_vars=["STATION_NUMBER", "YEAR", "MONTH"],
    value_vars=flow_columns,
    var_name="FlowType",
    value_name="FlowValue",
)
df_long.head()

Unnamed: 0,STATION_NUMBER,YEAR,MONTH,FlowType,FlowValue
0,08HD005,2019,1,FLOW1,18.5
1,08HD005,2019,2,FLOW1,8.83
2,08HD005,2019,3,FLOW1,4.83
3,08HD005,2019,4,FLOW1,5.91
4,08HD005,2019,5,FLOW1,5.82


In [5]:
# Turn the FlowType labels ("FLOW1" ... "FLOW31") into their integer suffix.
#
# The column is overwritten in place, so this cell must tolerate being re-run:
# casting to str first means a second execution sees "1", "2", ... (nothing to
# strip) instead of failing because the .str accessor is unavailable on an
# integer column. regex=False makes the literal-substring intent explicit and
# independent of the pandas version's default.
df_long['FlowType'] = (
    df_long['FlowType']
    .astype(str)
    .str.replace("FLOW", "", regex=False)
    .astype(int)
)
df_long.head()

Unnamed: 0,STATION_NUMBER,YEAR,MONTH,FlowType,FlowValue
0,08HD005,2019,1,1,18.5
1,08HD005,2019,2,1,8.83
2,08HD005,2019,3,1,4.83
3,08HD005,2019,4,1,5.91
4,08HD005,2019,5,1,5.82


In [7]:
# Order the long table by station, then chronologically (year, month, FLOWn
# number), and renumber the rows 0..n-1 so the index follows the sorted order.
sort_keys = ["STATION_NUMBER", "YEAR", "MONTH", "FlowType"]
sorted_df = df_long.sort_values(by=sort_keys).reset_index(drop=True)
sorted_df.head()

Unnamed: 0,STATION_NUMBER,YEAR,MONTH,FlowType,FlowValue
0,08HA011,2019,1,1,147.0
1,08HA011,2019,1,2,137.0
2,08HA011,2019,1,3,276.0
3,08HA011,2019,1,4,372.0
4,08HA011,2019,1,5,297.0


In [24]:
# Distinct station identifiers present in the loaded table.
unique_station_numbers = pd.unique(df['STATION_NUMBER'])
unique_station_numbers

array(['08HD005', '08HA011', '08HB006', '08HB002'], dtype=object)

In [9]:
# One-time setup, only if altair is not installed yet: run `%pip install altair vega_datasets` in a cell, then restart the kernel.

Collecting altair
  Downloading altair-5.3.0-py3-none-any.whl.metadata (9.2 kB)
Collecting vega_datasets
  Downloading vega_datasets-0.9.0-py3-none-any.whl.metadata (5.5 kB)
Collecting toolz (from altair)
  Using cached toolz-0.12.1-py3-none-any.whl.metadata (5.1 kB)
Downloading altair-5.3.0-py3-none-any.whl (857 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m857.8/857.8 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m0:01[0m
[?25hDownloading vega_datasets-0.9.0-py3-none-any.whl (210 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m210.8/210.8 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hUsing cached toolz-0.12.1-py3-none-any.whl (56 kB)
Installing collected packages: toolz, vega_datasets, altair
Successfully installed altair-5.3.0 toolz-0.12.1 vega_datasets-0.9.0
Note: you may need to restart the kernel to use updated packages.


In [19]:
# Plot every flow value recorded for a single station as one continuous line.
station_id = "08HA011"

# Keep only this station's rows. reset_index(drop=True) discards the row
# positions inherited from sorted_df (which are offset for every station except
# the first one in sort order); the second reset_index() then exposes a fresh
# 0-based position as a column, renamed to 'Index' for use as the x-axis.
df_filtered = sorted_df[sorted_df['STATION_NUMBER'] == station_id]
df_filtered = (
    df_filtered
    .reset_index(drop=True)
    .reset_index()
    .rename(columns={'index': 'Index'})
)

chart = alt.Chart(df_filtered).mark_line(point=True).encode(
    x='Index:Q',  # Running row position, encoded as quantitative
    y='FlowValue:Q',  # Quantitative data
    # FlowType is the FLOWn number within the month, so each point can be
    # traced back to a specific reading on hover.
    tooltip=['YEAR', 'MONTH', 'FlowType', 'FlowValue']
).properties(
    width=600,
    height=300,
    # There is one point per FLOWn slot (up to 31 per month, blank where the
    # month is shorter), so the series is daily rather than monthly.
    title=f'Daily FlowValue for Station {station_id}'
)

chart

In [23]:
# Display names for the stations of interest, keyed by station number.
station_titles = {
    "08HA039": "Goldstream",
    "08HA011": "Cowichan",
    "08HB005": "Nanaimo",
    "08HB002": "Englishman",
    "08HB006": "Puntledge",
    "08HD005": "Quinsam",
    "08HA034": "Millstream"
}

# Stays None only when none of the listed stations has any rows in sorted_df,
# so that a later reference to chart1 never hits an undefined name.
chart1 = None

# Draw one line chart per station that actually has data.
for station_number, river_name in station_titles.items():
    # Filter to the current station. reset_index(drop=True) throws away the
    # positions inherited from sorted_df, which would otherwise start at a
    # large offset for every station after the first and leave an empty
    # stretch at the start of the x-axis; the second reset_index() exposes a
    # fresh 0-based 'index' column for plotting.
    df_filtered = (
        sorted_df[sorted_df['STATION_NUMBER'] == station_number]
        .reset_index(drop=True)
        .reset_index()
    )

    # Not every station listed above is necessarily present in the loaded
    # file; skip those instead of rendering an empty chart.
    if df_filtered.empty:
        print(f'No data for {river_name} (station {station_number}); skipping plot.')
        continue

    # Line chart of FlowValue against the per-station running index.
    chart1 = alt.Chart(df_filtered).mark_line(point=True).encode(
        x=alt.X('index:Q', title='Index'),  # Per-station row position as x
        y=alt.Y('FlowValue:Q', title='FlowValue'),  # FlowValue as y
        tooltip=['YEAR', 'MONTH', 'FlowValue', 'index']  # Tooltip information
    ).properties(
        width=600,
        height=300,
        title=f'{river_name} River Flow at Station {station_number}'
    )
    chart1.display()

In [21]:
# Show the chart currently bound to `chart1`, i.e. the one most recently
# assigned by the per-station plotting loop.
chart1