In [150]:
import csv

def read_and_convert(input_file, output_file, target_string):
    """Convert the whitespace-delimited table inside a text file to CSV.

    Every line before the first one containing ``target_string`` is
    discarded. That line and all following lines are split on whitespace
    and written to ``output_file`` as comma-separated rows.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the CSV file to write (overwritten if present).
        target_string: Substring identifying the first line to keep.

    Returns:
        True if the target line was found and the CSV was written; False if
        it was not found, in which case ``output_file`` is not created.
    """
    with open(input_file, 'r') as infile:
        lines = infile.readlines()

    # Index of the first line containing the target string (None if absent).
    target_index = next((i for i, line in enumerate(lines) if target_string in line), None)

    if target_index is None:
        print("Target string not found in the file.")
        return False

    # A whitespace-only line splits to an empty list, which csv.writer would
    # emit as an empty record; drop those so the CSV holds only real rows.
    rows = (fields for fields in map(str.split, lines[target_index:]) if fields)

    with open(output_file, 'w', newline='') as csvfile:
        csv.writer(csvfile, delimiter=',').writerows(rows)

    print("CSV file created successfully.")
    return True

# Example: convert a single year's text file to CSV.
year = '2022'
target_string = 'DOY    WS    WD    AT     n'  # first line to keep; everything above it is dropped
input_file, output_file = f'./docs/{year}.txt', f'./docs/{year}.csv'

read_and_convert(input_file=input_file, output_file=output_file, target_string=target_string)

CSV file created successfully.


In [162]:
# Convert each yearly text file from 2013 through 2023 (the range end is exclusive).
# `target_string` comes from the single-year usage cell above.
for year in range(2013, 2024):
    input_file, output_file = f'./docs/{year}.txt', f'./docs/{year}.csv'
    read_and_convert(input_file, output_file, target_string)

CSV file created successfully.
CSV file created successfully.
CSV file created successfully.
CSV file created successfully.
CSV file created successfully.
CSV file created successfully.
CSV file created successfully.
CSV file created successfully.
CSV file created successfully.
CSV file created successfully.
CSV file created successfully.


# Turbidity

Beach Name
Ohio Street Beach          16608
Calumet Beach               7570
Montrose Beach              7268
Osterman Beach              4022
63rd Street Beach           3419
Rainbow Beach               3297
Ohio Street Beach Buoy       365
New Site                      21

In [126]:
import pandas as pd
import plotly.express as px
# Load the automated beach-sensor readings and reduce them to one row per day
# for a single beach.
t = pd.read_csv('./docs/Beach_Water_Quality_-_Automated_Sensors_20240308.csv')
t = t.drop(columns=['Measurement ID'])

# Keep one beach only (swap the name to analyse another, e.g. "Montrose Beach").
t = t[t['Beach Name'] == "Ohio Street Beach"]
t = t.drop(columns=['Beach Name'])

# -100000.0 is presumably a missing-reading sentinel -- TODO confirm against the
# dataset documentation. The .copy() gives an independent frame so the column
# assignment below does not trigger SettingWithCopyWarning.
t = t[t['Turbidity'] != -100000.0].copy()

# Pull the MM/DD/YYYY part out of the timestamp text and keep it as a plain date.
# The pattern is a raw string so `\d` is not treated as a string escape.
t['Date'] = pd.to_datetime(
    t['Measurement Timestamp'].str.extract(r'(\d{2}/\d{2}/\d{4})')[0],
    format='%m/%d/%Y',
).dt.date
t = t.drop(columns=['Measurement Timestamp', 'Measurement Timestamp Label'])

# Mean of every numeric measurement per day, newest day first.
daily_averages = t.groupby('Date').mean(numeric_only=True).sort_index(ascending=False)
daily_averages

Unnamed: 0_level_0,Water Temperature,Turbidity,Transducer Depth,Wave Height,Wave Period,Battery Life
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-08-27,24.220000,1.060000,,0.181600,4.200000,11.000000
2021-08-26,23.991667,19.620417,,0.139375,4.500000,11.041667
2021-08-25,24.050000,43.635833,,0.153292,4.125000,11.187500
2021-08-24,23.545833,12.837083,,0.145042,4.708333,11.312500
2021-08-23,22.450000,1.565833,,0.206250,4.833333,11.416667
...,...,...,...,...,...,...
2014-06-08,16.658333,6.208333,1.661250,0.321125,4.416667,11.945833
2014-06-07,17.412500,0.963333,1.528458,0.127125,2.875000,12.145833
2014-06-06,18.587500,0.878750,1.529500,0.138375,2.375000,12.312500
2014-06-05,16.900000,1.600000,1.780000,0.159000,3.000000,12.800000


In [127]:
# Line chart of the Turbidity column; the frame's index supplies the x-axis.
fig = px.line(daily_averages, x=daily_averages.index, y='Turbidity', template='seaborn', title='Turbidity')
fig.show()

# Wind

Input File=/chi/archive/chi2023.04t.avg
WS=Wind Speed (m/s)
WD=Wind Dir (deg)
AT=Air Temp (C)

Year=2023
Min WS =    0.0 m/s   Max WS =   27.8 m/s
Min WD =    0.0 deg   Max WD =  360.0 deg
Min AT =  -16.0 C     Max AT =   36.6 C

In [37]:
import pandas as pd
import plotly.express as px

# Load one year's converted wind CSV and rescale speed and temperature.
year = '2021'
wind_csv = f'./docs/{year}.csv'
wind = pd.read_csv(wind_csv).assign(
    WS=lambda frame: frame['WS'] * 2.23694,      # m/s to mi/hr
    AT=lambda frame: frame['AT'] * (9/5) + 32,   # C to F
)
wind

Unnamed: 0,DOY,WS,WD,AT,n
0,1,16.441509,92,30.956,719
1,2,15.792796,351,33.188,719
2,3,9.775428,291,32.972,719
3,4,14.294047,202,27.968,719
4,5,10.625465,305,31.514,719
...,...,...,...,...,...
360,361,21.429885,127,40.064,719
361,362,16.061229,114,35.474,719
362,363,13.265054,288,35.798,719
363,364,10.893898,95,34.484,719


## Wind Direction 360

In [39]:
def degrees_to_cardinal(direction_degrees):
    """Map a compass bearing in degrees to one of 16 cardinal-direction labels.

    Each label covers a 22.5-degree sector centred on its nominal bearing
    (N on 0, NNE on 22.5, ...). Bearings outside 0-360 wrap around.

    Args:
        direction_degrees: Bearing in degrees (int or float).

    Returns:
        The sector label, e.g. 'N', 'ENE', 'SSW'.
    """
    cardinal_directions = ['N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE', 'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW']
    sector_width = 360. / len(cardinal_directions)
    # Round half up via floor(x + 0.5). Built-in round() rounds ties to the
    # nearest even integer, which would hand some sectors both of their
    # boundary bearings and others neither.
    index = int((direction_degrees / sector_width + 0.5) // 1)
    return cardinal_directions[index % len(cardinal_directions)]
# Label every day with its 16-point compass sector.
wind['Wind_Direction'] = wind['WD'].apply(degrees_to_cardinal)

# Per sector: mean wind speed and number of days.
wind_grouped = wind.groupby('Wind_Direction').agg(
    Wind_Speed=('WS', 'mean'),
    Day_Count=('DOY', 'count'),
)
# Share of counted days per sector. Dividing by the actual total (rather than a
# fixed 365) keeps the shares summing to 1 for leap years and incomplete files.
wind_grouped['Frequency'] = wind_grouped['Day_Count'] / wind_grouped['Day_Count'].sum()

# Order rows clockwise from north; sectors with no days become NaN rows.
desired_order = ['N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE', 'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW']
wind_grouped = wind_grouped.reindex(desired_order)

display(wind_grouped)

# Wind_Speed is numeric, so the bars are coloured on a continuous scale; the
# palette therefore has to be passed as color_continuous_scale.
fig = px.bar_polar(wind_grouped, r="Frequency", theta=wind_grouped.index,
                   color="Wind_Speed",
                   color_continuous_scale=px.colors.sequential.Greens,
                   title=f'{year} Wind Direction Frequency'
                   )
fig.update_layout(title_x=0.5)  # Center the title

fig.show()

Unnamed: 0_level_0,Wind_Speed,Day_Count,Frequency
Wind_Direction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
N,14.71712,23,0.063014
NNE,14.306573,25,0.068493
NE,16.319135,17,0.046575
ENE,11.572969,14,0.038356
E,11.970425,16,0.043836
ESE,12.500766,12,0.032877
SE,12.775559,17,0.046575
SSE,12.691691,19,0.052055
S,15.524861,45,0.123288
SSW,18.441198,33,0.090411


## Daily Alert

In [38]:
wind = wind.sort_values(by='DOY', ascending=True)

# Surf days: wind speed above 14 with direction strictly between 5 and 75 degrees.
mask1 = wind['WS'].gt(14)
mask2 = wind['WD'].gt(5) & wind['WD'].lt(75)
surf_days = wind.loc[mask1 & mask2]
ideal_surf_count = len(surf_days)

# Dive days: wind speed of at most 10, direction above 135 or below 45 degrees,
# and only after day-of-year 120.
mask3 = wind['WS'].le(10)
mask4 = wind['WD'].gt(135) | wind['WD'].lt(45)
mask5 = wind['DOY'].gt(120)
dive_days = wind.loc[mask3 & mask4 & mask5]
ideal_dive_count = len(dive_days)

fig = px.line(wind, x='DOY', y='WS', template="seaborn", title=f"Lake Michigan's Ideal Chicago Conditions in {year}")

# Overlay a marker on each qualifying day; the legend entry carries the count.
fig.add_scatter(
    x=surf_days['DOY'], y=surf_days['WS'], mode='markers',
    marker=dict(color='gray', size=10), name=f"{ideal_surf_count} ideal surf days",
)
fig.add_scatter(
    x=dive_days['DOY'], y=dive_days['WS'], mode='markers',
    marker=dict(color='blue', size=10), name=f"{ideal_dive_count} ideal dive days",
)

# Season boundaries as [first DOY, last DOY]; only the first value is drawn below.
season_cutoffs = {
    'Winter': [355, 78],   # wraps around the end of the year
    'Spring': [79, 170],
    'Summer': [171, 264],
    'Fall': [265, 354],
}

# Dashed red line at each season's first day, with the season name above it.
for season, cutoffs in season_cutoffs.items():
    season_start = cutoffs[0]
    fig.add_annotation(x=season_start, y=35, text=season, showarrow=False)
    fig.add_shape(type="line", x0=season_start, y0=0, x1=season_start, y1=30,
                  line=dict(color="red", width=1, dash="dash"))

# Axis labels
fig.update_xaxes(title_text='Day of Year')
fig.update_yaxes(title_text='Wind Speed (mph)')

fig.show()

## Surf vs. Dive 11-Year Histogram (2013–2023)

In [40]:
import pandas as pd
import plotly.express as px

def degrees_to_cardinal(direction_degrees):
    """Map a compass bearing in degrees to one of 16 cardinal-direction labels.

    Each label covers a 22.5-degree sector centred on its nominal bearing
    (N on 0, NNE on 22.5, ...). Bearings outside 0-360 wrap around.

    Args:
        direction_degrees: Bearing in degrees (int or float).

    Returns:
        The sector label, e.g. 'N', 'ENE', 'SSW'.
    """
    cardinal_directions = ['N', 'NNE', 'NE', 'ENE', 'E', 'ESE', 'SE', 'SSE', 'S', 'SSW', 'SW', 'WSW', 'W', 'WNW', 'NW', 'NNW']
    sector_width = 360. / len(cardinal_directions)
    # Round half up via floor(x + 0.5). Built-in round() rounds ties to the
    # nearest even integer, which would hand some sectors both of their
    # boundary bearings and others neither.
    index = int((direction_degrees / sector_width + 0.5) // 1)
    return cardinal_directions[index % len(cardinal_directions)]


# Load every yearly wind CSV (2013 through 2023), rescale it, tag it with its
# compass sector and year, then stack all years into one frame and save it.
historic_data_list = []
for year in range(2013, 2024):
    wind_csv = f'./docs/{year}.csv'
    wind = pd.read_csv(wind_csv).assign(
        WS=lambda frame: frame['WS'] * 2.23694,      # m/s to mi/hr
        AT=lambda frame: frame['AT'] * (9/5) + 32,   # C to F
        Wind_Direction=lambda frame: frame['WD'].apply(degrees_to_cardinal),
        Year=year,
    )
    historic_data_list.append(wind)

historic_data = pd.concat(historic_data_list, ignore_index=True)
historic_data.to_csv("./docs/historic_data.csv", index=False)
historic_data

Unnamed: 0,DOY,WS,WD,AT,n,Wind_Direction,Year
0,1,14.965129,328,20.660,288,NNW,2013
1,2,15.636211,251,19.130,288,WSW,2013
2,3,20.400893,249,26.042,288,WSW,2013
3,4,20.221938,249,23.990,288,WSW,2013
4,5,16.173076,224,30.812,288,SW,2013
...,...,...,...,...,...,...,...
4002,361,12.146584,59,40.154,718,ENE,2023
4003,362,22.973374,353,40.352,719,N,2023
4004,363,21.004867,4,41.360,719,N,2023
4005,364,14.987498,252,34.700,719,WSW,2023


In [41]:
# Surf days: wind speed above 14 with direction strictly between 5 and 75 degrees.
mask1 = historic_data['WS'].gt(14)
mask2 = historic_data['WD'].gt(5) & historic_data['WD'].lt(75)
ideal_surf_df = historic_data.loc[mask1 & mask2]
display(ideal_surf_df)

# Dive days: wind speed of at most 10, direction above 135 or below 45 degrees,
# and only after day-of-year 120.
mask3 = historic_data['WS'].le(10)
mask4 = historic_data['WD'].gt(135) | historic_data['WD'].lt(45)
mask5 = historic_data['DOY'].gt(120)
ideal_dive_df = historic_data.loc[mask3 & mask4 & mask5]
display(ideal_dive_df)


Unnamed: 0,DOY,WS,WD,AT,n,Wind_Direction,Year
51,52,14.786173,17,24.800,288,NNE,2013
56,57,24.919512,56,33.350,288,NE,2013
74,75,17.873151,7,31.964,288,N,2013
75,76,16.620464,22,29.858,288,NNE,2013
82,83,20.870650,55,32.432,288,NE,2013
...,...,...,...,...,...,...,...
3902,261,14.182200,8,64.202,719,N,2023
3912,271,14.137461,62,65.750,719,ENE,2023
3928,287,31.764548,66,56.066,719,ENE,2023
3943,302,21.027236,31,51.296,719,NNE,2023


Unnamed: 0,DOY,WS,WD,AT,n,Wind_Direction,Year
126,127,9.529364,340,49.190,288,NNW,2013
128,129,7.806921,147,58.334,288,SSE,2013
136,137,8.276678,5,52.394,288,N,2013
144,145,6.397648,15,46.202,288,NNE,2013
145,146,7.627965,35,50.234,288,NE,2013
...,...,...,...,...,...,...,...
3861,220,9.864905,266,73.778,719,W,2023
3863,222,9.529364,257,73.472,719,WSW,2023
3905,264,6.241063,139,70.646,719,SE,2023
3915,274,9.887275,145,68.900,719,SE,2023


In [43]:
import plotly.graph_objects as go

import numpy as np

# Day-of-year values of every ideal surf / dive day across all loaded years.
x0 = ideal_surf_df['DOY']
x1 = ideal_dive_df['DOY']

fig = go.Figure()
fig.add_trace(go.Histogram(x=x0, name='Ideal Surf Days', marker_color="grey"))
fig.add_trace(go.Histogram(x=x1, name='Ideal Dive Days', marker_color="blue"))

# Season boundaries as [first DOY, last DOY]; only the first value is drawn below.
season_cutoffs = {
    'Winter': [355, 78],   # wraps around the end of the year
    'Spring': [79, 170],
    'Summer': [171, 264],
    'Fall': [265, 354],
}

# Dashed red marker at the first day of each season.
for season, cutoffs in season_cutoffs.items():
    fig.add_vline(x=cutoffs[0], line_dash="dash", line_color="red", annotation_text=season, annotation_position="top left")

fig.update_xaxes(title_text='Day of Year')

# Draw the histograms on top of each other, semi-transparent so both stay visible.
fig.update_traces(opacity=0.75)
fig.update_layout(
    barmode='overlay',
    # The yearly files cover 2013 through 2023, i.e. 11 years, not 10.
    title="Annual Ideal Surf/Dive Day Distribution | 11 Years (2013-2023)",
    title_x=0.5,  # Center the title
)

fig.show()

## Surf vs. Dive Annual Days

In [47]:
import pandas as pd
import plotly.express as px

# Count the ideal surf and dive days in each year and plot both counts over time.
historic_data = pd.read_csv('./docs/historic_data.csv')

results = []

for year in range(2013, 2024):
    wind = historic_data[historic_data['Year'] == year]

    # Surf: wind speed above 14 with direction strictly between 5 and 75 degrees.
    mask1 = (wind['WS'] > 14)
    mask2 = ((wind['WD'] > 5) & (wind['WD'] < 75))
    ideal_surf_count = int((mask1 & mask2).sum())

    # Dive: wind speed of at most 10, direction above 135 or below 45 degrees,
    # and only after day-of-year 120.
    mask3 = (wind['WS'] <= 10)
    mask4 = ((wind['WD'] > 135) | (wind['WD'] < 45))
    mask5 = (wind['DOY'] > 120)
    ideal_dive_count = int((mask3 & mask4 & mask5).sum())

    results.append({'Year': year, 'Ideal_Surf_Count': ideal_surf_count, 'Ideal_Dive_Count': ideal_dive_count})

# One row per year, built in a single step from the collected records.
results_df = pd.DataFrame(results)
fig = px.line(results_df, x='Year', y=['Ideal_Surf_Count', 'Ideal_Dive_Count'],
              template="seaborn",
              # The loop covers 2013 through 2023, i.e. 11 years, not 10.
              title="Chicago's Ideal Lake Conditions Each Year | 11 Years (2013-2023)",
              color_discrete_map={'Ideal_Surf_Count': 'grey', 'Ideal_Dive_Count': 'blue'})
fig.update_yaxes(title_text='Days')  # Customize y-axis label
fig.show()

# Correlate Turbidity to Wind Speed