# Main API Synt+Mar[2]

This notebook contains all the main functions and pieces of code used to run Synt+Mar[2].

It also documents the assets used in the project (stored data, GeoJSON files, etc.).

### Notebook preparation
This part contains all the imports and things to make this notebook work.

The same imports are needed to run the functions below, so review them when you move this code into a standalone script.

## Functions

In [3]:
import pandas as pd
from datetime import datetime
import plotly.graph_objects as go
import matplotlib.pyplot as plt


def fetch_buoy_data(buoy_df, start_time, end_time=None):
    """Fetch 'stdmet' data for every station in *buoy_df* and record what is available.

    NOTE(review): this function relies on a module-level ``api`` object exposing
    ``get_data(station_id=..., mode=..., start_time=..., end_time=...)``. It is not
    defined in this notebook section and must exist before the call.

    Args:
        buoy_df: DataFrame with a 'Station' column; each unique value is queried once.
        start_time: Start of the window; anything ``pd.to_datetime`` accepts.
        end_time: End of the window; defaults to the current date when omitted.

    Returns:
        A ``(station_data, availability)`` tuple:
        * ``station_data`` maps each station to the DataFrame returned by
          ``api.get_data``. Stations whose request raised are reported on stdout
          and left out.
        * ``availability`` is an integer DataFrame (stations x variables) holding 1
          where the station returned at least one non-NaN value for the variable
          and 0 otherwise.
    """
    # If end_time is not provided, use today's date
    if end_time is None:
        end_time = datetime.now()

    # Both bounds are passed to the API as 'YYYY-MM-DD' strings
    start_time = pd.to_datetime(start_time).strftime('%Y-%m-%d')
    end_time = pd.to_datetime(end_time).strftime('%Y-%m-%d')

    stations = buoy_df['Station'].unique()
    variables = ['WDIR', 'WSPD', 'GST', 'WVHT', 'DPD', 'APD', 'MWD', 'PRES', 'ATMP', 'WTMP', 'DEWP', 'VIS', 'PTDY']

    # Start from an all-zero integer table. Building it without data yields NaN
    # cells that cannot hold an integer dtype and must be patched afterwards.
    availability = pd.DataFrame(0, index=stations, columns=variables, dtype=int)

    # Dictionary to store data for each station
    station_data = {}

    for station in stations:
        try:
            # Fetch data for each station
            df = api.get_data(
                station_id=station,
                mode='stdmet',
                start_time=start_time,
                end_time=end_time,
            )

            # Store the data in the dictionary
            station_data[station] = df

            # Flag every variable that has at least one real (non-NaN) reading
            for var in variables:
                if var in df.columns and df[var].notna().any():
                    availability.loc[station, var] = 1
        except Exception as e:
            # Best effort: one failing station must not abort the whole download
            print(f"Error fetching data for station {station}: {str(e)}")

    return station_data, availability

def clean_station_data(station_data, availability):
    """Reduce each station's frame to the variables flagged as available.

    Args:
        station_data: Mapping of station id to its raw DataFrame.
        availability: DataFrame indexed by station id whose cells are 1 for
            variables that have data and 0 otherwise.

    Returns:
        A dict of station id to cleaned DataFrame. Each cleaned frame keeps only
        the columns flagged 1, has a datetime index, and has rows dropped where
        every remaining column is NaN. Stations that end up empty, or that have
        no row in *availability* (a warning is printed), are omitted.
    """
    result = {}

    for station_id, frame in station_data.items():
        # Guard clause: nothing can be selected without an availability row
        if station_id not in availability.index:
            print(f"Warning: Station {station_id} not found in availability DataFrame")
            continue

        flags = availability.loc[station_id]
        wanted = flags[flags == 1].index.tolist()

        # Work on a copy so the caller's frame is left untouched
        trimmed = frame[wanted].copy()
        trimmed.index = pd.to_datetime(trimmed.index)
        trimmed = trimmed.dropna(how='all')

        if trimmed.empty:
            continue
        result[station_id] = trimmed

    return result

def plot_availability_heatmap(availability):
    """Show a Plotly heatmap of the availability table (stations x variables).

    Variables (columns) that are 0 for every station are left out of the plot.
    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Keep only the variables that have data for at least one station
    has_data = (availability != 0).any(axis=0)
    shown = availability.loc[:, has_data]

    heatmap = go.Heatmap(
        z=shown.values,
        x=shown.columns,
        y=shown.index,
        colorscale='YlGnBu',
        colorbar={'title': 'Data Available'},
    )
    fig = go.Figure(data=heatmap)

    layout = {
        'title': 'Data Availability by Station and Variable',
        'xaxis_title': 'Variable',
        'yaxis_title': 'Station',
        'height': 600,
        'width': 1000,
    }
    fig.update_layout(**layout)

    fig.show()


def store_buoy_data(cleaned_station_data, availability_df):
    """Write the availability table and every station frame to one HDF5 file.

    The file is named ``buoy_data_<start>_to_<end>.h5``, where the two dates
    (``YYYYMMDD``) are the earliest and latest index values found across all
    non-empty station frames. When there is no data at all, today's date is
    used for both.

    Args:
        cleaned_station_data: Mapping of station id to DataFrame. The frames'
            indexes must be datetime-like (``strftime`` is called on their bounds).
        availability_df: Availability table, stored under the key 'availability'.

    Returns:
        The name of the file that was written. Each station frame is stored
        under the key ``station_<station id>``.
    """
    # Collect the real bounds of every non-empty frame. Seeding the search with
    # datetime.now() / datetime.min would cap the start at "now" and leave a
    # meaningless end date when nothing is found.
    starts = []
    ends = []
    for df in cleaned_station_data.values():
        if df.empty:
            continue
        first, last = df.index.min(), df.index.max()
        if pd.isna(first) or pd.isna(last):
            # An index made only of NaT carries no usable range
            continue
        starts.append(first)
        ends.append(last)

    if starts:
        start_date = min(starts)
        end_date = max(ends)
    else:
        start_date = end_date = datetime.now()

    # Format dates for filename
    start_str = start_date.strftime('%Y%m%d')
    end_str = end_date.strftime('%Y%m%d')

    # Create filename
    filename = f'buoy_data_{start_str}_to_{end_str}.h5'

    # Save data to HDF5 file
    with pd.HDFStore(filename) as store:
        store['availability'] = availability_df
        for station, df in cleaned_station_data.items():
            store[f'station_{station}'] = df

    print(f"Data stored in {filename}")
    return filename

def load_buoy_data(filename):
    """Load the availability table and all station frames from an HDF5 file.

    Args:
        filename: Path of an HDF5 file containing an 'availability' key and
            one ``station_<id>`` key per station.

    Returns:
        A ``(station_data, availability_df)`` tuple. ``station_data`` maps the
        station id to its DataFrame. Ids are always strings here because they
        are recovered from the HDF5 key names.
    """
    prefix = '/station_'

    # Open read-only: the default append mode would silently create an empty
    # file for a mistyped name and needs write access just to read.
    with pd.HDFStore(filename, mode='r') as store:
        availability_df = store['availability']
        station_data = {
            key[len(prefix):]: store[key]
            for key in store.keys()
            if key.startswith(prefix)
        }

    return station_data, availability_df


def create_music_base_dict(ocean_dict, variable_ranges, availability, output_octaves):
    """Map each station's variables onto an integer scale of ``output_octaves * 7`` steps.

    For every station listed in *availability* that also exists in *ocean_dict*,
    each variable flagged 1 is normalised with its ``(min, max)`` pair from
    *variable_ranges*, multiplied by ``output_octaves * 7`` and rounded to the
    nearest integer. NaN readings stay NaN. Values outside the given range are
    not clipped.

    Args:
        ocean_dict: Mapping of station id to DataFrame of readings.
        variable_ranges: Mapping of variable name to a ``(min, max)`` pair.
        availability: DataFrame (stations x variables) of 0/1 flags.
        output_octaves: Number of octaves to spread the values over.

    Returns:
        A dict of station id to a DataFrame with the same index as the input.
        Columns that are not flagged available, have no entry in
        *variable_ranges*, or have a zero-width range keep their original
        values (a warning is printed for the last two cases).
    """
    # Total number of steps on the output scale
    # (presumably seven scale degrees per octave)
    steps = output_octaves * 7

    ocean_music_base_dict = {}

    for station in availability.index:
        if station not in ocean_dict:
            continue

        df = ocean_dict[station]
        # Work on a copy restricted to the columns known to both df and availability
        available_columns = [col for col in availability.columns if col in df.columns]
        normalized_df = df[available_columns].copy()

        for variable in available_columns:
            if availability.loc[station, variable] != 1:
                continue

            if variable not in variable_ranges:
                print(f"Warning: {variable} not found in variable_ranges. Skipping.")
                continue

            min_val, max_val = variable_ranges[variable]
            span = max_val - min_val
            if span == 0:
                # Dividing by a zero-width range yields inf/NaN, and rounding
                # inf raises OverflowError; skip the variable instead.
                print(f"Warning: {variable} has a zero-width range in variable_ranges. Skipping.")
                continue

            # Normalize, then stretch onto the output scale
            scaled = (df[variable] - min_val) / span * steps

            # Round to nearest integer, but keep NaN values
            normalized_df[variable] = scaled.apply(lambda x: round(x) if pd.notnull(x) else x)

        ocean_music_base_dict[station] = normalized_df

    return ocean_music_base_dict

def plot_music_base_dict(ocean_music_base_dict):
    """Plot every station's scaled variables on a grid of subplots, three per row.

    Args:
        ocean_music_base_dict: Mapping of station id to DataFrame; each column
            is drawn as one line against the frame's index.

    The figure is shown with ``plt.show()``; nothing is returned. When the dict
    is empty a short notice is printed and no figure is created.
    """
    n_stations = len(ocean_music_base_dict)
    if n_stations == 0:
        # A grid with zero rows cannot be created, so there is nothing to draw
        print("No station data to plot.")
        return

    # Calculate the grid dimensions
    n_cols = 3  # You can adjust this for a different layout
    n_rows = (n_stations - 1) // n_cols + 1

    # squeeze=False always returns a 2-D array of axes, whatever the grid shape
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows), squeeze=False)
    fig.suptitle("Normalized and Scaled Data for All Stations", fontsize=16)

    # Flatten the axes array for easier indexing
    axes = axes.flatten()

    for ax, (station, df) in zip(axes, ocean_music_base_dict.items()):
        # Plot each variable
        for column in df.columns:
            ax.plot(df.index, df[column], label=column)

        ax.set_title(f"Station: {station}")
        ax.set_xlabel("Time")
        ax.set_ylabel("Scaled Value")
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        ax.grid(True)

        # Rotate x-axis labels for better readability
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    # Remove the grid cells left over after the last station
    for unused_ax in axes[n_stations:]:
        fig.delaxes(unused_ax)

    plt.tight_layout()
    plt.show()
    


## Assets

### GeoJson of the 5 seas

The file **5oceansByNick.geojson** contains a geojson map drawn manually to filter the buoys based on location

This webpage was used to create it
https://geojson.io

The following code is used to visualize the map:

In [3]:
import json
import folium

# Load the GeoJSON data from the file.
# GeoJSON is UTF-8 encoded, so decode it explicitly instead of relying on the
# platform's default encoding.
with open('5oceansByNick.geojson', 'r', encoding='utf-8') as f:
    geojson_data = json.load(f)

# Create a world-level Folium map.
# It is centered on [0, 0]; you may want to center it according to your data.
m = folium.Map(location=[0, 0], zoom_start=2)

# Add the GeoJSON data to the Folium map
folium.GeoJson(
    geojson_data,
    name='Five Oceans',
    style_function=lambda feature: {
        'fillColor': 'blue',  # Color for the oceans
        'color': 'black',
        'weight': 1,
        'fillOpacity': 0.5,
    },
    tooltip=folium.GeoJsonTooltip(fields=('name',))  # Make sure there is a 'name' field in your GeoJSON
).add_to(m)

# Add Layer control
folium.LayerControl().add_to(m)

# Save the map to an HTML file
m.save('five_oceans_map.html')

# Last expression of the cell: displays the map inline in the notebook
m

### Buoys with ocean list

**buoys_with_ocean.csv** contains the list of buoys, already grouped by ocean. It can be loaded into a DataFrame this way:

In [2]:
import pandas as pd

# Re-read the buoy list from the CSV file into a DataFrame.
# NOTE(review): the preview of this cell shows an 'Unnamed: 0' column, so the
# CSV appears to carry a saved index; index_col=0 would drop it. Confirm
# before changing.
buoy_df = pd.read_csv('buoys_with_ocean.csv')
buoy_df.head()

Unnamed: 0,Station,Lat,Lon,Elevation,Name,Owner,Program,Ocean
0,13001,12.0,-23.0,0.0,NE Extension,Prediction and Research Moored Array in the At...,International Partners,Atlantic Ocean
1,13002,21.0,-23.0,0.0,NE Extension,Prediction and Research Moored Array in the At...,International Partners,Atlantic Ocean
2,13008,15.0,-38.0,0.0,Reggae,Prediction and Research Moored Array in the At...,International Partners,Atlantic Ocean
3,13009,8.0,-38.0,0.0,Lambada,Prediction and Research Moored Array in the At...,International Partners,Atlantic Ocean
4,14048,-8.0,65.0,0.0,,Research Moored Array for African-Asian-Austra...,International Partners,Indian Ocean


### Buoy data

The buoy data is stored in HDF5 format. You can use the `store_buoy_data` and `load_buoy_data` functions to save and load it.

**buoy_data_20240601_to_20240915.h5** 

In [4]:
# Load the per-station data and the availability table from the stored HDF5 file
loaded_station_data, loaded_availability_df = load_buoy_data("buoy_data_20240601_to_20240915.h5")

In [5]:
# Show which variables have data for each station in the loaded file
plot_availability_heatmap(loaded_availability_df)

## Instrumentos_all.json

This file describes the input (note, MIDI and frequency range) that each instrument of the BBC Symphony Orchestra Discover VST accepts.

https://www.spitfireaudio.com/bbc-symphony-orchestra-discover

In [6]:
import json

# Load the instrument descriptions from the JSON file
with open('instrumentos_all.json', 'r', encoding='utf-8') as file:
    instrumentos = json.load(file)

# Print the name, note range, MIDI range and frequency range of every instrument
for instrumento in instrumentos:
    print(f"Instrument: {instrumento['name']}")
    print(f"Range: {instrumento['low_note']} to {instrumento['high_note']}")
    print(f"MIDI Range: {instrumento['low_midi']} to {instrumento['high_midi']}")
    print(f"Frequency Range: {instrumento['low_freq']} Hz to {instrumento['high_freq']} Hz")
    print()

# Look up a single instrument by its 'name' field.
# next() raises StopIteration when no entry matches.
piano = next(instrumento for instrumento in instrumentos if instrumento['name'] == 'Piano')
print(f"Piano's highest note: {piano['high_note']}")

# The lookup key must match the stored name exactly, accent included ('Violín')
violin = next(instrumento for instrumento in instrumentos if instrumento['name'] == 'Violín')
print(f"Violin's lowest frequency: {violin['low_freq']} Hz")


Instrument: Piano
Range: A0 to C8
MIDI Range: 21 to 108
Frequency Range: 27.5 Hz to 4186.01 Hz

Instrument: Violín
Range: G3 to A7
MIDI Range: 55 to 105
Frequency Range: 196.0 Hz to 3520.0 Hz

Instrument: Viola
Range: C3 to E6
MIDI Range: 48 to 88
Frequency Range: 130.81 Hz to 1318.51 Hz

Instrument: Violonchelo
Range: C2 to G5
MIDI Range: 36 to 79
Frequency Range: 65.41 Hz to 783.99 Hz

Instrument: Contrabajo
Range: E1 to C5
MIDI Range: 28 to 72
Frequency Range: 41.2 Hz to 523.25 Hz

Instrument: Arpa
Range: Cb1 to G#7
MIDI Range: 23 to 103
Frequency Range: 32.7 Hz to 3322.44 Hz

Instrument: Flauta
Range: C4 to C7
MIDI Range: 60 to 96
Frequency Range: 261.63 Hz to 2093.0 Hz

Instrument: Flautín
Range: D5 to C8
MIDI Range: 74 to 108
Frequency Range: 587.33 Hz to 4186.01 Hz

Instrument: Oboe
Range: Bb3 to A6
MIDI Range: 58 to 93
Frequency Range: 233.08 Hz to 1760.0 Hz

Instrument: Corno Inglés
Range: E3 to G5
MIDI Range: 52 to 79
Frequency Range: 164.81 Hz to 783.99 Hz

Instrument: Clari

### Notas_midi.json

This file lists the MIDI note numbers together with the musical note and octave each one corresponds to.

In [7]:
import json

# Read the JSON file.
# Decode it explicitly as UTF-8 (same as the instrumentos_all.json cell)
# instead of relying on the platform's default encoding.
with open('notas_midi.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Access the MIDI_Notes list
midi_notes = data['MIDI_Notes']

# Example usage: Print all MIDI notes and their corresponding musical notes
print("MIDI Notes and their corresponding musical notes:")
for note in midi_notes:
    print(f"MIDI Note: {note['MIDI_Note']}, Musical Note: {note['Note']}, Octave: {note['Octave']}")

# Example usage: Find a specific MIDI note
def find_note_by_midi(midi_number):
    """Return the first entry of ``midi_notes`` whose 'MIDI_Note' equals *midi_number*.

    Returns None when no entry matches. Reads the module-level ``midi_notes`` list.
    """
    matches = (entry for entry in midi_notes if entry['MIDI_Note'] == midi_number)
    return next(matches, None)

# Example: Find MIDI note 60.
# find_note_by_midi returns None when the number is absent, hence the check below.
midi_60 = find_note_by_midi(60)
if midi_60:
    print(f"\nMIDI Note 60 corresponds to: {midi_60['Note']} (Octave: {midi_60['Octave']})")
else:
    print("\nMIDI Note 60 not found in the data")

# Example usage: Find notes in a specific octave
def find_notes_in_octave(octave):
    """Return every entry of ``midi_notes`` whose 'Octave' equals *octave*.

    The entries keep their original order; the list is empty when nothing
    matches. Reads the module-level ``midi_notes`` list.
    """
    return [
        entry
        for entry in midi_notes
        if entry['Octave'] == octave
    ]

# Example: Find all notes in octave 4 and print their MIDI number and note name
octave_4_notes = find_notes_in_octave(4)
print("\nNotes in Octave 4:")
for note in octave_4_notes:
    print(f"MIDI Note: {note['MIDI_Note']}, Musical Note: {note['Note']}")


MIDI Notes and their corresponding musical notes:
MIDI Note: 0, Musical Note: C-1, Octave: -1
MIDI Note: 1, Musical Note: C#-1/Db-1, Octave: -1
MIDI Note: 2, Musical Note: D-1, Octave: -1
MIDI Note: 3, Musical Note: D#-1/Eb-1, Octave: -1
MIDI Note: 4, Musical Note: E-1, Octave: -1
MIDI Note: 5, Musical Note: F-1, Octave: -1
MIDI Note: 6, Musical Note: F#-1/Gb-1, Octave: -1
MIDI Note: 7, Musical Note: G-1, Octave: -1
MIDI Note: 8, Musical Note: G#-1/Ab-1, Octave: -1
MIDI Note: 9, Musical Note: A-1, Octave: -1
MIDI Note: 10, Musical Note: A#-1/Bb-1, Octave: -1
MIDI Note: 11, Musical Note: B-1, Octave: -1
MIDI Note: 12, Musical Note: C0, Octave: 0
MIDI Note: 13, Musical Note: C#0/Db0, Octave: 0
MIDI Note: 14, Musical Note: D0, Octave: 0
MIDI Note: 15, Musical Note: D#0/Eb0, Octave: 0
MIDI Note: 16, Musical Note: E0, Octave: 0
MIDI Note: 17, Musical Note: F0, Octave: 0
MIDI Note: 18, Musical Note: F#0/Gb0, Octave: 0
MIDI Note: 19, Musical Note: G0, Octave: 0
MIDI Note: 20, Musical Note: G#