# Histogram for Each Ship Type for Multiple Locations

Histograms of ship speed and of ship distance from the hydrophones, one per ship group at each location.

In [2]:
# data
import ooipy
from ooipy.tools import ooiplotlib as ooiplt
import pandas as pd
import numpy as np
#import functions as fn
# plotting
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import matplotlib.colors as colors
from matplotlib.colors import Normalize
from matplotlib.backends.backend_agg import FigureCanvasAgg
# IO
from io import BytesIO
import io
import json

In [None]:
# Exploratory pass over the Axial Base metadata: show one mean-speed
# histogram per 'ud_group' value (rows without a group label are dropped).
df = pd.read_csv('data/Axial_Base/AB_isolated_ais_10m_5_20.csv', sep=',')
grouped_df = df.dropna(subset=['ud_group']).groupby('ud_group')

for ship_group, group_rows in grouped_df:
    print(ship_group)
    mean_speeds = group_rows['mean_speed(KNOTSx10)'].values
    print(mean_speeds.shape)

    # Draw on the current pyplot figure, label it, then display it.
    plt.hist(mean_speeds)
    plt.title(ship_group + ' Ship Mean Speed Distribution')
    plt.xlabel('Mean Speed (Knots x 10)')
    plt.ylabel('Frequency')
    plt.show()

In [29]:
def plot_speed_distribution(meta_data_CSV_path: str, location: str) -> None:
    """Save one mean-speed histogram per ship group.

    Reads the CSV at ``meta_data_CSV_path``, drops rows whose ``ud_group``
    is missing, and for every remaining group writes a histogram of the
    ``mean_speed(KNOTSx10)`` column to
    ``data/<location>/<group>_speed_distribution.jpg``.

    Args:
        meta_data_CSV_path: Path of the comma-separated metadata file.
        location: Name of the sub-directory of ``data/`` that receives the
            images. The directory is not created here.
    """
    df = pd.read_csv(meta_data_CSV_path, sep=',')
    grouped_df = df.dropna(subset=['ud_group']).groupby('ud_group')

    for group_name, df_group in grouped_df:
        # Use a dedicated figure so a figure the caller already has open is
        # neither drawn on nor closed by this function.
        fig, ax = plt.subplots()
        try:
            ax.hist(df_group['mean_speed(KNOTSx10)'].values)
            ax.set_xlabel('Mean Speed (Knots x 10)')
            ax.set_ylabel('Frequency')
            # f-strings also accept non-string group labels (e.g. numeric codes).
            ax.set_title(f'{group_name} Ship Mean Speed Distribution')
            fig.savefig(
                f'data/{location}/{group_name}_speed_distribution.jpg',
                format='jpg',
            )
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

def plot_speed_distribution_SOG(meta_data_CSV_path: str, location: str) -> None:
    """Save one instantaneous-speed histogram per ship group.

    Reads the CSV at ``meta_data_CSV_path``, drops rows whose ``ud_group``
    is missing, and for every remaining group writes a histogram to
    ``data/<location>/<group>_speed_distribution.jpg``.

    The plotted column depends on ``location``: for ``'Eastern_Caldera'``
    the ``SPEED (KNOTSx10)`` column is used; for every other location the
    ``SOG`` column is used. Axis label and title follow the same split.

    Args:
        meta_data_CSV_path: Path of the comma-separated metadata file.
        location: Name of the sub-directory of ``data/`` that receives the
            images; also selects which speed column is read. The directory
            is not created here.
    """
    # The column and its labels depend only on the location, so choose them
    # once instead of re-testing the location for every group.
    if location == 'Eastern_Caldera':
        column = 'SPEED (KNOTSx10)'
        xlabel = 'Speed (Knots x 10)'
        title_suffix = 'Ship Speed Distribution'
    else:
        column = 'SOG'
        xlabel = 'SOG'
        title_suffix = 'Ship SOG Distribution'

    df = pd.read_csv(meta_data_CSV_path, sep=',')
    grouped_df = df.dropna(subset=['ud_group']).groupby('ud_group')

    for group_name, df_group in grouped_df:
        # Use a dedicated figure so a figure the caller already has open is
        # neither drawn on nor closed by this function.
        fig, ax = plt.subplots()
        try:
            ax.hist(df_group[column].values)
            ax.set_xlabel(xlabel)
            ax.set_ylabel('Frequency')
            # f-strings also accept non-string group labels (e.g. numeric codes).
            ax.set_title(f'{group_name} {title_suffix}')
            fig.savefig(
                f'data/{location}/{group_name}_speed_distribution.jpg',
                format='jpg',
            )
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)
        
def plot_dist_distribution(meta_data_CSV_path: str, location: str) -> None:
    """Save one mean-distance histogram per ship group.

    Reads the CSV at ``meta_data_CSV_path``, drops rows whose ``ud_group``
    is missing, and for every remaining group writes a histogram of the
    ``mean_distance(in kms)`` column to
    ``data/<location>/<group>_distance_distribution.jpg``.

    Args:
        meta_data_CSV_path: Path of the comma-separated metadata file.
        location: Name of the sub-directory of ``data/`` that receives the
            images. The directory is not created here.
    """
    df = pd.read_csv(meta_data_CSV_path, sep=',')
    grouped_df = df.dropna(subset=['ud_group']).groupby('ud_group')

    for group_name, df_group in grouped_df:
        # Use a dedicated figure so a figure the caller already has open is
        # neither drawn on nor closed by this function.
        fig, ax = plt.subplots()
        try:
            ax.hist(df_group['mean_distance(in kms)'].values)
            ax.set_xlabel('Mean Distance (km)')
            ax.set_ylabel('Frequency')
            # f-strings also accept non-string group labels (e.g. numeric codes).
            ax.set_title(f'{group_name} Ship Mean Distance Distribution')
            fig.savefig(
                f'data/{location}/{group_name}_distance_distribution.jpg',
                format='jpg',
            )
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

def plot_dist_distribution_SOG(meta_data_CSV_path: str, location: str) -> None:
    """Save one distance histogram per ship group.

    Reads the CSV at ``meta_data_CSV_path``, drops rows whose ``ud_group``
    is missing, and for every remaining group writes a histogram of the
    ``distance(in km)`` column to
    ``data/<location>/<group>_distance_distribution.jpg``.

    Args:
        meta_data_CSV_path: Path of the comma-separated metadata file.
        location: Name of the sub-directory of ``data/`` that receives the
            images. The directory is not created here.
    """
    df = pd.read_csv(meta_data_CSV_path, sep=',')
    grouped_df = df.dropna(subset=['ud_group']).groupby('ud_group')

    for group_name, df_group in grouped_df:
        # Use a dedicated figure so a figure the caller already has open is
        # neither drawn on nor closed by this function.
        fig, ax = plt.subplots()
        try:
            ax.hist(df_group['distance(in km)'].values)
            ax.set_xlabel('Distance (km)')
            ax.set_ylabel('Frequency')
            # f-strings also accept non-string group labels (e.g. numeric codes).
            ax.set_title(f'{group_name} Ship Distance Distribution')
            fig.savefig(
                f'data/{location}/{group_name}_distance_distribution.jpg',
                format='jpg',
            )
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)

In [33]:
# One entry per hydrophone site, in processing order:
# (metadata CSV, location folder, speed plotter, distance plotter).
# The first two sites use the mean-value plotters; the remaining three use
# the *_SOG variants.
site_jobs = [
    # Axial Base
    ('data/Axial_Base/AB_isolated_ais_10m_5_20.csv', 'Axial_Base',
     plot_speed_distribution, plot_dist_distribution),
    # Central Caldera
    ('data/Central_Caldera/CC_isolated_ais_10m_5_20.csv', 'Central_Caldera',
     plot_speed_distribution, plot_dist_distribution),
    # Eastern Caldera
    ('data/Eastern_Caldera/EC_isolated_ais_5_20.csv', 'Eastern_Caldera',
     plot_speed_distribution_SOG, plot_dist_distribution_SOG),
    # Oregon Slope
    ('data/Oregon_Slope/OS_isolated_ais_5_20.csv', 'Oregon_Slope',
     plot_speed_distribution_SOG, plot_dist_distribution_SOG),
    # Southern Hydrate
    ('data/Southern_Hydrate/SH_isolated_ais_5_20.csv', 'Southern_Hydrate',
     plot_speed_distribution_SOG, plot_dist_distribution_SOG),
]

# For each site, write the speed histograms first and the distance ones second.
for csv_path, site, plot_speed, plot_dist in site_jobs:
    plot_speed(csv_path, site)
    plot_dist(csv_path, site)
