# imports

In [1]:
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import Point
import copy
import numpy as np
import os
from display_aux import *
from vessels import VESSELES
from df_aux import *


In [2]:
# Set display options to show all rows and columns
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', None)


# Run definitions

In [3]:

params = {}
# params['input_csv_file_name_full'] = 'C:\\gilad\\work\\tip_and_que\\data\\AIS\\TipandCue_DataSample_CSV\\exactEarth_historical_data_02_2023.csv'
params['input_csv_file_name_full'] = 'debug_data_base.csv'

params['min_date'] = None
params['max_date'] = None
params['columns_list_keep'] = ['Time','MMSI','IMO','Vessel_Name','Ship_Type','Longitude','Latitude','Message_ID','Accuracy','Heading','COG','Fixing_device','Destination_ID','offset1','Offset_2','Offset_3','Offset_4','ATON_type','ATON_name','GNSS_status']
params['filter_vessels_df_dic'] = {
          'max_time_diff[mins]':['<=',30]
          }
params['reload'] = False
params['export_to_excel'] = False


# Functions

## df aux functions

## Plot Functions

In [5]:
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.ticker import MaxNLocator
def plot_df_column_vs_time(df,column_name,time_column_name='Time'):

    # Sample data
    dates = df[time_column_name].dt.strftime('%Y-%m-%d %H:%M:%S').to_list()
    values = df[column_name]

    # Create a figure with a larger size
    plt.figure(figsize=(7, 4))

    # Create a line plot
    plt.plot(dates, values)

    # Rotate x-tick labels by 45 degrees and change their font size
    plt.xticks(rotation=45, fontsize=12, ha='right')


    # Use MaxNLocator to reduce the number of ticks
    ax = plt.gca()
    ax.xaxis.set_major_locator(MaxNLocator(nbins=5))  # Adjust the number of bins as needed

    # Add labels and title
    plt.xlabel('Date and Time')
    plt.ylabel('Values')
    plt.title('Plot with Rotated and Formatted x-tick Labels')

    # Add grid
    plt.grid(True)

    # Adjust layout to prevent clipping of tick-labels
    plt.tight_layout()

    # Optionally adjust the subplots manually
    plt.subplots_adjust(bottom=0.2)

    # Show the plot
    plt.show()



In [9]:
# Create an instance of the VESSELES class
vessels = VESSELES()


# Read the file

In [10]:
df = vessels.load_data(params['input_csv_file_name_full'],columns_list_keep=params['columns_list_keep'],reload=params['reload'])


Loading DataFrame from debug_data_base.pkl
df has 309420 lines./ncolumns are:['Time', 'MMSI', 'IMO', 'Vessel_Name', 'Ship_Type', 'Longitude', 'Latitude', 'Message_ID', 'Accuracy', 'Heading', 'COG', 'Fixing_device', 'Destination_ID', 'offset1', 'Offset_2', 'Offset_3', 'Offset_4', 'ATON_type', 'ATON_name', 'GNSS_status']


## create debug data set

In [11]:
# get_min_max_dates(df)
# filter_dic = {'Time':['between',('2023-02-01 00:00:01','2023-02-01 12:00:01')]}

# df_filt = filter_df(df,filter_dic)


# get_min_max_dates(df_filt)
# print(df_filt.shape)
# df_aux.export_df(df_filt,'debug_data_base.csv') 
# df1 = vessels.load_data('debug_data_base.csv',columns_list_keep=params['columns_list_keep'],reload=True)


# Export to exell

In [12]:
if (params['export_to_excel']):
    df_aux.export_df(df, os.path.basename(params['input_csv_file_name_full'].replace('csv','xlsx')),num_lines=10000)


# create data_dic and info_df

In [13]:
vesseles_data_dic = vessels.create_data_dic(df)        
vessels_info_df,prob_MMSI = vessels.create_info_df(num_lines=100)
# vessels.get_info_df_summary()


creating data_dic
create info_df
processing MMSI 0 out of 100
total number of MMSI:100
28 MMSI's passed
72 MMSI's failed


# get a vessel data

In [14]:
# vessel_data = get_vessel_data(vessels.data_dic,422)
vessel_data = vessels.get_vessel_data(vessels_info_df.index[2])
vessel_data.shape


(121, 20)

## filtering the vessles Df

In [15]:
filter_dic = {
        'len':['between',(700,800)],
        'min_Longitude': ('between', (40.0, 50.0)),        
      #   'mean_time_diff[mins]':['==',13],
        'min_time':['between',('2023-02-01 00:00:00','2023-02-01 00:01:39')]
              }

filter_dic = {
          'max_time_diff[mins]':['<=',30]
          }

vessels_df_filt = filter_df(vessels.info_df, filter_dic)


# Save the vessel_data to a jason file

 

In [16]:
vessel_data_jason_file_path = '.\\data'

In [17]:

save_vessels_data_to_geojson(vessels_df_filt,vessel_data_dic,vessel_data_jason_file_path)



NameError: name 'save_vessels_data_to_geojson' is not defined

## plot some statistis

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def plot_vessels_df_statistics(df, columns, bins=10):
    """
    Plots histograms of specified columns in a DataFrame as subplots.

    df (pd.DataFrame): The DataFrame containing the data.
    Parameters:
    columns (list): List of column names to plot.
    bins (int): Number of bins for the histograms.
    """
    num_columns = len(columns)
    num_rows = (num_columns + 1) // 2  # Calculate number of rows needed for subplots

    fig, axes = plt.subplots(num_rows, 2, figsize=(12, num_rows * 4))
    axes = axes.flatten()  # Flatten the axes array to easily iterate over it

    for i, column in enumerate(columns):
        try:
            if column in df.columns:
                axes[i].hist(df[column], bins=bins, edgecolor='black')
                axes[i].set_title(f'Histogram of {column}')
                axes[i].set_xlabel(column)
                axes[i].set_ylabel('Frequency')
            else:
                axes[i].text(0.5, 0.5, f'Column {column} not found', ha='center', va='center')
                axes[i].set_title(f'Histogram of {column}')
                axes[i].set_xlabel(column)
                axes[i].set_ylabel('Frequency')
        except:
            print(f'could not plot {column}')
            
    # Remove any unused subplots
    for j in range(i + 1, len(axes)):
        fig.delaxes(axes[j])

    plt.tight_layout()
    plt.show()
    

columns_to_plot = ['field1', 'field2', 'field3']

plot_vessels_df_statistics(vessels_df, ['len','min_time_diff[mins]'], bins=100)


NameError: name 'vessels_df' is not defined

# Save the vessel_data to a jason file

 

In [None]:
vessel_data_jason_file_path = '.\\data'

In [None]:

save_vessels_data_to_geojson(vessels_df_filt,vessel_data_dic,vessel_data_jason_file_path)



# plots of vessel data

In [None]:

vessel_data = get_vessel_data(vessel_data_dic,MMSI)
plot_df_column_vs_time(vessel_data,column_name='Longitude')


In [None]:
grouped = df.groupby('MMSI')

# Create a dictionary to store each vessel's data
vessel_data_dic = {vessel_MMSI: group for vessel_MMSI, group in grouped}
