In [1]:
import pandas as pd
import numpy as np
import os
import csv
import json

# Loading Datasets

### Downtown Docks 2019 Boaters Boats Dataset

In [2]:
# Read the boaters/boats CSV export. The file is decoded as ISO-8859-1
# (Latin-1) instead of pandas' default UTF-8.
bb_df = pd.read_csv(
    "Resources/boaters_boats.csv",
    encoding="ISO-8859-1",
)
# Show (rows, columns) as a quick sanity check of the load.
bb_df.shape

(2733, 41)

### Master Boat Database

In [3]:
# Read the edited master boat database CSV. The file is decoded as
# ISO-8859-1 (Latin-1) instead of pandas' default UTF-8.
mbd = pd.read_csv(
    "Resources/master_boat_db_edit.csv",
    encoding="ISO-8859-1",
)
# Show (rows, columns) as a quick sanity check of the load.
mbd.shape

(591435, 18)

# Rename and Format Columns

#### Downtown Docks 2019 Boaters Boats Dataset

In [4]:
# Relabel "Boat Make" as "Boat_Make" so this frame and the master database
# share one column name for the boat make.
bb_df = bb_df.rename({"Boat Make": "Boat_Make"}, axis="columns")

#### Master Boat Database

In [5]:
# Relabel the master database's "make" column as "Boat_Make" so it matches
# the column name used in the boaters/boats frame.
mbd = mbd.rename({"make": "Boat_Make"}, axis="columns")

# Reference Columns

In [6]:
# Keep only the two columns used downstream, then discard every row where
# either of them is missing.
bb_df = bb_df.loc[:, ["Country", "Boat_Make"]].dropna()
# Show how many complete (Country, Boat_Make) rows remain.
bb_df.shape

(2710, 2)

#### Dropping Duplicates

In [7]:
# Keep the first occurrence of each distinct (Country, Boat_Make) row;
# rows flagged by duplicated() are repeats of an earlier row.
unique_bb_df = bb_df[~bb_df.duplicated()]
# Show how many distinct rows are left.
unique_bb_df.shape

(120, 2)

# Grouped by Country and Sorted by Freq (Number of Reservations Made by the Top Boat Make)

In [8]:
# Summarise Boat_Make per Country (count / unique / top / freq), order the
# countries by the frequency of their most common make (largest first), and
# turn Country back into a regular column.
gp_bb_df = (
    bb_df
    .groupby("Country")
    .describe()
    .sort_values(("Boat_Make", "freq"), ascending=False)
    .reset_index()
)
gp_bb_df

Unnamed: 0_level_0,Country,Boat_Make,Boat_Make,Boat_Make,Boat_Make
Unnamed: 0_level_1,Unnamed: 1_level_1,count,unique,top,freq
0,USA,2261,95,Sea Ray Boats,1090
1,England,103,3,Sunseeker International,101
2,France,70,3,Four Winns,66
3,Italy,211,9,Prestige,61
4,Netherlands,41,3,Galeon,35
5,Japan,9,1,Yamaha,9
6,Canada,4,1,Doral,4
7,Poland,4,1,Axopar Boats,4
8,Australia,3,1,Riviera,3
9,Sweden,2,1,Albin Marine,2


## Description of groupby labels
#### Count - Total number of boat reservations made per country
#### Unique - Number of distinct boat makes per country that made a reservation
#### Top - The boat make with the most reservations in each country
#### Freq - Number of reservations made by the top boat make in each country

# Merging Datasets
#### Merging datasets on Boat Make to add country into the Master Dataset

In [9]:
# Inner-join the master database with the distinct (Country, Boat_Make) rows
# so each master record whose make appears in the reservations gains a Country.
# NOTE(review): if one Boat_Make is listed under several countries, its master
# rows will appear once per country - confirm that is intended.
boat_master = mbd.merge(unique_bb_df, on="Boat_Make", how="inner")

#### Moved the Country column to the front, right after boat_id

In [14]:
# Restore the master database's original "make" label for the merge key.
# DataFrame.rename returns a new frame, so the result must be assigned;
# previously it was discarded and the exported files kept "Boat_Make".
# Renaming before selecting keeps this cell safe to re-run: on a second run
# the rename is a no-op and the selection below still finds "make".
boat_master = boat_master.rename(columns={"Boat_Make": "make"})

# Order the columns for export: boat_id first, then the Country column added
# by the merge, then the remaining master-database fields.
boat_master = boat_master[
    [
        "boat_id",
        "Country",
        "boat_type",
        "year",
        "make",
        "model",
        "model_type",
        "length_overall",
        "yield_length",
        "beam",
        "draft",
        "height",
        "hull",
        "engine",
        "hp",
        "weight",
        "fuel_type",
        "boat_image",
        "active",
    ]
]
# Show (rows, columns) of the final frame.
boat_master.shape

(32258, 19)

# Saving DataFrames as CSV files in the Resources folder

#### Downtown Docks Boat Report 2019

In [15]:
# Write the per-country Boat_Make summary to CSV (default options, so the
# row index is written as well).
gp_bb_df.to_csv(path_or_buf="Resources/dtdock_boat_report_2019.csv")

#### Boat Master Database

In [16]:
# Write the country-annotated master boat table to CSV (default options, so
# the row index is written as well).
boat_master.to_csv(path_or_buf="Resources/boat_master.csv")

# Saving DataFrame as a JSON file in the static/js folder

In [17]:
# Write the country-annotated master boat table as JSON using pandas'
# default orientation for DataFrames.
boat_master.to_json(path_or_buf="static/js/boat_master.json")