# Introduction

The notebook is intended to predict the chance of failure of a specific product code given its attributes and measurements.

This is the [dataset used](https://www.kaggle.com/competitions/tabular-playground-series-aug-2022).

In [None]:
# Import third-party libraries
import pandas as pd
import numpy as np

import plotly.express as ex
from plotly.io import read_json

# Read Data

In [None]:
# Load the train and test splits (train first, then test) from the 2022_08 data folder
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ('./../../data/2022_08/train.csv',
                     './../../data/2022_08/test.csv')
)

In [None]:
# Print a concise summary of the training frame (columns, non-null counts, dtypes)
train_data.info()

In [None]:
# Preview the first rows of the training data (head() defaults to five rows)
train_data.head()

# Exploratory Data Analysis

## id

In [None]:
# Count the rows whose 'id' already appeared earlier in the training data
print('ID duplicates: {}'.format(train_data['id'].duplicated().sum()))

## Product Code

In [None]:
# Count the rows with a missing 'product_code'
print('Product Code NaN values: {}'.format(train_data['product_code'].isna().sum()))

In [None]:
# Plot the distribution of 'product_code' as the share of training rows per code
figure = ex.histogram(train_data, 
                      x='product_code', 
                      title='Product Code Distribution', 
                      labels={'product_code':'Product Code',
                              'count': 'Share'},
                      color_discrete_sequence=['darkgreen'],
                      height=500,
                      # Normalise bar heights to fractions of all rows so the data
                      # matches the 'Share' axis title (an empty histnorm plots raw counts)
                      histnorm='probability',
                      template='plotly_dark')

figure.update_layout(yaxis_title='Share', 
                     font=dict(family="PT Sans", 
                               size=14), 
                     title_font=dict(family="PT Sans",
                                     size=30), 
                     title_x=0.7)

# Save figure so it can be re-rendered later without recomputing it
figure.write_json("./plots/product_code_distribution.json")

# Read & plot figure from the saved JSON
read_json('./plots/product_code_distribution.json').show()

## loading

In [None]:
# Plot a boxplot of 'loading' to inspect its spread and outliers
figure = ex.box(train_data, 
                x='loading', 
                title='Loading Distribution',
                labels={'loading': 'Loading'},
                color_discrete_sequence=['darkgreen'],
                template='plotly_dark')

# Save figure under its own file name so it does not overwrite the
# product code histogram saved in './plots/product_code_distribution.json'
figure.write_json("./plots/loading_distribution.json")

# Read & plot figure from the saved JSON
read_json('./plots/loading_distribution.json').show()