# <span style='font-family:Lato,sans-serif;color:#55b748;font-size: 1em;'> [Project Name] Exploratory Data Analysis

<font color='#78c26d'>author</font> João Baiochi  
<font color='#78c26d'>github</font> <a href='https://github.com/baiochi'>@baiochi</a>  
<font color='#78c26d'>dataset source</font> <a href='https://www.kaggle.com'>Kaggle</a>


# <a id='imports' style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'> Libraries and Configurations

In [7]:
# Aesthetics and utils
import os
# `display` is imported from its public location, IPython.display; importing
# it from IPython.core.display is deprecated.
from IPython.display import display

# Data manipulation
from datetime import datetime
import re
import numpy as np
import pandas as pd

# Pandas display options
# Render floats with two fixed decimals, which suppresses scientific notation
pd.options.display.float_format = '{:.2f}'.format
# Raise the maximum number of columns displayed to 100
pd.options.display.max_columns = 100

# Saving binary files
import pickle

# Data visualization
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import plotly.express as px
import plotly.graph_objects as go

# Machine Learning
import sklearn

# Silence every warning category for the rest of the session - they are
# useless most of the time
import warnings
warnings.simplefilter('ignore')

# ANSI escape sequences for colouring terminal output.
# Note: code 39 (WHITE) selects the terminal's default foreground colour.
WHITE, CYAN, GREEN, RED = (f'\033[{sgr}m' for sgr in (39, 36, 32, 31))

# Colour palette for plotting: maps a colour name to its hex code
colors = dict(
    cyan='#1696d2',
    gray='#5c5859',
    black='#000000',
    yellow='#fdbf11',
    orange='#ca5800',
    magenta='#af1f6b',
    green='#408941',
    red='#a4201d',
)

# Report the version of each core library, one per line, followed by the
# timestamp of this run. A generator is used so no helper names are left
# behind in the notebook namespace.
print(
    *(
        f'{label}: {library.__version__}'
        for label, library in (
            ('Numpy', np),
            ('Pandas', pd),
            ('Sklearn', sklearn),
            ('Matplotlib', matplotlib),
            ('Seaborn', sns),
        )
    ),
    sep='\n',
)
print(f'Last run on {datetime.now():%d/%m/%Y at %H:%M:%S}')


# Markdown styling
# header 1 - yellow 
# <a style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 1em;'>
# header 2 - blue #46abdb
## <a style='font-family:Lato,sans-serif;color:#1696d2;font-size: 1em;'>

Numpy: 1.20.3
Pandas: 1.3.4
Sklearn: 1.0.2
Matplotlib: 3.4.3
Seaborn: 0.11.2
Last run on 22/03/2022 at 14:40:06


# <span style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'>Table of Contents

1. [Data Overview](#overview)
2. [Reading Data](#read)  
3. [Checking Null Values](#na)  
4. [Features Types](#ftypes)  
	- [Numerical Features](#num_features)
	- [Categorical Features](#cat_features)
5. [Data Correlation](#corr)
6. [Target Distribution](#target)
7. [Domain Knowledge Questions](#dkq)
8. [Feature Engineering](#feat_eng)

# <a id='overview' style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'>1. Data Overview

# <a id='read' style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'>2. Reading Data

# <a id='na' style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'>3. Checking Null Values

# <a id='ftypes' style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'>4. Features Types

## <a id='num_features' style='font-family:Lato,sans-serif;color:#1696d2;font-size: 0.8em;'> Numerical Features

## <a id='cat_features' style='font-family:Lato,sans-serif;color:#1696d2;font-size: 0.8em;'> Categorical Features

# <a id='corr' style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'>5. Data Correlation

# <a id='target' style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'>6. Target Distribution

# <a id='dkq' style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'>7. Domain Knowledge Questions

# <a id='feat_eng' style='font-family:Lato,sans-serif;color:#fdbf11;font-size: 0.8em;'>8. Feature Engineering