# Summarizing a Dataset with Pandas-Profiling
This notebook uses the pandas-profiling library to generate data profiles and summaries from a pandas data frame.  
These examples were generated based on pandas-profiling version 2.8.0.

**Examples:**
* **Display in the notebook** (widget & HTML formats)
* **Save as a file** (HTML & JSON formats)
* **Assign to a string** (HTML & JSON formats)

**Resources:**   
https://www.linkedin.com/pulse/summarizing-exploring-datasets-using-jupyter-notebooks-joseph-true  
https://pandas-profiling.github.io/pandas-profiling/  
https://pandas-profiling.github.io/pandas-profiling/docs/master/index.html


In [None]:
import pandas as pd
import numpy as np
from pandas_profiling import ProfileReport

# Load Data into a Data Frame

In [None]:
# Get some data: load the auto-mpg CSV into a data frame.
# The path is relative, so the file must sit in the notebook's working directory.
df = pd.read_csv('auto-mpg-data.csv')

# Generate the Pandas-Profiling Report

In [None]:
# Run the standard profile report on the data frame.
# Only the report title is passed; no other options are overridden here.
profile = ProfileReport(df, title="Pandas Profiling Report")

# Display in the Notebook

In [None]:
# Display the notebook 'widget' version of the report.
# The call is the last expression in the cell, so its result becomes the cell output.
profile.to_widgets()

In [None]:
# Display the HTML version of the report inside the notebook
# (embedded in an iframe, going by the method name).
profile.to_notebook_iframe()

# Save to a File

In [None]:
# Save the report as an HTML file in the current working directory.
# NOTE(review): the output format is presumably chosen from the file extension — confirm.
profile.to_file("my_report.html")

In [None]:
# Save the report as a JSON file in the current working directory.
# NOTE(review): the output format is presumably chosen from the file extension — confirm.
profile.to_file("my_report.json")

# Assign to a Local String Variable

In [None]:
# HTML string: keep the rendered report in a local variable instead of a file.
html_data = profile.to_html()
# Bare name as the last expression makes the notebook echo the string.
html_data

In [None]:
# JSON string: keep the serialized report in a local variable instead of a file.
json_data = profile.to_json()
# Bare name as the last expression makes the notebook echo the string.
json_data

# Advanced Configuration

In [None]:
# Run a Pandas-Profiling report with explicit configuration overrides.
# This example sets a custom title, keeps only the "bar" missing-values
# diagram, calculates only the Pearson correlation, and limits the sample
# preview to 3 head rows and 3 tail rows.
#
# Advanced Configuration:
# https://pandas-profiling.github.io/pandas-profiling/docs/master/rtd/pages/advanced_usage.html
# https://github.com/pandas-profiling/pandas-profiling/blob/master/src/pandas_profiling/config_default.yaml

# Missing-value diagrams: only the bar chart stays enabled.
missing_diagram_options = {
    "bar": True,
    "matrix": False,
    "heatmap": False,
    "dendrogram": False,
}

# Correlations: Pearson is the only one calculated; every other entry is
# listed explicitly with "calculate" switched off.
correlation_options = {
    "pearson": {
        "calculate": True,
        "warn_high_correlations": True,
        "threshold": 0.9,
    },
    "spearman": {
        "calculate": False,
        "warn_high_correlations": False,
    },
    "kendall": {
        "calculate": False,
        "warn_high_correlations": False,
    },
    "phi_k": {
        "calculate": False,
        "warn_high_correlations": False,
    },
    "cramers": {
        "calculate": False,
        "warn_high_correlations": True,
        "threshold": 0.9,
    },
    "recoded": {
        "calculate": False,
        "warn_high_correlations": True,
        "threshold": 1.0,
    },
}

# Sample preview: number of rows shown from the head and tail of the data.
sample_options = {"head": 3, "tail": 3}

adv_profile = ProfileReport(
    df,
    title="My Custom Pandas Profiling Report",
    missing_diagrams=missing_diagram_options,
    correlations=correlation_options,
    samples=sample_options,
)


In [None]:
# Display the HTML version of the custom-configured report inside the notebook.
adv_profile.to_notebook_iframe()