# Imports, Options, and Getting Data

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_profiling
from dataprep.eda import plot
from sqlalchemy import create_engine

In [None]:
pd.set_option("display.max_columns", 100)
pd.set_option('display.width', 100)
pd.set_option("display.precision", 2)
%matplotlib inline
plt.style.use('fivethirtyeight')
plt.rcParams.update({'font.size': 16, 'font.family': 'sans'})

In [None]:
def load_dataframe_from_yelp_2(query):
    """
    Connects to yelp_2 database on Postgres and
    loads a Pandas dataframe based off sql query.

    A SQLAlchemy engine is created for the call and disposed before
    returning, so its pooled connections are released even when the
    query raises.

    Args:
        query (string): Sql query to select data from yelp_2.

    Returns:
        Dataframe: Pandas dataframe of records
                    from sql query of yelp_2 database.
    """
    # NOTE(review): the connection URL (including the password) is
    # hard-coded; consider loading it from configuration instead.
    connect = 'postgresql+psycopg2://postgres:password@localhost:5432/yelp_2'
    engine = create_engine(connect)
    try:
        # read_sql already returns a freshly built DataFrame, so no
        # additional copy is needed.
        return pd.read_sql(query, con=engine)
    finally:
        # Close the engine's connection pool; a new engine is built on
        # every call, so leaving it open would leak connections.
        engine.dispose()

def counter(x):
    """
    Counts the comma-separated items in a string.

    Args:
        x (string or None): Comma-delimited text. None, float NaN, ''
            and the literal string 'None' are all treated as missing.

    Returns:
        int: 0 for a missing value, otherwise the number of pieces
             produced by splitting x on ','.
    """
    # A float NaN has no .split(); NaN is the only float that is not
    # equal to itself, so `x != x` detects it without extra imports.
    if x is None or (isinstance(x, float) and x != x):
        return 0
    if x in ('None', ''):
        return 0
    return len(x.split(','))

In [None]:
# Fetch at most 10,000 rows of features_and_targets to explore.
query = '''
        SELECT *
        FROM features_and_targets
        LIMIT 10000
        ;
        '''
df = load_dataframe_from_yelp_2(query=query)

In [None]:
# Fixed reference timestamp parsed into a pandas datetime.
# NOTE(review): presumably the release date of the Yelp dataset, used
# downstream for date arithmetic - not referenced in the visible cells.
dataset_release_date = pd.to_datetime('2020-3-25 19:13:01')

# EDA

## Broad Tools
## Pandas Profiling

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() adds a stray "None".
df.info()
print(df.describe())

In [None]:
# scatter_matrix lives in pandas.plotting; the top-level pd.scatter_matrix
# alias was removed from pandas.
pd.plotting.scatter_matrix(df, alpha=0.1)

In [None]:
# Build the pandas-profiling report for the sample and save it as HTML.
report_title = 'Pandas Profiling Report for Features and Targets Sample.'
report_path = "features_and_targets_sample_pandas_profiling.html"
profile = df.profile_report(title=report_title)
profile.to_file(output_file=report_path)

## Explore Columns and Relationships Between Columns
## Dataprep.eda

- `plot(df)`: "I want an overview of the dataset"
- `plot(df, "col_1")`: "I want to understand the column col_1"
- `plot(df, "col_1", "col_2")`: "I want to understand the relationship between columns col_1 and col_2"

In [None]:
# dataprep.eda overview of the whole sample dataframe.
plot(df)

## Explore Deeper and Find Interesting Possibilities for Feature Engineering