In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

# Superstore analysis

In [26]:
# Load the Superstore sample CSV into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
superstore = pd.read_csv("./datasets/SampleSuperstore.csv")

In [31]:
def table_metadata_to_latex(frame, caption):
    """Print a LaTeX table summarising the columns of ``frame``.

    The table has one row per column of ``frame`` and two columns:
    "Data Type" (the column's dtype) and "Unique Values" (the result of
    ``frame.nunique()``, which by default does not count missing values).

    Parameters
    ----------
    frame : pandas.DataFrame
        The frame whose columns are described.
    caption : str
        Caption forwarded to ``DataFrame.to_latex``.

    Returns
    -------
    None
        The LaTeX source is written to stdout with ``print``.
    """
    # Both Series are indexed by the column labels of `frame`, so they line
    # up row by row without any reindexing.
    summary = pd.DataFrame(
        {
            "Data Type": frame.dtypes,
            "Unique Values": frame.nunique(),
        }
    )

    latex_source = summary.to_latex(caption=caption)
    print(latex_source)

In [32]:
# Print the LaTeX metadata table (dtype and unique-value count per column)
# for the Superstore frame.
table_metadata_to_latex(superstore, caption="Superstore Metadata")

\begin{table}
\caption{Superstore Metadata}
\begin{tabular}{llr}
\toprule
 & Data Type & Unique Values \\
\midrule
Ship Mode & object & 4 \\
Segment & object & 3 \\
Country & object & 1 \\
City & object & 531 \\
State & object & 49 \\
Postal Code & int64 & 631 \\
Region & object & 4 \\
Category & object & 3 \\
Sub-Category & object & 17 \\
Sales & float64 & 5825 \\
Quantity & int64 & 14 \\
Discount & float64 & 12 \\
Profit & float64 & 7287 \\
\bottomrule
\end{tabular}
\end{table}



In [14]:
# count the distinct values found in each column of a frame
def get_unique_features(frame):
    """Return ``[column_label, distinct_value_count]`` pairs for ``frame``.

    Columns are visited in the order of ``frame.columns``. The count is the
    length of ``Series.unique()``, so a missing value (NaN/None) present in
    a column is counted as one distinct value.

    Parameters
    ----------
    frame : pandas.DataFrame
        The frame to inspect.

    Returns
    -------
    list of list
        One two-element list ``[label, count]`` per column.
    """
    return [[label, len(frame[label].unique())] for label in frame.columns]

In [12]:
# Count the missing (null) values in each column of the Superstore frame.
superstore.isnull().sum()

Ship Mode       0
Segment         0
Country         0
City            0
State           0
Postal Code     0
Region          0
Category        0
Sub-Category    0
Sales           0
Quantity        0
Discount        0
Profit          0
dtype: int64

In [15]:
# List [column, distinct-value count] pairs for the Superstore frame.
get_unique_features(superstore)

[['Ship Mode', 4],
 ['Segment', 3],
 ['Country', 1],
 ['City', 531],
 ['State', 49],
 ['Postal Code', 631],
 ['Region', 4],
 ['Category', 3],
 ['Sub-Category', 17],
 ['Sales', 5825],
 ['Quantity', 14],
 ['Discount', 12],
 ['Profit', 7287]]

In [34]:
# Load the credit-risk CSV into a DataFrame.
# The path is relative, so it is resolved against the current working directory.
credit_risk = pd.read_csv("./datasets/credit_risk_dataset.csv")

In [36]:
# Show the (rows, columns) dimensions of the credit-risk frame.
credit_risk.shape

(32581, 12)

In [35]:
# Count the missing (null) values in each column of the credit-risk frame.
credit_risk.isnull().sum()

person_age                       0
person_income                    0
person_home_ownership            0
person_emp_length              895
loan_intent                      0
loan_grade                       0
loan_amnt                        0
loan_int_rate                 3116
loan_status                      0
loan_percent_income              0
cb_person_default_on_file        0
cb_person_cred_hist_length       0
dtype: int64