### <span style='color:skyblue'>Mushroom Project: Visualizing Categorical Data</span>

In [None]:
# imports
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import MultipleLocator

In [None]:
# Location of the mushroom dataset on disk (folder + file name -> full path).
input_folder = r'/Users/alexandrabrown/Desktop/data_science/datasets/exploring_mushrooms'
input_file = 'mushroom_data.csv'
input_path = os.path.join(input_folder, input_file)

# Read the CSV into a DataFrame; the bare `df` on the last line makes the
# notebook render the table as the cell's output.
df = pd.read_csv(filepath_or_buffer=input_path)
df

#### <span style='color:skyblue'>Challenge 1: Create list of all column names --> iterate over columns and plot</span>
- Add a conditional that draws a pie chart when a column has fewer than 6 categories

In [None]:
# Plot the value counts of every column in `df`: a pie chart when the column
# has fewer than `max_pie_categories` distinct values, a count plot otherwise.

# The font is the same for every figure, so configure it once up front.
plt.rcParams['font.family'] = 'Times New Roman'

# Columns with fewer categories than this are drawn as pie charts.
max_pie_categories = 6

cols_list = df.columns.to_list()

for col in cols_list:
    # count number of unique categories for current col
    num_categories = df[col].nunique()
    # Compute the counts once; they supply the pie slices and labels as well
    # as the bar order (most frequent category first).
    counts = df[col].value_counts()

    if num_categories < max_pie_categories:
        # few categories -> pie chart with percentage labels
        plt.pie(x=counts.values, labels=counts.index, autopct='%.1f%%')
        # equal aspect ratio so the pie is drawn as a circle
        plt.axis('equal')
    else:
        # max_pie_categories or more -> count plot
        sns.countplot(data=df, x=col, order=counts.index)
        plt.ylabel('Count', fontsize=12)
        plt.xlabel(col, fontsize=12)
        plt.xticks(rotation=30, fontsize=10)
        plt.tick_params(axis='both', which='major', length=3.5, width=2)

    # Title both chart types so every figure identifies the column it shows.
    plt.title(f'{col} Value Counts', fontsize=12)
    plt.show()
    # Close the figure instead of clearing it: clearing after show() can leave
    # an empty figure open, whereas close() releases it.
    plt.close()




#### <span style='color:skyblue'>Challenge 2: Create plots using a list comprehension</span>
Using a list comprehension purely for its side effects is poor coding practice; this is just a fun challenge.

In [None]:
# NOTE: a comprehension executed purely for its side effects -- kept that way
# on purpose, as the exercise. For each column the inner list is evaluated
# left to right: draw the count plot, display it, then clear the figure.
list_comp = [
    [
        sns.countplot(data=df, x=column, palette='Set2', hue=column),
        plt.show(),
        plt.clf(),
    ]
    for column in df.columns
]

#### <span style='color:skyblue'>Challenge 3: For columns whose categories are colors, make each bar on the plot match the color it represents</span>

In [None]:
# Draw a count plot for every column in `df`. Columns whose name contains
# 'color' get a palette in which each bar is filled with the color its
# category names; every other column uses the standard 'Set2' palette.

# hex code to draw for each recognised color name (keys are lowercase)
color_dict = {'black': '#111111',
             'brown': '#906511',
             'purple': '#9371CD',
             'white': '#ECE8F3',
             'green': '#50C668',
             'blue': '#79CCE7',
             'orange': '#EE9540',
             'yellow': '#EEE140',
             'pink': '#EE40D4',
             'red': '#EE4840',
             'gray': '#C2BCBC'}

# fallback fill for categories that are not a key of color_dict
alt_color = '#FAE5D3'


# create list of data col names
col_names = df.columns.to_list()

# text to search for in col names
color_txt = 'color'

for col_name in col_names:
    # check if word 'color' is in column name
    if color_txt in col_name.lower():
        # Map each category to its hex code, falling back to alt_color.
        # dropna(): a missing value is a float NaN with no .lower(), and it is
        # not part of the value_counts() order used below anyway.
        # str(): lets non-string categories take the fallback instead of raising.
        plotting_pal = {
            color: color_dict.get(str(color).lower(), alt_color)
            for color in df[col_name].dropna().unique()
        }
        palette = plotting_pal
    else:
        # not a color column -> standard palette
        palette = 'Set2'

    # Single plotting path for both cases; bars are ordered most frequent first.
    sns.countplot(data=df, x=col_name, order=df[col_name].value_counts().index, palette=palette, hue=col_name)
    plt.xticks(rotation=30)
    plt.show()
    # Close the figure instead of clearing it: clearing after show() can leave
    # an empty figure open, whereas close() releases it.
    plt.close()