# Categorical Variables

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

---

# Bar plot

In [None]:
# Basic bar chart: one bar per category, all bars sharing the same fill color.
bar_names = ["Label1", "Label2", "Label3", "Label4"]  # tick labels beneath the bars
bar_heights = [212, 234, 348, 449]                    # height of each bar, same order

fig, ax = plt.subplots(figsize=(15, 5))
ax.bar(x=bar_names, height=bar_heights, color="lightblue", edgecolor="black");

### Display each bar with a different color

In [None]:
# Same bar chart, but with a separate fill color supplied for every bar.
fig, ax = plt.subplots(figsize=(15, 5))

ax.bar(
    x=["Label1", "Label2", "Label3", "Label4"],                  # tick labels beneath the bars
    height=[212, 234, 348, 449],                                 # height of each bar
    color=["darkblue", "mediumblue", "lightblue", "lightcyan"],  # one color per bar, in order
    edgecolor="black",
);

### Horizontal bar plot

In [None]:
# Horizontal variant: categories go on the y-axis and `width` sets each bar's length.
fig, ax = plt.subplots(figsize=(15, 5))

ax.barh(
    y=["Label1", "Label2", "Label3", "Label4"],
    width=[212, 234, 348, 449],
    color=["darkblue", "mediumblue", "lightblue", "lightcyan"],
    edgecolor="black",
);

---

## Dataset: Heart Disease

In [None]:
# Load the heart-disease data set and preview its first five rows.
df = pd.read_csv("heart-disease.csv")

df.head(n=5)

### Count of chest pain type for females

In [None]:
# Count how often each chest-pain type occurs among the rows whose sex is "female".
# Selecting the column with a list keeps a one-column DataFrame, so this is
# DataFrame.value_counts().
is_female = df["sex"] == "female"
females_pain = df.loc[is_female, ["chest_pain"]].value_counts()
females_pain

In [None]:
fig, ax = plt.subplots()

# Take the tick labels from the counts themselves instead of hard-coding them, so
# every bar is guaranteed to be labelled with the chest-pain type it actually counts.
# Index entries are 1-tuples when the counts come from a one-column DataFrame;
# unwrap those, and pass plain scalar entries through unchanged.
female_pain_types = [
    str(key[0] if isinstance(key, tuple) else key) for key in females_pain.index
]

# the labels to go beneath the bars / the height of each bar
ax.bar(x=female_pain_types, height=females_pain.to_numpy(), color='violet')

ax.set_xlabel("Chest Pain Type")
ax.set_title("Females");

### Count of chest pain type for males

In [None]:
# Count how often each chest-pain type occurs among the rows whose sex is "male".
# Selecting the column with a list keeps a one-column DataFrame, so this is
# DataFrame.value_counts().
is_male = df["sex"] == "male"
males_pain = df.loc[is_male, ["chest_pain"]].value_counts()
males_pain

In [None]:
fig, ax = plt.subplots()

# Take the tick labels from the counts themselves instead of hard-coding them: the
# male counts have their own frequency ordering, so a fixed label list can put the
# wrong chest-pain type under a bar.
# Index entries are 1-tuples when the counts come from a one-column DataFrame;
# unwrap those, and pass plain scalar entries through unchanged.
male_pain_types = [
    str(key[0] if isinstance(key, tuple) else key) for key in males_pain.index
]

# the labels to go beneath the bars / the height of each bar
ax.bar(x=male_pain_types, height=males_pain.to_numpy(), color='cornflowerblue')

ax.set_xlabel("Chest Pain Type")
ax.set_title("Males");

---

# Joint: categorical x categorical

## Stacked bar chart with legend

In [None]:
fig, ax = plt.subplots()

# Each value_counts() result is ordered by its own frequencies, so the same position
# can refer to different chest-pain types in the two series. Put both on one shared
# category index before stacking; a type that is absent for one sex counts as 0.
pain_index = females_pain.index.union(males_pain.index)
female_counts = females_pain.reindex(pain_index, fill_value=0)
male_counts = males_pain.reindex(pain_index, fill_value=0)

# Index entries are 1-tuples when the counts come from a one-column DataFrame;
# unwrap those, and pass plain scalar entries through unchanged.
stacked_pain_types = [
    str(key[0] if isinstance(key, tuple) else key) for key in pain_index
]

ax.bar(x=stacked_pain_types, height=female_counts.to_numpy(),
       color='violet', label="Female")

# Start every male bar at the top of the matching female bar so the two stack.
ax.bar(x=stacked_pain_types, height=male_counts.to_numpy(),
       color='cornflowerblue', bottom=female_counts.to_numpy(), label="Male")

ax.set_xlabel("Chest Pain Type")

# Legend entries come from the `label=` given to each bar call above.
ax.legend();

---

# Pie chart

In [None]:
# Basic pie chart: `x` gives the value of each slice, `labels` names the slices in the same order.
slice_values = [154, 75, 223, 44]
slice_names = ["Label1", "Label2", "Label3", "Label4"]

fig, ax = plt.subplots(figsize=(15, 5))
ax.pie(x=slice_values, labels=slice_names);

### Set the colors

In [None]:
# Pie chart with an explicit fill color for each slice.
fig, ax = plt.subplots(figsize=(15, 5))

ax.pie(
    x=[154, 75, 223, 44],
    labels=["Label1", "Label2", "Label3", "Label4"],
    colors=["aliceblue", "lightblue", "deepskyblue", "dodgerblue"],  # one per slice, in order
);

### Set the wedge properties

In [None]:
# Pie chart whose wedges are outlined: `wedgeprops` is forwarded to every slice.
wedge_style = dict(edgecolor="black", linewidth=1, antialiased=True)

fig, ax = plt.subplots(figsize=(15, 5))
ax.pie(
    x=[154, 75, 223, 44],
    labels=["Label1", "Label2", "Label3", "Label4"],
    colors=["aliceblue", "lightblue", "deepskyblue", "dodgerblue"],
    wedgeprops=wedge_style,
);

### autopct

In [None]:
# Pie chart that prints each slice's share inside the wedge.
fig, ax = plt.subplots(figsize=(15, 5))

ax.pie(
    x=[154, 75, 223, 44],
    labels=["Label1", "Label2", "Label3", "Label4"],
    colors=["aliceblue", "lightblue", "deepskyblue", "dodgerblue"],
    autopct='%.2f%%',  # percentage with 2 decimal places, followed by a literal % sign
);

### Set the start angle
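Rotates the start of the pie counterclockwise from the x-axis, so that the first slice (Label1) begins at 90 degrees (the top of the circle) instead of the default 0 degrees.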

In [None]:
# Pie chart rotated with `startangle`, with percentages shown to 1 decimal place.
fig, ax = plt.subplots(figsize = (15, 5))

# Slice names spelled "Label…" to match the other pie examples in this notebook.
ax.pie(x=[154, 75, 223, 44], labels = ["Label1", "Label2","Label3", "Label4"],
       colors= ["aliceblue", "lightblue", "deepskyblue", "dodgerblue"],
       autopct='%.1f%%', startangle=90);

### explode
Pulls the chosen slice outward from the center of the pie; each slice gets its own offset, and 0 leaves a slice in place.

In [None]:
# Pie chart with one slice pulled away from the center via `explode`.
fig, ax = plt.subplots(figsize = (15, 5))

# Slice names spelled "Label…" to match the other pie examples in this notebook.
ax.pie(x=[154, 75, 223, 44], labels = ["Label1", "Label2","Label3", "Label4"],
        colors=["aliceblue", "lightblue", "deepskyblue", "dodgerblue"],
        autopct='%.1f%%', startangle=90, explode = [0, 0, 0.05, 0]); # separate out the Label3 slice

### shadow

In [None]:
# Same exploded pie chart, additionally drawn with a shadow (`shadow=True`).
fig, ax = plt.subplots(figsize = (15, 5))

# Slice names spelled "Label…" to match the other pie examples in this notebook.
ax.pie(x=[154, 75, 223, 44], labels = ["Label1", "Label2","Label3", "Label4"],
        colors= ["aliceblue", "lightblue", "deepskyblue", "dodgerblue"],
        autopct='%.1f%%', startangle=90, explode = [0, 0, 0.05, 0], shadow=True);

---

---

## Dataset: Top 50 US Tech Companies

In [None]:
# Load the tech-companies data set (replacing the previous `df`) and preview five rows.
df = pd.read_csv("Top 50 US Tech Companies.csv")

df.head(n=5)

#### Unique categories

In [None]:
# Distinct values that appear in the "HQ State" column.
df.loc[:, "HQ State"].unique()

#### Value counts

In [None]:
# Number of rows per "HQ State" value.
df.loc[:, "HQ State"].value_counts()

## Plot the top 4 states for tech company headquarters

## Bar plot

In [None]:
# Counts for the four most frequent "HQ State" values.
data = df["HQ State"].value_counts().head(4)

# Read the state names off the counts instead of hard-coding them, so each label is
# guaranteed to belong to the count plotted next to it (also when counts are tied).
labels = data.index.tolist()

# One color per plotted state; trimmed in case fewer than four states are present.
colors = ["darkorange", "darkgoldenrod", "gold", "lightyellow"][:len(data)]

In [None]:
# Bar chart of the selected state counts, using the labels and colors prepared earlier.
fig, ax = plt.subplots(figsize=(10, 4))

ax.bar(
    x=labels,
    height=data,
    color=colors,
    edgecolor="black",
);

## Pie Chart

In [None]:
# Pie chart of the selected state counts with the first slice pulled out.
fig, ax = plt.subplots(figsize=(15, 5))

ax.pie(
    x=data,
    labels=labels,
    colors=colors,
    autopct='%.1f%%',         # percentage with 1 decimal place
    startangle=90,
    explode=[0.1, 0, 0, 0],   # offset only the first slice
    shadow=True,
);

---

# Transforming a continuous variable into a categorical variable

## Discretizing
Transforming a continuous variable into a discrete one by grouping its values into a fixed set of categories.

In [None]:
# Load the churn data set (replacing the previous `df`) and preview five rows.
df = pd.read_csv("churn.csv")
df.head(n=5)

### Binning

In [None]:
# Bin the numeric "CreditScore" column into five named categories.
# Six bin edges define five intervals; each interval gets the label at the same position.
credit_bin_edges = [0, 579, 669, 739, 799, 850]
credit_bin_names = ["Poor", "Fair", "Good", "Very Good", "Excellent"]

df["Credit Category"] = pd.cut(df["CreditScore"], credit_bin_edges, labels=credit_bin_names)
df["Credit Category"].head(10)

#### Count each category

In [None]:
# Number of rows that fall into each credit category.
df.loc[:, "Credit Category"].value_counts()

# Bar chart

In [None]:
# Rows per credit category, in the order value_counts() returns them.
data = df["Credit Category"].value_counts()

# Take the labels from the counts instead of hard-coding an assumed order, so each
# label is guaranteed to describe the count plotted next to it.
labels = [str(category) for category in data.index]

# Colors are tied to the category name, not to a position, so every category keeps
# its color whatever order the counts come in.
category_colors = {
    "Poor": "lightcoral",
    "Fair": "goldenrod",
    "Good": "aquamarine",
    "Very Good": "limegreen",
    "Excellent": "green",
}
colors = [category_colors[label] for label in labels]

In [None]:
# Bar chart of the credit-category counts, using the labels and colors prepared earlier.
fig, ax = plt.subplots(figsize=(15, 5))

ax.bar(
    x=labels,
    height=data,
    color=colors,
    edgecolor="black",
);

# Pie chart

In [None]:
# Pie chart of the credit-category counts with outlined wedges and a shadow.
fig, ax = plt.subplots(figsize = (15, 5))

# Offset the "Excellent" slice. The offset is looked up by label rather than by
# position, so it stays on that slice even if the order or number of slices changes.
slice_offsets = [0.3 if label == "Excellent" else 0 for label in labels]

ax.pie(x=data, labels=labels, colors=colors,
       autopct='%.1f%%', startangle=90, explode=slice_offsets, shadow=True,
       wedgeprops = {"edgecolor" : "black",
                      'linewidth': 1,
                      'antialiased': True});

---