## Seaborn Visualizations

In [None]:
# Import Matplotlib and Seaborn
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter plot of GDP (x-axis) against percent literate (y-axis)
sns.scatterplot(
    x=gdp,
    y=percent_literate,
)

# Render the figure
plt.show()

In [None]:
# Horizontal count plot: passing region as `y` puts the categories on the y-axis
sns.countplot(
    y=region,
)

# Render the figure
plt.show()

In [None]:
# Import Matplotlib, Pandas, and Seaborn
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


# Load the CSV located at csv_filepath into a DataFrame
df = pd.read_csv(csv_filepath)

# Count plot of the "Spiders" column, with its values along the x-axis
sns.countplot(
    data=df,
    x="Spiders",
)

# Render the figure
plt.show()

In [None]:
# `hue` makes it easy to color-code subgroups within a Seaborn plot.

# Scatter plot of absences vs. final grade, colored by location
sns.scatterplot(x="absences", y="G3", data=student_data, hue="location")

# Show the first plot on its own. scatterplot() is an axes-level function, so
# without this call the next scatterplot() would draw onto the same Axes and
# overlay a second copy of the points.
plt.show()

# Same plot, but with "Rural" placed before "Urban" in the hue ordering
sns.scatterplot(x="absences", y="G3",
                data=student_data,
                hue="location",
                hue_order=["Rural", "Urban"])

# Show plot
plt.show()

In [None]:
# Color to use for each value of the "location" subgroup
palette_colors = {
    "Rural": "green",
    "Urban": "blue",
}

# Count plot of school, split into location subgroups drawn in those colors
sns.countplot(
    data=student_data,
    x="school",
    hue="location",
    palette=palette_colors,
)

#### Relational Plots 
- answer questions about the relationship between two quantitative variables
- relplot() visualizes the relationship between variables with scatter plots or line plots

In [None]:
# Draw the absences vs. final grade scatter plot three times with relplot():
#   1. a single plot,
#   2. one subplot per study time, arranged in columns,
#   3. one subplot per study time, arranged in rows.
for facet_kwargs in ({}, {"col": "study_time"}, {"row": "study_time"}):
    sns.relplot(
        data=student_data,
        x="absences",
        y="G3",
        kind="scatter",
        **facet_kwargs,
    )

    # Show each figure before moving on to the next variant
    plt.show()

In [None]:
# Scatter plot of G1 vs. G3, faceted into a grid:
# columns by school support, rows by family support ("yes" before "no" in both)
sns.relplot(
    data=student_data,
    x="G1",
    y="G3",
    kind="scatter",
    col="schoolsup",
    col_order=["yes", "no"],
    row="famsup",
    row_order=["yes", "no"],
)

# Scatter plot of horsepower vs. mpg,
# with point size varying by the number of cylinders
sns.relplot(
    data=mpg,
    x="horsepower",
    y="mpg",
    kind="scatter",
    size="cylinders",
)

# Same plot, with point color also varying by the number of cylinders
sns.relplot(
    data=mpg,
    x="horsepower",
    y="mpg",
    kind="scatter",
    size="cylinders",
    hue="cylinders",
)

# Scatter plot of acceleration vs. mpg,
# with marker style and color both varying by country of origin
sns.relplot(
    data=mpg,
    x="acceleration",
    y="mpg",
    kind="scatter",
    style="origin",
    hue="origin",
)

#### Scatter plots = each plot point is an independent observation
#### Line plots = each point represents the same variable tracked over time

In [None]:
# NOTE(review): `ci` is deprecated in seaborn 0.12+ in favor of `errorbar`
# (e.g. errorbar="sd" / errorbar=None) — confirm the seaborn version in use.

# Line plot of mpg over model year (default shaded confidence interval)
sns.relplot(
    data=mpg,
    x="model_year",
    y="mpg",
    kind="line",
)

# Shade the standard deviation instead of the confidence interval for the mean
sns.relplot(
    data=mpg,
    x="model_year",
    y="mpg",
    kind="line",
    ci="sd",
)

# Horsepower over model year with the confidence interval turned off
sns.relplot(
    data=mpg,
    x="model_year",
    y="horsepower",
    kind="line",
    ci=None,
)

# One line per country of origin: `markers=True` shows a marker at each point,
# and `dashes=False` keeps every line solid instead of varying the line style.
sns.relplot(
    data=mpg,
    x="model_year",
    y="horsepower",
    kind="line",
    ci=None,
    style="origin",
    hue="origin",
    dashes=False,
    markers=True,
)

#### Categorical plots 
- involve a categorical variable and show comparisons between groups, e.g. count plots and bar plots; bar plots show the mean of a quantitative variable per category, along with a 95% confidence interval that assumes the data are a random sample
- generally put categorical variable on the x axis
- use catplot(), use col= and row= like relplot()

In [None]:
# Count plot of internet usage (categories on the x-axis)
sns.catplot(
    data=survey_data,
    x="Internet usage",
    kind="count",
)

# Flip the orientation: categories on the y-axis
sns.catplot(
    data=survey_data,
    y="Internet usage",
    kind="count",
)

# Horizontal version again, split into one column subplot per age category
sns.catplot(
    data=survey_data,
    y="Internet usage",
    kind="count",
    col="Age Category",
)

In [None]:
# Bar plot of interest in math, one bar per gender
sns.catplot(
    data=survey_data,
    x="Gender",
    y="Interested in Math",
    kind="bar",
)

# Bar plot of the average final grade in each study-time category
sns.catplot(
    data=student_data,
    x="study_time",
    y="G3",
    kind="bar",
)

# Study-time categories listed from lowest to highest
category_order = [
    "<2 hours",
    "2 to 5 hours",
    "5 to 10 hours",
    ">10 hours",
]

# Same bar plot with the categories arranged in that order
sns.catplot(
    data=student_data,
    x="study_time",
    y="G3",
    kind="bar",
    order=category_order,
)

# Ordered bar plot again, with the confidence intervals turned off
sns.catplot(
    data=student_data,
    x="study_time",
    y="G3",
    kind="bar",
    order=category_order,
    ci=None,
)


#### Box plots 
- see median, spread, skewness, and outliers
- colored box = 25-75 percentile/IQR (interquartile range), line in middle = median, whiskers = spread of distribution, floating points = outliers
- facilitates comparisons between groups

In [None]:
# Study-time categories listed from lowest to highest
study_time_order = ["<2 hours", "2 to 5 hours",
                    "5 to 10 hours", ">10 hours"]

# Box plot of final grade per study-time category, in the order given above.
# (An earlier copy of this call used "." instead of "," between the x= and y=
# arguments, which is a SyntaxError and stopped the whole cell from running.)
sns.catplot(x="study_time", y="G3", data=student_data, kind="box", order=study_time_order)

In [None]:
# Box plot with location subgroups, omitting the outliers.
# `showfliers=False` is used instead of `sym=""`: `sym` is only understood by
# matplotlib's Axes.boxplot, whereas `showfliers` is also accepted by
# Axes.bxp, so this keeps working if seaborn draws the boxes through bxp.
sns.catplot(data=student_data, kind="box",
            x="internet", y="G3", hue="location", showfliers=False)

# Set the whiskers to 0.5 * IQR
sns.catplot(x="romantic", y="G3",
            data=student_data,
            kind="box", whis=0.5)

# Extend the whiskers to the 5th and 95th percentile
sns.catplot(x="romantic", y="G3",
            data=student_data,
            kind="box",
            whis=[5, 95])

# Set the whiskers at the min and max values (0th and 100th percentile)
sns.catplot(x="romantic", y="G3",
            data=student_data,
            kind="box",
            whis=[0, 100])

In [None]:
# Point plot of absences for each family-relationship rating
sns.catplot(
    x="famrel",
    y="absences",
    data=student_data,
    kind="point",
)

# Point plot of absences by romantic status, with one color per school
sns.catplot(
    x="romantic",
    y="absences",
    hue="school",
    data=student_data,
    kind="point",
)

# Same plot with the confidence intervals turned off
sns.catplot(
    x="romantic",
    y="absences",
    hue="school",
    data=student_data,
    kind="point",
    ci=None,
)


# Plot the median number of absences instead of the default estimator,
# using numpy's median function
from numpy import median
sns.catplot(
    x="romantic",
    y="absences",
    hue="school",
    data=student_data,
    kind="point",
    ci=None,
    estimator=median,
)

In [None]:
# Survey response categories, listed from "Never" to "Always"; shared by all
# of the "Parents Advice" count plots below
category_order = ["Never", "Rarely", "Sometimes",
                  "Often", "Always"]

# Draw the same count plot three times on a "whitegrid" style: first with the
# palette currently in effect, then with "Purples", then with "RdBu".
for palette_name in (None, "Purples", "RdBu"):
    sns.set_style("whitegrid")
    if palette_name is not None:
        sns.set_palette(palette_name)

    sns.catplot(
        data=survey_data,
        x="Parents Advice",
        kind="count",
        order=category_order,
    )

# Bar plot drawn with the "paper" context
sns.set_context("paper")
sns.catplot(
    data=survey_data,
    x="Number of Siblings",
    y="Feels Lonely",
    kind="bar",
)

# Step through the remaining contexts. No plot is drawn in between, so the
# last one set ("poster") is the context in effect for the box plot below.
for context_name in ("notebook", "talk", "poster"):
    sns.set_context(context_name)

# "darkgrid" style with a custom two-color palette
sns.set_style("darkgrid")
sns.set_palette(["#39A7D0", "#36ADA4"])

# Box plot of the age distribution by gender
sns.catplot(
    data=survey_data,
    x="Gender",
    y="Age",
    kind="box",
)

#### Seaborn plot functions create two different types of objects: 
- FacetGrid objects and AxesSubplot objects. 
- The method for adding a title to your plot will differ depending on the type of object it is.

In [None]:
# Create scatter plot with relplot()
g = sns.relplot(x="weight",
                y="horsepower",
                data=mpg,
                kind="scatter")

# Identify plot type
type_of_g = type(g)

# Print type
print(type_of_g)

# Show this figure before drawing the next one
plt.show()


# Create scatter plot
g = sns.relplot(x="weight",
                y="horsepower",
                data=mpg,
                kind="scatter")

# Add a title "Car Weight vs. Horsepower" through the grid's figure
g.fig.suptitle("Car Weight vs. Horsepower")

# Finish the scatter figure here. lineplot() below is an axes-level function
# and draws on the current Axes, so without this call the lines would be
# drawn on top of the scatter plot above.
plt.show()

# Create line plot; `g` now holds what lineplot() returns rather than the
# relplot() grid, so the title is set with set_title() instead of suptitle()
g = sns.lineplot(x="model_year", y="mpg_mean",
                 data=mpg_mean,
                 hue="origin")

# Add a title "Average MPG Over Time"
g.set_title("Average MPG Over Time")

# Add x-axis and y-axis labels
g.set(xlabel="Car Model Year",
      ylabel="Average MPG")

# Show the line plot on its own
plt.show()

# Create point plot
# NOTE(review): `join` is deprecated in recent seaborn releases — confirm
# against the seaborn version in use.
sns.catplot(x="origin",
            y="acceleration",
            data=mpg,
            kind="point",
            join=False,
            capsize=0.1)

# Rotate x-tick labels
plt.xticks(rotation=90)

# Show plot
plt.show()

### putting it all together

In [None]:
# Use the "Blues" palette for the plot below
color_palette = "Blues"
sns.set_palette(color_palette)

# Box plot of age by gender, with subgroups based on "Interested in Pets"
g = sns.catplot(
    data=survey_data,
    x="Gender",
    y="Age",
    kind="box",
    hue="Interested in Pets",
)

# Title the whole figure
g.fig.suptitle("Age of Those Interested in Pets vs. Not")

# Render the figure
plt.show()

In [None]:
# Use the "dark" figure style
sns.set_style("dark")

# Bar plot of "Likes Techno" by place of residence, one subplot per gender
g = sns.catplot(
    data=survey_data,
    x="Village - town",
    y="Likes Techno",
    kind="bar",
    col="Gender",
)

# Figure title (y=1.02 places it slightly above the default position)
# and axis labels
g.fig.suptitle("Percentage of Young People Who Like Techno", y=1.02)
g.set(
    xlabel="Location of Residence",
    ylabel="% Who Like Techno",
)

# Render the figure
plt.show()