# Introduction to Data Visualization in Python - Customizing Plots

In [6]:
# Multiple plots on single axis

# With Datacamp, the dataset is already loaded. For this particular example, 
# the dataset that has been loaded looks at records of undergraduate degrees 
# awarded to women in a variety of fields from 1970 to 2011. 
# We use multiple plots on a single axis to compare trends in degrees.
# The code below loads a plot that compares the degrees conferred to women
# in Physical Sciences and Computer Sciences between 1970-2011.

In [None]:
# Import matplotlib.pyplot
import matplotlib.pyplot as plt

# Draw both trends on the same axes so they can be compared directly:
# Physical Sciences in blue first, then Computer Science in red
# (each is the % of degrees awarded to women, plotted against year).
for series, line_color in ((physical_sciences, 'blue'), (computer_science, 'red')):
    plt.plot(year, series, color=line_color)

# Display the plot
plt.show()


In [None]:
# Using axes()

# plt.axes() lets you draw each line plot on its own set of axes, but you must
# give the position of each set of axes in coordinates relative to the figure.
# When loaded, the plot creates separate sets of axes where each line plot is drawn.
# The two plots show the percentage of degrees awarded to women in both Physical Sciences
# and Computer Science.

In [None]:
# One entry per panel: (axes rectangle, data series, line colour).
# The first panel shows Physical Sciences in blue, the second shows
# Computer Science in red; the rectangles place them side by side.
panels = (
    ([0.05, 0.05, 0.425, 0.9], physical_sciences, 'blue'),
    ([0.525, 0.05, 0.425, 0.9], computer_science, 'red'),
)

for rect, series, line_color in panels:
    # Create the axes for this panel, then draw its line plot on them
    plt.axes(rect)
    plt.plot(year, series, color=line_color)

# Display the plot
plt.show()


In [None]:
# Using subplot() (1)

# plt.axes() can be tedious to use, which is why we have the alternative: subplot.
# In the plot below, we used plt.subplot(m, n, k) to make the subplot grid of 
# dimensions m by n and to make the kth subplot active.
# Below we plotted the percentage of degrees awarded to women in Physical Sciences
# and Computer Science.

In [None]:
# Panels of the 1x2 grid, left to right: (data series, line colour, title)
panels = (
    (physical_sciences, 'blue', 'Physical Sciences'),
    (computer_science, 'red', 'Computer Science'),
)

for position, (series, line_color, heading) in enumerate(panels, start=1):
    # Activate the subplot at this position (1 = left, 2 = right)
    plt.subplot(1, 2, position)
    # Plot the % of degrees awarded to women in this field and title the panel
    plt.plot(year, series, color=line_color)
    plt.title(heading)

# Use plt.tight_layout() to improve the spacing between subplots
plt.tight_layout()
plt.show()

In [None]:
# Using subplot() (2)

# Maybe you want to compare more areas of study. In this case, we use subplot
# to include Health Professions and Education. We use the same subplot concept, only
# this time the grid is 2x2 and we make the kth subplot active before plotting on it.
# This creates separate subplots for 4 areas of study, where we plot the percentage of
# degrees awarded to women in each field, each with its own color.

In [None]:
# Panels of the 2x2 grid in subplot order (top left, top right,
# bottom left, bottom right): (data series, line colour, title)
panels = (
    (physical_sciences, 'blue', 'Physical Sciences'),
    (computer_science, 'red', 'Computer Science'),
    (health, 'green', 'Health Professions'),
    (education, 'yellow', 'Education'),
)

for position, (series, line_color, heading) in enumerate(panels, start=1):
    # Make the subplot at this position active in the 2x2 grid
    plt.subplot(2, 2, position)
    # Plot the % of degrees awarded to women in this field and title the panel
    plt.plot(year, series, color=line_color)
    plt.title(heading)

# Improve the spacing between subplots and display them
plt.tight_layout()
plt.show()


In [None]:
# Using xlim(), ylim()

# We may want to customize the range of each axis; we can do this with xlim()
# and ylim(). With these commands, we are able to zoom in on or expand our plot
# to get a closer look at the data.
# We also learned how to use plt.savefig() to save the file we generated. Woohoo!

In [None]:
# Plot the % of degrees awarded to women in Computer Science and the Physical Sciences
plt.plot(year, computer_science, color='red')
plt.plot(year, physical_sciences, color='blue')

# Add the axis labels
plt.xlabel('Year')
plt.ylabel('Degrees awarded to women (%)')

# Set the x-axis range
plt.xlim((1990, 2010))

# Set the y-axis range
plt.ylim((0, 50))

# Add a title
plt.title('Degrees awarded to women (1990-2010)\nComputer Science (red)\nPhysical Sciences (blue)')

# Save the image as 'xlim_and_ylim.png'.
# This must happen BEFORE plt.show(): once the figure has been shown it is
# closed, and a later savefig() would write a new, empty figure to disk.
plt.savefig('xlim_and_ylim.png')

# Display the plot
plt.show()


In [None]:
# Using axis()

# We can make things simpler by using axis() to set the x and y limits together.

In [None]:
# Plot in blue the % of degrees awarded to women in Computer Science
plt.plot(year, computer_science, color='blue')

# Plot in red the % of degrees awarded to women in the Physical Sciences
plt.plot(year, physical_sciences, color='red')

# Set the x-axis and y-axis limits in one call: (xmin, xmax, ymin, ymax)
plt.axis((1990, 2010, 0, 50))

# Save the figure as 'axis_limits.png'.
# This must happen BEFORE plt.show(): once the figure has been shown it is
# closed, and a later savefig() would write a new, empty figure to disk.
plt.savefig('axis_limits.png')

# Show the figure
plt.show()

In [None]:
# Using legend()

# Legends are helpful when we need to distinguish between multiple datasets. You can
# see how we labeled each dataset separately, as well as labeling the x and y axes.
# The loc argument sets the placement of the legend on the plot.

In [None]:
# Draw each field with its own colour and a label for the legend to pick up:
# Computer Science in red first, then Physical Sciences in blue.
for series, line_color, legend_text in (
    (computer_science, 'red', 'Computer Science'),
    (physical_sciences, 'blue', 'Physical Sciences'),
):
    plt.plot(year, series, color=line_color, label=legend_text)

# Add a legend at the lower center
plt.legend(loc='lower center')

# Add axis labels and title, then display the plot
plt.xlabel('Year')
plt.ylabel('Enrollment (%)')
plt.title('Undergraduate enrollment of women')
plt.show()

In [None]:
# Using annotate()

# Annotate helps us give context to our plot. This form of annotation
# is not the annotation we perform when labeling data for
# machine learning models. Annotation in this case simply means
# adding text or arrows to the graph to help the viewer
# understand it conceptually.


In [None]:
# cs_max: the maximum enrollment of women in Computer Science
cs_max = computer_science.max()

# yr_max: the year at the position where that maximum occurs
# (argmax() gives the position of the maximum within computer_science)
yr_max = year[computer_science.argmax()]

# Plot both fields with legend labels, as before
for series, line_color, legend_text in (
    (computer_science, 'red', 'Computer Science'),
    (physical_sciences, 'blue', 'Physical Sciences'),
):
    plt.plot(year, series, color=line_color, label=legend_text)
plt.legend(loc='lower right')

# Add a black arrow pointing at the maximum; the 'Maximum' text sits
# 5 units to the right of and 5 units above the annotated point
plt.annotate(
    'Maximum',
    xy=(yr_max, cs_max),
    xytext=(yr_max + 5, cs_max + 5),
    arrowprops={'facecolor': 'black'},
)

# Add axis labels and title, then display the plot
plt.xlabel('Year')
plt.ylabel('Enrollment (%)')
plt.title('Undergraduate enrollment of women')
plt.show()

In [None]:
# Modifying styles

# We are also able to modify the style of the plot, including the stylesheet.

In [None]:
# Import matplotlib.pyplot
import matplotlib.pyplot as plt

# Peak Computer Science enrollment and the year at which it occurs;
# both are used to place the annotation on the Computer Science panel
cs_max = computer_science.max()
yr_max = year[computer_science.argmax()]

# Switch to the 'ggplot' stylesheet before any axes are created
plt.style.use('ggplot')

# Top left: enrollment % of women in the Physical Sciences
plt.subplot(2, 2, 1)
plt.plot(year, physical_sciences, color='blue')
plt.title('Physical Sciences')

# Top right: enrollment % of women in Computer Science
plt.subplot(2, 2, 2)
plt.plot(year, computer_science, color='red')
plt.title('Computer Science')

# Mark the Computer Science maximum with a black arrow; the text is placed
# 1 unit to the left of and 10 units below the annotated point
plt.annotate(
    'Maximum',
    xy=(yr_max, cs_max),
    xytext=(yr_max - 1, cs_max - 10),
    arrowprops={'facecolor': 'black'},
)

# Bottom left: enrollment % of women in Health Professions
plt.subplot(2, 2, 3)
plt.plot(year, health, color='green')
plt.title('Health Professions')

# Bottom right: enrollment % of women in Education
plt.subplot(2, 2, 4)
plt.plot(year, education, color='yellow')
plt.title('Education')

# Improve spacing between subplots and display them
plt.tight_layout()
plt.show()