In [1]:
# import libraries
from statistics import median

from matplotlib import pyplot as plt
from pandas import read_csv

%matplotlib notebook

## Data parsing

In [2]:
# Read the CSV file into a DataFrame
data = read_csv('data.csv')

# Separate the columns used by the plots into individual variables
ages, dev_salaries, py_salaries, js_salaries = (
    data[column] for column in ('Age', 'All_Devs', 'Python', 'JavaScript')
)

## Filling Area under line plots
### fill_between method

In [3]:
# Draw on an explicitly created figure/axes so this cell does not depend on
# whichever figure pyplot currently treats as active.
fig, ax = plt.subplots()

# The usual line plots: all developers (dashed, dark grey) and Python developers
ax.plot(ages, dev_salaries, color='#444444', linestyle='--', label='All Devs')
ax.plot(ages, py_salaries, label='Python')

# fill_between is the reason for this lesson.
# In the simplest case it takes two arguments, x and y1, and shades the region
# between the y1 curve and y2. y2 defaults to 0; it is passed explicitly here
# only to make that baseline visible. alpha makes the shading translucent.
ax.fill_between(x=ages, y1=py_salaries, y2=0, alpha=.2)

ax.legend()

ax.set_title('Programming Language Salary By Age')
ax.set_xlabel('Age')
ax.set_ylabel('Dollars(USD)')
plt.show()

<IPython.core.display.Javascript object>

### Filling with respect to a non-zero y2

In [4]:
# Median of the all-developers salary series; the following cells pass it to
# fill_between as a constant reference level (y2).
# `median` is statistics.median, imported with the other libraries at the top
# of the notebook.
median_salary = median(dev_salaries)

In [5]:
# Draw on an explicitly created figure/axes so this cell does not depend on
# whichever figure pyplot currently treats as active.
fig, ax = plt.subplots()

ax.plot(ages, dev_salaries, color='#444444', linestyle='--', label='All Devs')
ax.plot(ages, py_salaries, label='Python')

# With a non-zero y2 the shading covers the gap between the py_salaries curve
# and the median_salary level: under the curve where it lies above the median,
# and over the curve where it lies below the median.
ax.fill_between(ages, py_salaries, median_salary, alpha=.2)

ax.legend()

ax.set_title('Programming Language Salary By Age')
ax.set_xlabel('Age')
ax.set_ylabel('Dollars(USD)')
plt.show()

<IPython.core.display.Javascript object>

### where parameter

In [6]:
# Draw on an explicitly created figure/axes so this cell does not depend on
# whichever figure pyplot currently treats as active.
fig, ax = plt.subplots()

ax.plot(ages, dev_salaries, color='#444444', linestyle='--', label='All Devs')
ax.plot(ages, py_salaries, label='Python')

# `where` restricts the fill to the x positions that satisfy a condition.
# `interpolate=True` extends each filled region up to the actual point where
# py_salaries crosses median_salary, so the fill is not cut short at the last
# data point before the crossing.
ax.fill_between(ages, py_salaries, median_salary,
                where=(py_salaries > median_salary),
                interpolate=True, alpha=.2)

# Call the same method again with the opposite condition to fill the
# remaining regions as a separate patch.
ax.fill_between(ages, py_salaries, median_salary,
                where=(py_salaries <= median_salary),
                interpolate=True, alpha=.2)
ax.legend()

ax.set_title('Programming Language Salary By Age')
ax.set_xlabel('Age')
ax.set_ylabel('Dollars(USD)')
plt.show()

<IPython.core.display.Javascript object>

In [7]:
## By labeling our fills we emphasize the information/feedback we want to gain from the plot

In [8]:
# Draw on an explicitly created figure/axes so this cell does not depend on
# whichever figure pyplot currently treats as active.
fig, ax = plt.subplots()

ax.plot(ages, dev_salaries, color='#444444', linestyle='--', label='All Devs')
ax.plot(ages, py_salaries, label='Python')

# Here y2 is the dev_salaries curve rather than a constant, so the shading
# lies between the two plotted lines. Each fill gets its own label so that it
# shows up as an entry in the legend.
ax.fill_between(ages, py_salaries, dev_salaries,
                where=(py_salaries > dev_salaries),
                interpolate=True, alpha=.2, label='Above Avg')

# Complementary condition: the regions where py_salaries does not exceed
# dev_salaries.
ax.fill_between(ages, py_salaries, dev_salaries,
                where=(py_salaries <= dev_salaries),
                interpolate=True, alpha=.2, label='Below Avg')
ax.legend()

ax.set_title('Programming Language Salary By Age')
ax.set_xlabel('Age')
ax.set_ylabel('Dollars(USD)')
plt.show()

<IPython.core.display.Javascript object>