In [1]:
import numpy as np
import pandas as pd

In [2]:
x = pd.Series([10,20,30,40,50])
x

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [3]:
x.values

array([10, 20, 30, 40, 50], dtype=int64)

In [4]:
data = [450, 700, 943]
index = ['Don', 'Mike', 'Bob']
sales = pd.Series(data, index=index)
sales.index.name = "Salesperson"
sales.name = "Total sales per person"
sales

Salesperson
Don     450
Mike    700
Bob     943
Name: Total sales per person, dtype: int64

In [5]:
sales['Don']

450

In [6]:
sales[0]

450

In [7]:
sales.loc['Mike']

700

In [8]:
sales.iloc[1]

700

In [9]:
sales > 500

Salesperson
Don     False
Mike     True
Bob      True
Name: Total sales per person, dtype: bool

In [10]:
sales[[False,True,True]]

Salesperson
Mike    700
Bob     943
Name: Total sales per person, dtype: int64

In [11]:
sales[sales > 500]

Salesperson
Mike    700
Bob     943
Name: Total sales per person, dtype: int64

In [12]:
sales[sales > 500].values

array([700, 943], dtype=int64)

In [13]:
list(sales[sales > 500].values)

[700, 943]

In [14]:
'Don' in sales

True

In [15]:
450 in sales.values

True

In [16]:
sales.to_dict()

{'Don': 450, 'Mike': 700, 'Bob': 943}

In [17]:
sales_dict = {
    'Rob': 1000,
    'Tom': 420,
    'Pat': 444
}

In [18]:
sales_series = pd.Series(sales_dict)
sales_series

Rob    1000
Tom     420
Pat     444
dtype: int64

In [19]:
new_sales = pd.Series(sales_dict, index=['Rob', 'Tom', 'Mike', 'Lucy'])
new_sales

Rob     1000.0
Tom      420.0
Mike       NaN
Lucy       NaN
dtype: float64

In [20]:
# Matches up the indexes to key value pairs, new undefined keys will be NaN, and converts to float
# NaN is compatible with float

In [21]:
np.isnan(new_sales)

Rob     False
Tom     False
Mike     True
Lucy     True
dtype: bool

In [22]:
new_sales.loc['Lucy'] = 548
new_sales

Rob     1000.0
Tom      420.0
Mike       NaN
Lucy     548.0
dtype: float64

In [23]:
new_sales.index.name = "Salesperson"
new_sales.name = "TV sales"

In [24]:
new_sales

Salesperson
Rob     1000.0
Tom      420.0
Mike       NaN
Lucy     548.0
Name: TV sales, dtype: float64

# Dataframes
-Two dimensional
-Size mutable
-Heterogenous 
-ints and strings in same collection
-Rows and columns

In [25]:
sales_df = pd.DataFrame(new_sales)
sales_df

Unnamed: 0_level_0,TV sales
Salesperson,Unnamed: 1_level_1
Rob,1000.0
Tom,420.0
Mike,
Lucy,548.0


In [26]:
data = [['Aidan', 20], ['Tom', 40], ['Gary', 70]]
df = pd.DataFrame(data, columns=["Name", "Age"])
df

Unnamed: 0,Name,Age
0,Aidan,20
1,Tom,40
2,Gary,70


In [27]:
new_dict = {
    'Name': ['Tom', 'Jane', 'Steve'],
    'Sales': [250, 300, 350]
}
df_dict = pd.DataFrame(new_dict)
df_dict

Unnamed: 0,Name,Sales
0,Tom,250
1,Jane,300
2,Steve,350


In [28]:
list_dict = [
    {'Name': 'Tom', 'Sales': 2322, 'Performance': 'Great'},
    {'Name': 'Pete', 'Sales': 231},
    {'Name': 'Sam', 'Sales': 126}
]
new_df = pd.DataFrame(list_dict)
new_df

Unnamed: 0,Name,Sales,Performance
0,Tom,2322,Great
1,Pete,231,
2,Sam,126,


In [29]:
new_df['Sales']

0    2322
1     231
2     126
Name: Sales, dtype: int64

In [30]:
#Data frame is a pandas version of a series (more aestetical)

In [31]:
new_df.loc[1]

Name           Pete
Sales           231
Performance     NaN
Name: 1, dtype: object

In [32]:
new_df['Grade'] = ['A','A','B']
new_df

Unnamed: 0,Name,Sales,Performance,Grade
0,Tom,2322,Great,A
1,Pete,231,,A
2,Sam,126,,B


In [33]:
new_df.drop(2)

Unnamed: 0,Name,Sales,Performance,Grade
0,Tom,2322,Great,A
1,Pete,231,,A


In [34]:
new_df.drop('Performance', axis=1)

Unnamed: 0,Name,Sales,Grade
0,Tom,2322,A
1,Pete,231,A
2,Sam,126,B


In [35]:
new_df[['Name','Sales']]

Unnamed: 0,Name,Sales
0,Tom,2322
1,Pete,231
2,Sam,126


In [39]:
east = pd.Series([1000,1200,3400],index=['Q1','Q2','Q3'])
west = pd.Series([1100,1300,2400,3500],index=['Q1','Q2','Q3','Q4'])
df_region = pd.DataFrame({'East':east, 'West': west})
df_region['North'] = [2000,3000,2500,4000]
df_region['South'] = [1500,2000,1500,4000]
east

Q1    1000
Q2    1200
Q3    3400
dtype: int64

In [56]:
df_region['years'] = ['2016', '2017', '2018', '2019']
df_region

Unnamed: 0_level_0,East,West,North,South,years
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2016,1000.0,1100,2000,1500,2016
2017,1200.0,1300,3000,2000,2017
2018,3400.0,2400,2500,1500,2018
2019,,3500,4000,4000,2019


In [57]:
df_region.set_index('years')

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016,1000.0,1100,2000,1500
2017,1200.0,1300,3000,2000
2018,3400.0,2400,2500,1500
2019,,3500,4000,4000


In [42]:
df_region

Unnamed: 0,East,West,North,South,years
Q1,1000.0,1100,2000,1500,2016
Q2,1200.0,1300,3000,2000,2017
Q3,3400.0,2400,2500,1500,2018
Q4,,3500,4000,4000,2019


In [58]:
df_region.set_index('years', inplace=True)
df_region

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016,1000.0,1100,2000,1500
2017,1200.0,1300,3000,2000
2018,3400.0,2400,2500,1500
2019,,3500,4000,4000


In [47]:
new_region = df_region.reindex(['2018', '2019', '2020', '2021'])
new_region

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,


In [49]:
# reindex has kept data where theres overlap, blank data where theres new index 
# and lost data where we havn't specified the index

In [51]:
re_indexed = new_region.reindex(columns=['North', 'South', 'New'])
re_indexed

Unnamed: 0_level_0,North,South,New
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018,2500.0,1500.0,
2019,4000.0,4000.0,
2020,,,
2021,,,


In [52]:
re_indexed.fillna(0) # fill NaN with perticular value

Unnamed: 0_level_0,North,South,New
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018,2500.0,1500.0,0.0
2019,4000.0,4000.0,0.0
2020,0.0,0.0,0.0
2021,0.0,0.0,0.0


In [53]:
new_region

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,


In [60]:
new_region.fillna(method='ffill') # ffill will copy the most recent value into missing value

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,3400.0,3500.0,4000.0,4000.0
2020,3400.0,3500.0,4000.0,4000.0
2021,3400.0,3500.0,4000.0,4000.0


In [61]:
new_region.fillna(method='pad')

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,3400.0,3500.0,4000.0,4000.0
2020,3400.0,3500.0,4000.0,4000.0
2021,3400.0,3500.0,4000.0,4000.0


In [62]:
new_region.interpolate() # looks at what happens between existing data points

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,3400.0,3500.0,4000.0,4000.0
2020,3400.0,3500.0,4000.0,4000.0
2021,3400.0,3500.0,4000.0,4000.0


In [65]:
new_region.loc['2022'] = [6400,7400,5200,800]
new_region

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,
2022,6400.0,7400.0,5200.0,800.0


In [67]:
new_region.interpolate() # draws line between existing values and fills in missing gaps
new_region

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,
2022,6400.0,7400.0,5200.0,800.0


In [68]:
new_region.mean() # return mean per column

East     4900.000000
West     4433.333333
North    3900.000000
South    2100.000000
dtype: float64

In [69]:
new_region.fillna(new_region.mean()) # fill gaps with mean for that column

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,4900.0,3500.0,4000.0,4000.0
2020,4900.0,4433.333333,3900.0,2100.0
2021,4900.0,4433.333333,3900.0,2100.0
2022,6400.0,7400.0,5200.0,800.0


In [74]:
new_region

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,
2022,6400.0,7400.0,5200.0,800.0


In [72]:
new_region.dropna() # drop any rows that contain any null values

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2022,6400.0,7400.0,5200.0,800.0


In [76]:
new_region.dropna(axis=1) # drop any columns that contain null values

2018
2019
2020
2021
2022


In [78]:
new_region.dropna(axis=1, thresh=3) # keep columns with at least 3 non-null values

Unnamed: 0_level_0,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018,2400.0,2500.0,1500.0
2019,3500.0,4000.0,4000.0
2020,,,
2021,,,
2022,7400.0,5200.0,800.0


In [79]:
new_region['na'] = np.nan
new_region

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,


In [80]:
new_region.dropna(axis=1, how='any') # drop if contain any null

2018
2019
2020
2021
2022


In [82]:
new_region.dropna(axis=1, how='all') # drop if all values are null

Unnamed: 0_level_0,East,West,North,South
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018,3400.0,2400.0,2500.0,1500.0
2019,,3500.0,4000.0,4000.0
2020,,,,
2021,,,,
2022,6400.0,7400.0,5200.0,800.0


In [83]:
new_region.drop(['2020', '2021']) # drop particular rows

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2022,6400.0,7400.0,5200.0,800.0,


In [84]:
new_region

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,


In [86]:
new_region.loc['2017'] = [3400,2400,2500,1500, np.nan] # add new row
new_region

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,
2017,3400.0,2400.0,2500.0,1500.0,


In [87]:
new_region.duplicated() # if that row is duplicated excluding index. First instance is not classed as duplicated

years
2018    False
2019    False
2020    False
2021     True
2022    False
2017     True
dtype: bool

In [88]:
new_region.sort_index(ascending=False)

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022,6400.0,7400.0,5200.0,800.0,
2021,,,,,
2020,,,,,
2019,,3500.0,4000.0,4000.0,
2018,3400.0,2400.0,2500.0,1500.0,
2017,3400.0,2400.0,2500.0,1500.0,


In [89]:
new_region.sort_index(ascending=True)

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017,3400.0,2400.0,2500.0,1500.0,
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,


In [90]:
new_region.drop_duplicates() # get rid of duplicated rows

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2022,6400.0,7400.0,5200.0,800.0,


In [91]:
new_region

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,
2017,3400.0,2400.0,2500.0,1500.0,


In [92]:
new_region.West # dot notation

years
2018    2400.0
2019    3500.0
2020       NaN
2021       NaN
2022    7400.0
2017    2400.0
Name: West, dtype: float64

In [93]:
new_region['West'] # square brackets the same

years
2018    2400.0
2019    3500.0
2020       NaN
2021       NaN
2022    7400.0
2017    2400.0
Name: West, dtype: float64

In [94]:
new_region[['West', 'East']]

Unnamed: 0_level_0,West,East
years,Unnamed: 1_level_1,Unnamed: 2_level_1
2018,2400.0,3400.0
2019,3500.0,
2020,,
2021,,
2022,7400.0,6400.0
2017,2400.0,3400.0


In [95]:
new_region.loc['2022'] # .loc by index names - indexed like a dictionary

East     6400.0
West     7400.0
North    5200.0
South     800.0
na          NaN
Name: 2022, dtype: float64

In [97]:
new_region.iloc[1] # integer location - indexed like a list

East        NaN
West     3500.0
North    4000.0
South    4000.0
na          NaN
Name: 2019, dtype: float64

In [98]:
new_region

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,
2022,6400.0,7400.0,5200.0,800.0,
2017,3400.0,2400.0,2500.0,1500.0,


In [99]:
new_region.iloc[1,2] # 2nd row, 3rd value in row

4000.0

In [100]:
new_region.iloc[1,2:] # from here to end of the row

North    4000.0
South    4000.0
na          NaN
Name: 2019, dtype: float64

In [102]:
new_region.loc[['2018', '2019', '2020']] # list of rows

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,


In [103]:
new_region[new_region['North'] >= 4000] # show rows where north value is >= 4000

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019,,3500.0,4000.0,4000.0,
2022,6400.0,7400.0,5200.0,800.0,


In [105]:
new_region['North'] + new_region['South'] # perform maths on the data # adding to nan results in nan

years
2018    4000.0
2019    8000.0
2020       NaN
2021       NaN
2022    6000.0
2017    4000.0
dtype: float64

In [107]:
new_region['North'].add(new_region['South'], fill_value=0) # we keep the value, nan is assumed to be 0

years
2018    4000.0
2019    8000.0
2020       NaN
2021       NaN
2022    6000.0
2017    4000.0
dtype: float64

In [108]:
new_region['North'].add(new_region['South'], fill_value=0).add(new_region['East'], fill_value=0).add(new_region['West'], fill_value=0)

years
2018     9800.0
2019    11500.0
2020        NaN
2021        NaN
2022    19800.0
2017     9800.0
dtype: float64

In [109]:
#Total for all areas

In [110]:
new_region.sum(axis=1) # better way of getting total

years
2018     9800.0
2019    11500.0
2020        0.0
2021        0.0
2022    19800.0
2017     9800.0
dtype: float64

In [112]:
new_region.sort_index(ascending=0) # can use zero instead of false

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022,6400.0,7400.0,5200.0,800.0,
2021,,,,,
2020,,,,,
2019,,3500.0,4000.0,4000.0,
2018,3400.0,2400.0,2500.0,1500.0,
2017,3400.0,2400.0,2500.0,1500.0,


In [114]:
new_region.sort_values(by=['East'], ascending=0)

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022,6400.0,7400.0,5200.0,800.0,
2018,3400.0,2400.0,2500.0,1500.0,
2017,3400.0,2400.0,2500.0,1500.0,
2019,,3500.0,4000.0,4000.0,
2020,,,,,
2021,,,,,


In [115]:
new_region['South'].rank(ascending=0)

years
2018    2.5
2019    1.0
2020    NaN
2021    NaN
2022    4.0
2017    2.5
Name: South, dtype: float64

In [117]:
new_region.sort_values(by=['East'], ascending=0)['South'] # sort by south and only show south

years
2022     800.0
2018    1500.0
2017    1500.0
2019    4000.0
2020       NaN
2021       NaN
Name: South, dtype: float64

In [118]:
new_region.describe() # general overview of dataset

Unnamed: 0,East,West,North,South,na
count,3.0,4.0,4.0,4.0,0.0
mean,4400.0,3925.0,3550.0,1950.0,
std,1732.050808,2373.991014,1307.669683,1405.939781,
min,3400.0,2400.0,2500.0,800.0,
25%,3400.0,2400.0,2500.0,1325.0,
50%,3400.0,2950.0,3250.0,1500.0,
75%,4900.0,4475.0,4300.0,2125.0,
max,6400.0,7400.0,5200.0,4000.0,


In [119]:
new_region.mean()

East     4400.0
West     3925.0
North    3550.0
South    1950.0
na          NaN
dtype: float64

In [120]:
new_region.sum()

East     13200.0
West     15700.0
North    14200.0
South     7800.0
na           0.0
dtype: float64

In [121]:
new_region.sum(axis=1)

years
2018     9800.0
2019    11500.0
2020        0.0
2021        0.0
2022    19800.0
2017     9800.0
dtype: float64

In [123]:
new_region.sort_index().cumsum()

Unnamed: 0_level_0,East,West,North,South,na
years,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017,3400.0,2400.0,2500.0,1500.0,
2018,6800.0,4800.0,5000.0,3000.0,
2019,,8300.0,9000.0,7000.0,
2020,,,,,
2021,,,,,
2022,13200.0,15700.0,14200.0,7800.0,


In [124]:
new_region.min()

East     3400.0
West     2400.0
North    2500.0
South     800.0
na          NaN
dtype: float64

In [125]:
new_region.max()

East     6400.0
West     7400.0
North    5200.0
South    4000.0
na          NaN
dtype: float64

In [126]:
new_region.std()

East     1732.050808
West     2373.991014
North    1307.669683
South    1405.939781
na               NaN
dtype: float64

In [127]:
new_region.plot()

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.

In [132]:
new_region['2022'].plot()

KeyError: '2022'

In [131]:
new_region.interpolate().plot()

ImportError: matplotlib is required for plotting when the default backend "matplotlib" is selected.