### Plotting Problems and Practice

Practice with Matplotlib and Seaborn, and some common problems

In [None]:
# Analysis modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# List the visible files in the working directory to check that the data files are present.
# Done in pure Python instead of the `! ls` shell escape so the cell also runs on
# systems that have no `ls` command.
from pathlib import Path

cwd_files = sorted(p.name for p in Path('.').iterdir() if not p.name.startswith('.'))
cwd_files

### Seaborn and Matplotlib

In [None]:
# Load the lengths data set from the working directory into a DataFrame.
# The cells below read its 'Stamen_length' and 'Leaf_Length' columns.
df = pd.read_csv('lengths.csv')

In [None]:
# Preview the first three rows to check that the file loaded as expected.
df.head(3)

In [None]:
# Scatter plot only (no regression line yet): stamen length on x, leaf length on y.
plt.scatter(x=df['Stamen_length'], y=df['Leaf_Length'])

In [None]:
# Scatter the raw points, then overlay a least-squares straight line.
plt.scatter(x=df['Stamen_length'], y=df['Leaf_Length'])

# A degree-1 polynomial fit returns its coefficients highest power first,
# i.e. (slope, intercept).
slope, intercept = np.polyfit(x=df['Stamen_length'], y=df['Leaf_Length'], deg=1)

# Evaluate the fitted line at the observed x values and draw it in red.
# The label is stored on the line but is not displayed in this cell because
# plt.legend() is not called here.
plt.plot(
    df['Stamen_length'],
    intercept + slope * df['Stamen_length'],
    color='red',
    label='Regression Line',
)


In [None]:
# Complete Matplotlib version: points, fitted line, title, axis labels and legend.
plt.scatter(x=df['Stamen_length'], y=df['Leaf_Length'])

# Least-squares straight line; polyfit returns (slope, intercept) for deg=1.
slope, intercept = np.polyfit(x=df['Stamen_length'], y=df['Leaf_Length'], deg=1)

# Overlay the fitted line, labelled so that it appears in the legend.
plt.plot(
    df['Stamen_length'],
    intercept + slope * df['Stamen_length'],
    color='red',
    label='Regression Line',
)

# Annotate the figure so that it can be read on its own.
plt.xlabel('Stamen length (mm)')
plt.ylabel('Leaf length (mm)')
plt.title('Scatter Plot with Linear Regression Line (Matplotlib)')
plt.legend()
plt.show()

In [None]:
import seaborn as sns

In [None]:
# The same scatter-plus-regression figure from a single Seaborn call.
# The trailing semicolon stops the notebook printing the repr of the returned object.
sns.lmplot(data=df, x='Stamen_length', y='Leaf_Length');

In [None]:
# Suppress all Python warnings from this point on, to keep the cell output uncluttered.
# Note that this is a blanket filter: it hides every warning, not only plotting ones.
import warnings
warnings.simplefilter('ignore')

### Seaborn is quick, lovely, but not quite as flexible as matplotlib

### An example of flexibility in a matplotlib plot

In [None]:
# Load the health data set. This rebinds `df`, so the lengths data loaded earlier
# is no longer reachable under that name. Later cells plot the 'days' and 'health' columns.
df = pd.read_csv('health.csv')

Check what the data looks like

In [None]:
# Preview the first three rows of the health data.
df.head(3)

Plot using Matplotlib.

In [None]:
# Line plot of 'health' against 'days'; the strings are looked up as columns of df via data=.
# This cell deliberately has no plt.show() and no trailing ';', so the notebook also prints
# the repr of the value returned by plt.plot.
plt.plot('days', 'health', data=df)

    [<matplotlib.lines.Line2D at 0x175b48e00>]  

is a matplotlib object.  

If we want to see just the plot we can use 

    plt.show()
    
or end the line with ";"

Depending on your settings, sometimes the plots do not show up properly in the notebook.  This is often solved with:

    %matplotlib inline

In [None]:
# Same plot, finished with plt.show() so that only the figure is displayed
# (plt.show() becomes the last call, so the return value of plt.plot is not echoed).
plt.plot('days', 'health', data=df)
plt.show()

In [None]:
# Same plot again; the trailing semicolon suppresses the text repr of the returned objects.
plt.plot('days', 'health', data=df);

We need to add labels, this is easy enough - just add the two lines of code.

In [None]:
# Line plot of health against days, now with both axes labelled through pyplot.
plt.plot('days', 'health', data=df)
plt.ylabel('My estimated health (arbitrary scale)')
plt.xlabel('Days tracked')
plt.show()

To get fancier we have to define the figure space; then we can change its shape and add more information.

Define a figure space, 
    
    fig = plt.figure()  
        
put in some axes,  defining the [left, bottom, width, height] of the axes as fractions of figure width and height.  These will set the plot at the bottom left hand corner of the space, and make it the full width of the space and half as tall as the space.

    ax = fig.add_axes((0, 0, 1, 0.5))  
    


In [None]:
# Empty figure with one hand-placed axes. The rectangle is [left, bottom, width, height]
# in fractions of the figure: anchored bottom-left, full width, half height.
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 0.5])

Bigger

In [None]:
# Larger axes: width and height are both twice the size of the figure itself.
fig = plt.figure()
ax = fig.add_axes([0, 0, 2, 2])

Smaller

In [None]:
# Smaller axes: half the figure's width and half its height, anchored bottom-left.
fig = plt.figure()
ax = fig.add_axes([0, 0, 0.5, 0.5])

Add the data and labels

In [None]:
# Create the figure and a hand-placed axes (full width, half height).
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 0.5])
# Draw health against days on that axes.
ax.plot('days', 'health', data=df)
# Fix the visible y-range; this is a method of the axes object.
ax.set_ylim(-30, 10)
# Label the axes through the pyplot interface.
plt.ylabel('health')
plt.xlabel('days')
plt.show()

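The x and y labels are set here through the pyplot interface (`plt.xlabel`, `plt.ylabel`), which acts on the current axes.  
The axis limits are set on the axes object `ax` with `ax.set_ylim`.  
`plt` has no `set_ylim` function (the pyplot equivalent is `plt.ylim`), so you'll get an error if you mix these up, as the next cell shows.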

In [None]:
# Demonstration of a common mistake: calling an Axes method on the pyplot module.
fig = plt.figure()
# Place the axes: bottom-left corner, full width, half height of the figure.
ax = fig.add_axes((0, 0, 1, 0.5))
# Plot the data.
ax.plot('days', 'health', data=df)
# Add labels for the axes through pyplot.
plt.xlabel('days')
plt.ylabel('health')
# Deliberate error: set_ylim is a method of the axes object (ax), not of plt.
# The AttributeError is caught and printed so the mistake is still visible but the
# notebook keeps running under "Restart & Run All".
try:
    plt.set_ylim([-30, 10])
except AttributeError as err:
    print(f'AttributeError: {err}')
plt.show()

#### Adding text to the figure. 

Labeling your x and y axes on the plot with plt.xlabel() and plt.ylabel() is easy, so no excuse to miss this!  There is another way of doing it, by setting the labels on the axes object, ax:

    ax.set_xlabel('x axis label')
    
    ax.set_ylabel('y axis label')
    
The title is set with fig.text, giving the x and y position (as fractions of the figure), a string for the text, and the horizontal alignment (center, left or right)

    fig.text(0.5, 0.05, 'Title', ha='center')
    
You can also annotate the graph with a string, giving the position of the point, the arrow type, and the position of the text

    ax.annotate('text on graph',
        xy=(70, 1), arrowprops=dict(arrowstyle='->'), xytext=(15, -10))

include \n in the text for line breaks
        
       

In-figure text belongs to the figure and is controlled by fig.text

In [None]:
# Figure and hand-placed axes (full width, half height).
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 0.5])
# Health against days, with a fixed y-range.
ax.plot('days', 'health', data=df)
ax.set_ylim(-30, 10)
# Axis labels via pyplot.
plt.ylabel('health')
plt.xlabel('days')
# Caption placed with fig.text, horizontally centred at x=0.5.
fig.text(0.5, 0.05, 'Stove ownership', ha='center')
# Arrow pointing at (70, 1), with the text itself placed at (15, -10).
# The trailing semicolon suppresses the repr of the returned annotation.
ax.annotate(
    'The day I realised \nI could cook bacon \nwhenever I wanted',
    xy=(70, 1),
    xytext=(15, -10),
    arrowprops=dict(arrowstyle='->'),
);


#### Styles

In [None]:
from matplotlib import style

In [None]:
# Show the names of the style sheets that can be passed to plt.style.use().
plt.style.available

In [None]:
# Return the plotting parameters to their defaults before trying out a style.
plt.rcdefaults()

In [None]:
# Switch to the 'classic' style sheet. This changes global settings, so it stays in
# effect for later cells until plt.rcdefaults() is called. (plt.xkcd() is another option.)
plt.style.use('classic')
# Figure and hand-placed axes (full width, half height).
fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 0.5])
# Health against days, with a fixed y-range.
ax.plot('days', 'health', data=df)
ax.set_ylim(-30, 10)
# Axis labels via pyplot.
plt.ylabel('health')
plt.xlabel('days')
# Caption placed with fig.text, horizontally centred at x=0.5.
fig.text(0.5, 0.05, 'Stove ownership', ha='center')
# Arrow pointing at (70, 1), with the text itself placed at (15, -10).
ax.annotate(
    'The day I realised \nI could cook bacon \nwhenever I wanted',
    xy=(70, 1),
    xytext=(15, -10),
    arrowprops=dict(arrowstyle='->'),
);

To return the plotting parameters to normal use:

    plt.rcdefaults()

### Saving plots

You can save your plot to file using:

    plt.savefig('title of file')
   
There are many parameters you can play with

In [None]:
# Draw the figure in the hand-drawn xkcd style and save it to a PDF.
# plt.xkcd() is used as a context manager so that its rcParams changes apply only to
# this figure instead of leaking into every plot drawn later in the notebook.
with plt.xkcd():
    # define the plot space
    fig = plt.figure()
    # set up axes: bottom-left corner, full width, half height
    ax = fig.add_axes((0, 0, 1, 0.5))
    # plot the data
    ax.plot('days', 'health', data=df)
    # add x axis label
    plt.xlabel('days')
    # add y axis label
    plt.ylabel('health')
    # set y axis limits
    ax.set_ylim([-30, 10])
    # add a caption with fig.text, horizontally centred
    fig.text(0.5, 0.05, 'Stove ownership', ha='center')
    # add an arrowed label pointing at (70, 1), with the text at (15, -10)
    ax.annotate('The day I realised \nI could cook bacon \nwhenever I wanted',
                xy=(70, 1), arrowprops=dict(arrowstyle='->'), xytext=(15, -10))
    # save this figure explicitly (rather than "whatever figure is current")
    fig.savefig("my_plot3.pdf", transparent=True, bbox_inches='tight')

In [None]:
# List the visible files in the working directory to confirm the saved plot is there.
# Done in pure Python instead of the `! ls` shell escape so the cell also runs on
# systems that have no `ls` command.
from pathlib import Path

cwd_files = sorted(p.name for p in Path('.').iterdir() if not p.name.startswith('.'))
cwd_files

### Using a richer dataset to show what matplotlib can do

In [None]:
# Load the seedling data set into its own DataFrame (kept separate from `df`).
seedling = pd.read_csv('Seedling.csv')

In [None]:
# Preview the first three rows of the seedling data.
seedling.head(3)

Control the colour of the points by scale with:  

    c=''  
    
Control the size of points by scale with:  

    s=''  

These can be absolute, or you can set them to a column name to show variation

This might be best displayed as 3 plots within the same figure

In [None]:
# Three stacked panels that share a single x axis.
fig, axes = plt.subplots(3, 1, figsize=(10, 5), sharex=True)

# Top and middle panels: the two *_TPM columns as line plots.
# NOTE: 'Days_post_gemination' (sic) is the column name the original cell uses;
# presumably it matches the CSV header, so it must not be "corrected" here.
axes[0].plot('Days_post_gemination', 'LRR_TPM', c='green', data=seedling)
axes[0].set_ylabel("LRR_TPM")
axes[1].plot('Days_post_gemination', 'GCN_TPM', c='blue', data=seedling)
axes[1].set_ylabel("GCN_TPM")

# Bottom panel: leaf area as red points whose size is taken from the GCN_TPM column (s=).
axes[2].scatter('Days_post_gemination', 'Leaf_area', c='red', s='GCN_TPM', data=seedling)
axes[2].set_ylabel("Leaf Area (mm2)")
# The x axis is shared, so one label on the bottom panel serves all three.
axes[2].set_xlabel("Days post germination")

plt.show()


In [None]:
  # Return the plotting parameters to their defaults, undoing the style changes made above.
  plt.rcdefaults()

Use Seaborn if you can as it's easy and tends to make nice plots,  but some things require Matplotlib