## 12. Plotting: Drawing Graphs with Python. 

### Matplotlib
Matplotlib is a *module* that contains many useful functions for turning raw data into graphs. 

In [None]:
# the line below is a magic jupyter command to make our graphs show up right - you can ignore it
%matplotlib inline
# this is the line that actually imports matplotlib module
import matplotlib.pyplot as plt

### Scatter Plots

The following is a scatter plot of the height of various people versus their weight:

In [None]:
# Sample measurements, paired by position: heights[i] and weights[i]
# describe the same person
heights = [182, 150, 197, 164, 171, 155, 187, 148, 162, 168]  # cm
weights = [81, 55, 90, 60, 65, 57, 86, 52, 61, 62]            # kg

# Draw one point per person: height on the x axis, weight on the y axis
plt.scatter(x=heights, y=weights)

# Describe the chart and its axes
plt.xlabel('Height in cm')
plt.ylabel('Weight in kg')
plt.title('weight vs height')

plt.show()

Notice that the lines of code above work as follows:

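1. We call the `scatter` function and provide it with two arguments: a list of $x$ values, and a list of $y$ values
2. We call the `title`, `xlabel` and `ylabel` functions to add a title and axis labels to the graph
3. We call the `show` function to make our graph show up on the screen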

In [None]:
# Same scatter plot, with the point color given as a hex RGB string
plt.scatter(x=heights, y=weights, c="#5817b3")
plt.show()

In [None]:
# c="y" means color = yellow; marker="s" draws each point as a square
plt.scatter(heights, weights, c="y",marker="s")
plt.show()

In [None]:
# Changing to another color (a hex RGB string) and using the star marker "*"
plt.scatter(heights, weights, marker="*", c="#0ddb5c")
plt.show()

In [None]:
# the help function prints the documentation of matplotlib's colors module
import matplotlib
help(matplotlib.colors)

In [None]:
# Help for markers: prints the documentation of matplotlib's markers module
help(matplotlib.markers)

In [None]:
# A fully labelled scatter plot: red "x" markers, with a title,
# axis labels and a legend
plt.scatter(heights, weights, c="r", marker="x", label="plot")
plt.xlabel("Height (cm)")
plt.ylabel("Weight (kg)")
plt.title("Weight vs Height")
plt.legend()  # the legend entry uses the label= passed to scatter
plt.show()

In [None]:
# Data for the exercise below: the two lists have the same length and are
# paired by position (age[i] goes with monthly_salary[i])
age = [60, 24, 32, 36, 28, 36, 32, 60, 68]
monthly_salary = [
    10000, 1800, 3800,
    4500, 3000, 4300,
    4500, 8300, 14500,
]

<span style="color:red;font-weight:bold">Try</span>
: Plot a scatterplot of **age** (x) vs **monthly_salary** (y). Use the **triangle_down** marker and the color green. 

In [None]:
# YOUR CODE HERE


### Line Plots

We need to use the `plot` function rather than the `scatter` function:

In [None]:
import numpy as np

# x and y both hold the integers 0..9, so the plot is the straight line y = x
x = np.arange(10)
y = np.arange(10)

plt.plot(x, y)
plt.show()

In [None]:
# Display the values stored in x
x

In [None]:
# Display the values stored in y
y

In [None]:
import numpy as np

# y is a plain list of ten values; x supplies the matching positions 0..9
# (start 0, stop 10 exclusive, step 1)
y = [1, 2, 3, 2, 5, 6, 7, 8, 6, 10]
x = np.arange(0, 10, 1)

plt.plot(x, y)
plt.show()

In [None]:
# Build the data: x is an array of the integers -3..9, y holds their squares
x = np.arange(-3, 10)
y = x ** 2

plt.plot(x, y)
plt.xlabel('x axis label')
plt.ylabel('y axis label')
plt.title('a title')

# Save the figure to a file, then display it
plt.savefig('sampleline.jpg')
plt.show()

### Formatting the style of plot

source: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html

some of the line styles:
+ '-' solid line style
+ '--' dashed line style
+ '-.' dash-dot line style
+ ':' dotted line style

Colors:
+ ‘b’	blue
+ ‘g’	green
+ ‘r’	red
+ ‘c’	cyan
+ ‘m’	magenta
+ ‘y’	yellow
+ ‘k’	black
+ ‘w’	white

More colors possible: https://matplotlib.org/stable/tutorials/colors/colors.html

Some of the Markers:
+ '.' point marker
+ ',' pixel marker
+ 'o' circle marker
+ 'v' triangle_down marker
+ '^' triangle_up marker
+ '<' triangle_left marker
+ '>' triangle_right marker
+ '1' tri_down marker

In [None]:
# 'r--' is a format string: color 'r' (red) + line style '--' (dashed)
plt.plot(x, y, 'r--')
plt.show()

In [None]:
# The same styling options spelled out as keyword arguments:
# a blue, dotted line drawn with linewidth 5
plt.plot(x, y, linestyle=':', linewidth=5, color='b')
plt.show()

In [None]:
# Three data sets in one plot() call, each given as (x values, y values, format):
#   x    -> 'r--' red dashed line
#   x**2 -> 'bs'  blue square markers
#   x**3 -> 'g^'  green triangle_up markers
plt.plot(
    x, x, 'r--',
    x, x**2, 'bs',
    x, x**3, 'g^',
)
plt.savefig('sample.png')  # save plot as png file
plt.show()


In [None]:
import pandas as pd

# Read the insurance data set into a DataFrame and display it
df = pd.read_csv('data/insurance.csv')
df

In [None]:
# Scatter plot with BMI on the x axis and charges on the y axis
plt.scatter(df['bmi'], df['charges'])
plt.xlabel("BMI")
plt.ylabel("Charges")
plt.title("Charges vs BMI")
plt.show()

In [None]:
# Charges vs BMI again, this time styling the points
plt.title("Charges vs BMI")
plt.xlabel("BMI")
plt.ylabel("Charges")
# facecolor 'C2' is the third color of the default color cycle (green in the
# default style); edgecolor 'k' outlines each point in black
plt.scatter(df.bmi, df.charges, facecolor='C2', edgecolor='k')  # x, y
plt.show()

In [None]:
# Scatter plot of charges vs age, with smokers and non-smokers drawn as
# two separate series so each gets its own color and legend entry
smokers = df[df['smoker'] == 'yes']
non_smokers = df[df['smoker'] == 'no']

plt.scatter(smokers['age'], smokers['charges'], color='r', label='smoker')
# alpha=0.5 makes the non-smoker points semi-transparent
plt.scatter(non_smokers['age'], non_smokers['charges'], color='b', alpha=0.5, label='non smoker')

plt.title('Charges vs age')
plt.xlabel('age')
plt.ylabel('charges')
plt.legend()
plt.show()

## Histogram
plt.hist()
+ rwidth - The relative width of the bars
+ bins - If bins is an integer, it defines the number of equal-width bins in the range.
+ align - {'left', 'mid', 'right'}, default: 'mid'. The horizontal alignment of the bars relative to their bins

If bins is a sequence, it defines the bin edges



In [None]:
# Histogram of age using 5 equal-width bins; rwidth=0.8 draws each bar at
# 80% of its bin width so neighbouring bars are visually separated
plt.hist(df['age'], rwidth=0.8, bins=5)
plt.title('Histogram of age')
plt.xlabel('age')
plt.ylabel('frequency')
# Render the figure explicitly (also keeps the Text(...) repr of the last
# call out of the cell output)
plt.show()

In [None]:
# Changing the orientation to horizontal: the bars now extend along the
# x axis, so frequency is on x and age is on y
plt.hist(df['age'], rwidth=0.5, bins=5, orientation='horizontal', color='red')
plt.title('Histogram of age')
plt.xlabel('frequency')
plt.ylabel('age')
# Render the figure explicitly (also keeps the Text(...) repr of the last
# call out of the cell output)
plt.show()

In [None]:
# Passing a sequence as bins sets the bin edges explicitly:
# here the edges are 20, 30, 40, 50, 60 and 70
plt.hist(df['age'], rwidth=0.5, bins=[20, 30, 40, 50, 60, 70], color='r')
plt.title('Histogram of age')
plt.xlabel('age')
plt.ylabel('frequency')
# Render the figure explicitly (also keeps the Text(...) repr of the last
# call out of the cell output)
plt.show()

In [None]:
# Same bin edges as before, with align='left' (bars centered on the left
# bin edges) and a black outline of width 2 around each cyan bar
plt.hist(
    df['age'],
    rwidth=0.7,
    bins=[20, 30, 40, 50, 60, 70],
    align='left',
    color='c',
    edgecolor='black',
    linewidth=2,
)
plt.title('Histogram of age')
plt.xlabel('age')
plt.ylabel('frequency')
# Render the figure explicitly (also keeps the Text(...) repr of the last
# call out of the cell output)
plt.show()

In [None]:
# Select the 'age' values of the rows whose 'sex' column equals 'male'
df['age'][df['sex']=='male']

In [None]:
# Passing a list of two data sets draws them side by side in every bin:
# ages of the 'male' rows (green) and of the 'female' rows (orange)
plt.hist(
    [df['age'][df['sex'] == 'male'], df['age'][df['sex'] == 'female']],
    bins=10,
    rwidth=0.8,
    color=['green', 'orange'],
    label=['men', 'women'],
)
plt.legend()
plt.title('Histogram of age per gender')
plt.xlabel('age')
plt.ylabel('frequency')
# Render the figure explicitly (also keeps the Text(...) repr of the last
# call out of the cell output)
plt.show()

<span style="color:blue;font-weight:bold">Exercise</span>
Plot a histogram to illustrate the distribution of insurance charges for smokers and non-smokers.

### Box plot 
A box plot (also known as a box-and-whisker plot) displays a summary of a set of data values: the minimum, first quartile, median, third quartile and maximum. 

In [None]:
# Box plot of the 'charges' column
plt.boxplot(df['charges'])

# Display the figure
plt.show()

In [None]:
# Creating plot
# Box plot of the 'bmi' column
plt.boxplot(x=df['bmi'])
plt.show()

## Barchart
plt.bar()

+ x - float or array-like :The x coordinates of the bars
+ height- float or array-like
+ width- float or array-like, default: 0.8
+ bottom-float or array-like, default: 0 : bar bases
+ align - {'center', 'edge'}, default: 'center' : Alignment of the bars to the x coordinates.

In [None]:
# Data for the bar and pie charts below, paired by position:
# students[i] is the number of students in courses[i]
students = [20, 15, 30, 35]
courses = ['Javascript', 'Java', 'R', 'Python']

In [None]:
# Create the figure with an explicit size (width 7, height 5)
fig = plt.figure(figsize=(7, 5))

# Vertical bar chart: one bar per course, bar height = number of students
plt.bar(
    courses,
    students,
    width=0.5,
    color='#aa80ff',
    edgecolor='black',
    linewidth=2,
)

plt.title("Students for different courses")
plt.xlabel("Training Courses")
plt.ylabel("Number of students")
plt.show()

In [None]:
# Horizontal bar chart: barh() puts the categories on the y axis and the
# bar lengths (number of students) along the x axis, so the axis labels
# are swapped compared with plt.bar()
plt.barh(courses, students, color='#70db70', edgecolor='black', linewidth=2)

plt.ylabel("Training Courses")
plt.xlabel("Number of students")
plt.title('Students for different courses')
plt.show()

<span style="color:blue;font-weight:bold">Exercise</span>
Plot a bar chart to display the average(mean) insurance charges per region.


In [None]:
# Use the following data to plot the bar chart:
# the mean of every numeric column, computed separately for each region
data = df.groupby('region').mean(numeric_only=True)
data

## Pie Chart
plt.pie()

+ x - 1D array-like. The wedge sizes.
+ labels- list, default: None
+ colors - array-like, default: None
+ autopct - None or str or callable, default: None. It is a string or function used to label the wedges with their numeric value.
+ shadow - bool, default: False
+ radius - float, default: 1 The radius of the pie.

In [None]:
# Basic pie chart on a square 5 x 5 figure:
# one wedge per course, sized by its number of students
fig = plt.figure(figsize=(5, 5))
plt.pie(students, labels=courses)

# Display the figure
plt.show()

In [None]:
# startangle=90 rotates the start of the pie by 90 degrees anti-clockwise
plt.pie(students, startangle=90, labels=courses)
plt.show()

In [None]:
# Setting autopct, radius and shadow:
# autopct='%1.2f%%' prints each wedge's percentage with two decimals
plt.pie(
    students,
    labels=courses,
    autopct='%1.2f%%',
    radius=1.5,
    shadow=True,
)
plt.show()

In [None]:
# Setting explode: one offset per wedge, in the same order as courses.
# The last two wedges ('R' and 'Python') are pulled out by 0.1,
# the first two stay in place
plt.pie(
    students,
    labels=courses,
    explode=[0, 0.0, 0.1, 0.1],
    autopct='%1.2f%%',
    radius=1.5,
    shadow=True,
)
plt.show()
