# Matplotlib

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Minimal example: draw a single line through four points.

xAxis = [1, 2, 3, 4]
yAxis = [4, 3, 1, 5]

# plt.plot creates a line plot; the x-axis data is always passed first.
plt.plot(xAxis, yAxis)

# Depending on the environment the script runs in, plt.show() is likely unnecessary.
plt.show()

In [None]:
# The bare plot is not very informative, so give it a title and label both axes.
plt.plot(xAxis, yAxis)
plt.title("This is the title")
plt.xlabel("x-axis label")
plt.ylabel("y-axis label")
plt.show()

In [None]:
# Enlarge the label and title text so they are easier to read, and
# place the ticks on the whole numbers 1 through 5 on both axes.

plt.plot(xAxis, yAxis)
plt.title("This is the title", size=18)
plt.xlabel("x-axis label", size=15)
plt.ylabel("y-axis label", size=15)
plt.xticks(range(1, 6))
plt.yticks(range(1, 6))
plt.show()

In [None]:
# Changing the color of the line.
# There are multiple ways to specify a color; see
# https://matplotlib.org/stable/gallery/color/named_colors.html
plt.plot(xAxis, yAxis, color="r")
plt.title("This is the title", size=18)
plt.xlabel("x-axis label", size=15)
plt.ylabel("y-axis label", size=15)
plt.xticks(range(1, 6))
plt.yticks(range(1, 6))
plt.show()

In [None]:
# Adding a second line to the same plot.
xAxis2 = list(range(1, 6))
yAxis2 = list(range(1, 6))

# Successive plt.plot calls stack their lines on the same axes.
plt.plot(xAxis2, yAxis2, color="g")
plt.plot(xAxis, yAxis, color="r")

plt.title("This is the title", size=18)
plt.xlabel("x-axis label", size=15)
plt.ylabel("y-axis label", size=15)
plt.xticks(range(1, 6))
plt.yticks(range(1, 6))

plt.show()

In [None]:
# Adding a legend.

xAxis2 = list(range(1, 6))
yAxis2 = list(range(1, 6))

# The legend displays the "label" given to each line.
plt.plot(xAxis2, yAxis2, color="g", label="Line 2")
plt.plot(xAxis, yAxis, color="r", label="Line 1")

plt.title("This is the title", size=18)
plt.xlabel("x-axis label", size=15)
plt.ylabel("y-axis label", size=15)
plt.xticks(range(1, 6))
plt.yticks(range(1, 6))

# Draw the legend. loc=8 is matplotlib's numeric code for "lower center"; all
# location options are listed at
# https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.legend.html
plt.legend(fontsize=20, loc=8)
plt.show()

In [None]:
# The same two data series, drawn as scatter plots instead of lines.

xAxis2 = list(range(1, 6))
yAxis2 = list(range(1, 6))

# Each plt.scatter call adds one set of points; "label" feeds the legend.
plt.scatter(xAxis2, yAxis2, c="g", label="Line 2")
plt.scatter(xAxis, yAxis, c="r", label="Line 1")

plt.title("This is the title", size=18)
plt.xlabel("x-axis label", size=15)
plt.ylabel("y-axis label", size=15)
plt.xticks(range(1, 6))
plt.yticks(range(1, 6))
plt.legend(fontsize=10)
plt.show()

# Example 1
Write a program that allows a user to input a number, then simulates that many rolls of a pair of dice. Plot a histogram of the sum of each roll. Then write a second program that does the same thing, but for each roll also generates a single random integer between 2 and 12 (inclusive), and compare the two distributions on one plot.

In [None]:
import random

# Ask the user how many pairs of dice to roll.
# int() raises ValueError if the input is not a whole number.
numRolls = int(input("How many dice rolls should be simulated? "))

# Each entry is the sum of two independent six-sided dice.
rollSums = [random.randint(1, 6) + random.randint(1, 6) for _ in range(numRolls)]

# One bin per possible sum (2 through 12), with edges on the half-integers
# 1.5, 2.5, ..., 12.5 so every bar is centred on its sum. Passing bins = 11
# would instead spread 11 bins over the observed min..max, which misaligns
# the bars whenever a small sample does not contain both a 2 and a 12.
# (Left unspecified, bins defaults to 10.)
sumBinEdges = [edge + 0.5 for edge in range(1, 13)]

plt.hist(rollSums, bins=sumBinEdges)
plt.xlim(1, 13)
plt.xlabel('Sum of dice roll')
plt.ylabel("Num. of occurrences")
plt.show()

In [None]:
# What about comparing distributions?

# Compare the distribution of the sum of two dice against the distribution of
# a single number generated randomly between 2 and 12.

numRolls = 10000

rollSums = []    # sum of two six-sided dice, one entry per trial
singleRoll = []  # one random integer from 2 to 12 (inclusive), one entry per trial
for _ in range(numRolls):
    rollSums.append(random.randint(1, 6) + random.randint(1, 6))
    singleRoll.append(random.randint(2, 12))

# Use the same explicit bin edges (1.5, 2.5, ..., 12.5) for both histograms.
# With bins = 11 each call derives its bins from its own data range, so the
# two sets of bars only line up if both samples happen to span 2..12.
sumBinEdges = [edge + 0.5 for edge in range(1, 13)]

# alpha adds transparency so the overlapping histograms are both visible.
plt.hist(rollSums, bins=sumBinEdges, alpha=.3, label="Sum of two dice")
plt.hist(singleRoll, bins=sumBinEdges, color="g", alpha=.3, label="Random number from 2 to 12")
plt.xlim(1, 13)
plt.xlabel("Value")
plt.ylabel("Num. of occurrences")
plt.legend()
plt.show()

# Example 2 - working with categorical data

Download the Game_of_Thrones_Script.csv from Canvas. Create a bar chart showing the number of lines spoken for the 10 characters with the most speaking lines throughout the show. Also create a bar chart plotting the number of words spoken (for the 10 characters with the most words spoken) throughout the show.

CSV stands for comma-separated values. We will talk more about working with these files when we cover pandas next week.

In [None]:
# Read in the data using csv and look at the first few rows of the file.
import csv

# The with-statement closes the file even if reading fails part-way through;
# newline="" is what the csv module documentation asks for when opening files.
with open("Files/Game_of_Thrones_Script.csv", newline="") as got:
    scriptReader = csv.reader(got)

    # Let's look at how this file opens: print the first 10 rows (the header
    # row included), each followed by a blank line for readability.
    for counter, line in enumerate(scriptReader):
        if counter >= 10:
            break  # preview finished; no need to read the rest of the file
        print(line)
        print()

In [None]:
# Now let's actually count the lines each character speaks and plot the
# 10 characters with the most lines.

charLineCounts = dict()  # keys are character names, values are how many lines they have

# Get line counts for each character. The with-statement guarantees the file
# is closed even if a row cannot be processed.
with open("Files/Game_of_Thrones_Script.csv", newline="") as got:
    scriptReader = csv.reader(got)
    next(scriptReader, None)  # skip the header line (no error if the file is empty)
    for line in scriptReader:
        character = line[4]  # column index 4 is used as the character name
        charLineCounts[character] = charLineCounts.get(character, 0) + 1

# Rank characters from most to fewest lines and keep the first 10. Slicing
# (rather than comparing against the 10th-largest count) still works when
# there are fewer than 10 characters and never returns more than 10 on ties.
topByLines = sorted(charLineCounts.items(), key=lambda item: item[1], reverse=True)[:10]

characterList = [name for name, _ in topByLines]  # character names for plotting
linesList = [count for _, count in topByLines]    # matching line counts for plotting

# Make the plot (bars run from most to fewest lines).
plt.bar(characterList, linesList)
plt.xlabel("Character")
plt.ylabel("Number of lines")
plt.show()






In [None]:
# The x-axis tick labels on the plot above aren't readable, so redraw the
# same bar chart with the tick labels rotated by 90 degrees.

plt.bar(characterList, linesList)
plt.xticks(rotation=90)
plt.xlabel("Character")
plt.ylabel("Number of lines")
plt.show()

In [None]:
# What about counting total words? - Take a minute and think how we would do that

# Count the total words each character speaks and plot the 10 characters
# with the most words.

charWordCounts = dict()  # keys are character names, values are total words spoken

# Get word counts for each character. The with-statement guarantees the file
# is closed even if a row cannot be processed.
with open("Files/Game_of_Thrones_Script.csv", newline="") as got:
    scriptReader = csv.reader(got)
    next(scriptReader, None)  # skip the header line (no error if the file is empty)
    for line in scriptReader:
        character = line[4]            # column index 4 is used as the character name
        numWords = len(line[5].split())  # column index 5 is split on whitespace into words
        charWordCounts[character] = charWordCounts.get(character, 0) + numWords

# Rank characters from most to fewest words and keep the first 10. Slicing
# (rather than comparing against the 10th-largest count) still works when
# there are fewer than 10 characters and never returns more than 10 on ties.
topByWords = sorted(charWordCounts.items(), key=lambda item: item[1], reverse=True)[:10]

characterList = [name for name, _ in topByWords]  # character names for plotting
wordsList = [count for _, count in topByWords]    # matching word counts for plotting

# Make the plot (bars run from most to fewest words).
plt.bar(characterList, wordsList)
plt.xlabel("Character")
plt.ylabel("Number of words spoken")
plt.xticks(rotation=90)
plt.show()







In [None]:
# The same word counts, shown as a pie chart.

# Create a 10 x 5 figure at 200 dpi before drawing into it.
plt.figure(figsize=(10, 5), dpi=200)

# autopct adds each slice's percentage to the plot.
plt.pie(wordsList, autopct='%1.1f%%')

# loc = 5 puts the legend on the right.
plt.legend(characterList, loc=5)

# Equal axis scaling makes sure the pie is drawn as a circle.
plt.axis("equal")
plt.show()




# Example 3
Download apple.txt and gamestop.txt from Canvas

First create 4 total lists: gamestopDate, appleDate, gamestopPrice, applePrice

Use these lists to plot the share price of the 2 companies for the last year, apple in red and gamestop in black. Add a legend, x- and y-axis labels, and a title

In [None]:
def _parse_price_lines(lines):
    """Split whitespace-separated "day price" lines into two parallel lists.

    The first field of each line is converted with int() and the second with
    float(). Lines that contain only whitespace are skipped.

    Returns:
        (days, prices): a list of ints and a list of floats, in file order.

    Raises:
        ValueError: if a field cannot be converted to a number.
        IndexError: if a non-blank line has fewer than two fields.
    """
    days = []
    prices = []
    for rawLine in lines:
        fields = rawLine.split()
        if not fields:
            continue  # tolerate blank lines, e.g. at the end of the file
        days.append(int(fields[0]))
        prices.append(float(fields[1]))
    return days, prices


# Read each file inside a with-statement so the file handle is closed promptly.
with open('Files/apple.txt') as appleFile:
    appleData = list(appleFile)
with open('Files/gamestop.txt') as gamestopFile:
    gamestopData = list(gamestopFile)

# Split the data into the x-axis (day) and y-axis (price) lists.
gamestopDate, gamestopPrice = _parse_price_lines(gamestopData)
appleDate, applePrice = _parse_price_lines(appleData)

# Apple in red, Gamestop in black, with a legend, axis labels and a title.
plt.plot(appleDate, applePrice, c="r", label="Apple")
plt.plot(gamestopDate, gamestopPrice, c='k', label="Gamestop")
plt.legend()
plt.xlabel("Closing day since April 11, 2020")
plt.ylabel("Closing price")
plt.title("Apple vs. Gamestop closing share price")

plt.show()



# Change size/resolution/save the plot to your computer

In [None]:
# Changing the size of the plot.
# figsize sets the width and height in inches; dpi is the resolution.
plt.figure(figsize=(10, 10), dpi=300)

plt.plot(appleDate, applePrice, c="r", label="Apple")
plt.plot(gamestopDate, gamestopPrice, c='k', label="Gamestop")
plt.xlabel("Closing day since April 11, 2020")
plt.ylabel("Closing price")
plt.legend()

# Save before calling plt.show(): plt.show() clears the figure.
plt.savefig("MyFig")
plt.show()

