In [None]:
# Matplotlib
# Visualising data
# Matplotlib provides a module called pyplot as a convenient way to access the Matplotlib functionality, and it is actually with pyplot that we do the plotting.
import matplotlib.pyplot as plt
import pandas as pd # plot() method is part of pandas
import seaborn as sns # Seaborn is a library that is designed to further simplify the task of using pyplot
sns.set_theme()     # Apply seaborn's default theme; set() is only an alias kept for backwards compatibility




In [None]:
# Example #1 - Pyplot basic plotting

import matplotlib.pyplot as plt
# Quarterly unemployment rates (%) for each state, keyed by quarter.
data = {
    'NSW': {'Q1': 3.2, 'Q2': 3.4, 'Q3': 3.4, 'Q4': 3.6},
    'VIC': {'Q1': 3.5, 'Q2': 3.4, 'Q3': 3.0, 'Q4': 3.1},
}

# One figure, 10 inches wide by 8 inches tall, holding both state panels.
fig = plt.figure(figsize=[10, 8])
fig.suptitle('Unemployment Rates')

# Left panel: NSW.
ax1 = fig.add_subplot(1, 2, 1)
ax1.plot(list(data['NSW'].keys()), list(data['NSW'].values()))
ax1.set_title('NSW')
ax1.set_xlabel('Quarter')
ax1.set_ylabel('Unemployment (%)')

# Fix the y-axis to the range 3..4 with a tick every 0.1 (3.0, 3.1, ..., 4.0).
ax1.set_ylim(3, 4)
ax1.set_yticks([tick / 10 for tick in range(30, 41)])

# Show gridlines. grid() with no arguments *toggles* the grid, which would
# switch it off when a style (e.g. seaborn's) has already enabled it, so the
# desired state is passed explicitly.
ax1.grid(True)

# Right panel: VIC, sharing its y-axis with the NSW panel so both use one scale.
ax2 = fig.add_subplot(1, 2, 2, sharey=ax1)
ax2.plot(list(data['VIC'].keys()), list(data['VIC'].values()), color='green')
ax2.set_title('VIC')
ax2.set_xlabel('Quarter')
ax2.grid(True)

# Create a bit more space around the axes, to stop titles and labels overlapping.
fig.tight_layout()

fig.savefig('plot.png')

In [None]:
# Example #2 - Pyplot basic plotting 

import matplotlib.pyplot as plt
# An 8in x 8in figure with two small axes on the top row and one wide axes below.
fig = plt.figure(figsize=(8, 8))
fig.suptitle('A figure with three axes')

# Top-left cell of a 2x2 grid: bar plot.
ax = fig.add_subplot(2, 2, 1)
ax.bar([1, 2, 3, 4], [1, 3, 1, 4])
ax.set_title('An axes with bar plot')
ax.set_xlabel('x-axis, with major ticks')
ax.set_ylabel('y-axis, with major ticks')

# Top-right cell of the same 2x2 grid: scatter plot with fixed limits and a grid.
ax = fig.add_subplot(2, 2, 2)
ax.scatter([1, 2, 3, 4], [2, 4, 1, 2])
ax.set_title('An axes with scatter plot and grid')
ax.set_xlabel('x-axis, with major ticks')
ax.set_ylabel('y-axis, with major ticks')
ax.set_xlim(0, 5)
ax.set_ylim(0, 5)
# grid() with no arguments toggles the grid, so it would hide gridlines that a
# style (e.g. seaborn's) already turned on; request the "on" state explicitly.
ax.grid(True)

# Second row of a 2x1 grid, i.e. the full width under the two plots above: line plot.
ax = fig.add_subplot(2, 1, 2)
ax.plot([1, 2, 3, 4], [1, 3, 2, 4], label='Line 1')
ax.plot([1, 2, 3, 4], [2, 1, 4, 3], label='Line 2')
ax.set_title('An axes with line plot')
ax.set_xlabel('x-axis, with major and minor ticks')
ax.set_ylabel('y-axis, with major and minor ticks')
ax.minorticks_on()
ax.legend(title='Legend')

fig.tight_layout()
fig.savefig('plot.png')

In [None]:
# Example #3 - PyPlot basic plotting 
import matplotlib.pyplot as plt

nyc_temp = [
    53.9, 56.3, 56.4, 53.4, 54.5, 55.8, 56.8,
    55.0, 55.3, 54.0, 56.7, 56.4, 57.3,
]

# Plot the readings against their position in the list, marking each point.
plt.plot(nyc_temp, marker='o')

# plt.axis() with no arguments reports the current (auto-generated) limits.
print('Original axes ranges {}'.format(plt.axis()))

# Tighten the view to exactly span the data: x from the first to the last
# index (0..12), y from the lowest to the highest reading (53.4..57.3).
newAxisRange = [0, len(nyc_temp) - 1, min(nyc_temp), max(nyc_temp)]
plt.axis(newAxisRange)

print('updated axes ranges {}'.format(plt.axis()))

plt.savefig('a.png')

In [None]:
# Example #4 - Pandas plot() method 

In [None]:
# Quiz 5 (WIP)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Question #1 
# Answer = Both pandas and seaborn are built on top of Matplotlib (confluence).
# pandas uses Matplotlib under the hood for its .plot() functionality.
# seaborn uses Matplotlib for rendering and pandas for data structures.

# Question #2
# Answer = A figure can have multiple axes and those axes only belong to that figure
# Axes object is contained within and belongs exclusively to one Figure object.

# Question #3 
# Line plots are the default kind of plot for the plot method
# Answer = If the values are numerical, it will create a line plot of the values. Otherwise, it will raise an error.
# LACK OF CONFLUENCE - gpt and gemini agree
# A numerical Series: plotting it produces a line plot (the default kind).
s1 = pd.Series(data=[1, 3, 2, 4])
s1.plot.line()

# A Series of strings: there is nothing numeric to draw, so plotting it fails.
s2 = pd.Series(data=['apple', 'banana', 'apple', 'orange', 'banana'])
# s2.plot()  # left disabled on purpose - raises an error

# Question #4
# Two numerical columns (A, B) and one column of strings (C).
df = pd.DataFrame({
    'A': [12, 15, 5, 18],
    'B': [20, 16, 17, 17],
    'C': ['D', 'S', 'A', 'A'],
})
df.plot.line()
# Answer - each numerical column gets its own line, non-numerical columns are
# ignored, and all the lines are drawn on the same axes.

# Question #5
# The plot method accepts several 'kinds' of plot, including:
    # hexbin kind - a type of scatter plot suited to overlapping points
    # density kind - shows the density of the values
    # barh kind - horizontal bar
# Answer = hbar (confluence + not in Ed) - the horizontal bar kind is spelled 'barh'
# 
# Question #6
data = {
    'Year': list(range(1995, 2025)),
    'SalesRevenue_Million': [
        50, 53, 55, 58, 60, 62, 65, 67, 70, 72,
        75, 78, 77, 70, 72, 80, 88, 93, 95, 98,
        100, 103, 105, 108, 110, 113, 115, 118, 120, 117,
    ],
}
df = pd.DataFrame(data)

# Candidate kinds considered for this question:
#   pairplot  - matrix of scatter plots, one per pair of numerical columns
#   histogram - shows frequency
#   box plot  - shows spread
#   line      - the natural fit for a time series (CLEARLY CORRECT ANSWER + CONFLUENCE)
# The dotted line is drawn on a new figure, which is saved straight away.
df.plot.line(
    x="Year",
    y='SalesRevenue_Million',
    style=':',
    title='Company Sales Revenue (1995-2024)',
).get_figure().savefig('plot.png')

# Question #7
# Reproducible synthetic data: 300 weights in 300..700, and a price of roughly
# three times the weight with uniform integer noise in -100..100.
np.random.seed(42)
weights = np.random.randint(300, 701, size=300)
prices = weights * 3 + np.random.randint(-100, 101, size=300)
df = pd.DataFrame({
    'Weight': weights,
    'Price': prices
})
print(df.head())

# Two numerical columns whose relationship we want to see -> scatter plot.
# (The subplots/layout arguments explored for Question #9 are deliberately not
# used here: with a single scatter they only squeeze the plot into one cell of
# a 3x2 grid and leave the other five cells empty.)
df.plot(
    kind = 'scatter',
    x = 'Weight',
    y = 'Price',
)
plt.savefig('cowcost.png')
# CORRECT ANSWER = SCATTER PLOT (confluence)

# Question #8
# Hexbin best use: good for OVERLAPPING points slightly different to a scatter plot
# Answer = D (+ confluence)
# Two numerical columns = YES --> Like scatter plot
# Show relationship = YES --> Like scatter plot 
# Points of the scatter plot would overlap = YES --> Perfect for hexbin

# Question #9
# From the course content we NEED: "subplots = True"
# layout = [3,2] because the figure visually has 3 rows and 2 columns
# Answer = "subplots = True, layout = [3,2]" (confluence)

# Question #10
# Ed Discussion clearly shows the form: title = ['Minimum temp', 'Mean temp', 'Maximum temp']
# HOWEVER the Ed example is for 3 graphs stacked on top of each other (one per row)
# One row per year, one column of temperatures per month.
data = {
    "Year": [2021, 2022, 2023, 2024, 2025],
    "Jan": [27.1, 26.4, 27.8, 28.2, 27.5],
    "Feb": [26.3, 25.9, 26.7, 27.1, 26.8],
    "Mar": [24.5, 24.0, 24.8, 25.3, 24.9],
    "Apr": [21.2, 20.8, 21.5, 22.1, 21.7],
}
df = pd.DataFrame(data)

# One dotted line plot per month in a 2x2 grid, all sharing the same y scale.
# With subplots, a list passed as `title` supplies one title per subplot.
df.plot.line(
    x='Year',
    style=':',
    subplots=True,
    layout=[2, 2],
    sharey=True,
    title=['Jan', 'Feb', 'Mar', 'Apr'],
)
plt.savefig("tempbymonth.png")
# Answer = "title = ['Jan','Feb','Mar','Apr']"


# Question #11
# Five observations per month of temperature and precipitation, stored as
# "<Mon>_Temp" / "<Mon>_Precip" column pairs.
data = {
    "Jan_Temp": [27.1, 26.4, 27.8, 28.2, 27.5],
    "Jan_Precip": [102, 85, 120, 95, 110],
    "Feb_Temp": [26.3, 25.9, 26.7, 27.1, 26.8],
    "Feb_Precip": [98, 90, 115, 92, 108],
    "Mar_Temp": [24.5, 24.0, 24.8, 25.3, 24.9],
    "Mar_Precip": [110, 87, 123, 99, 105],
    "Apr_Temp": [21.2, 20.8, 21.5, 22.1, 21.7],
    "Apr_Precip": [95, 80, 100, 85, 92],
    "May_Temp": [18.5, 18.0, 18.8, 19.2, 18.7],
    "May_Precip": [80, 75, 88, 78, 82],
    "Jun_Temp": [15.2, 14.8, 15.5, 15.9, 15.3],
    "Jun_Precip": [70, 65, 75, 68, 72]
}
df = pd.DataFrame(data)
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']

# A 2x3 grid of axes that all share both scales, so the months are directly comparable.
fig, axes = plt.subplots(2, 3, figsize=(15, 8), sharex=True, sharey=True)

# axes is a 2-D array; flatten it so each month pairs with one axes, row by row.
for ax, month in zip(axes.flatten(), months):
    ax.scatter(df[f"{month}_Precip"], df[f"{month}_Temp"])
    ax.set_xlabel("Precipitation (mm)")
    # The degree sign was previously mis-encoded as "Â°" in this label.
    ax.set_ylabel("Temperature (°C)")
    ax.set_title(f"{month} Temp vs Precip")
plt.savefig("tempvsprecip.png")
# Answer = sharex=True, sharey=True) (+confluence)

# Question #12
data = {
    "X": [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0],
    "Y": [52, 93, 15, 72, 61, 21, 83, 87, 75, 75],
}
df = pd.DataFrame(data)

# Bar plot of the Y column only. Candidate xticks arguments from the question,
# all left disabled:
#   xticks = [0.0,0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.6,1.8,2.0]
#   xticks = range(0,2, 0.2)               (range() does not accept a float step)
#   xticks = [x/10 for x in range (0,20)]  (steps of 0.1, stopping at 1.9)
df.plot.bar(y="Y")
plt.savefig("spreads.png")
# UNCLEAR: the explicit list xticks = [0.0,0.2,0.4,0.6,0.8,1.0,1.2,1.4,1.6,1.8,2.0] seems best


# Question #13
data = {
    'Year': list(range(1995, 2025)),
    'SalesRevenue_Million': [
        50, 53, 55, 58, 60, 62, 65, 67, 70, 72,
        75, 78, 77, 70, 72, 80, 88, 93, 95, 98,
        100, 103, 105, 108, 110, 113, 115, 118, 120, 117,
    ],
    'NPAT_Million': [
        8, 9, 9, 10, 11, 11, 13, 13, 14, 15,
        16, 17, 16, 14, 15, 17, 19, 20, 21, 22,
        22, 23, 24, 25, 25, 26, 27, 28, 28, 27,
    ],
}
df = pd.DataFrame(data)

# Two lines on one axes; `style` takes one entry per plotted column
# (dotted for revenue, dashed for NPAT).
# Grid options tried for Question #14:
#   grid = True        WORKS
#   gridlines = True   ERROR
#   grid = 'On'        WORKS
#   hidegrid = False   ERROR
df.plot.line(
    x='Year',
    y=['SalesRevenue_Million', 'NPAT_Million'],
    style=[':', '--'],
)
plt.savefig('revenuevsnpat.png')
# Answer = style = [':','--']  (+confluence)


# Question #14
# Answer = grid = True  (+confluence)

# Question #15
data = {
    "cat": ["A", "B", "C", "A", "B", "C", "A", "B", "C"],
    "num1": [10, 20, 30, 15, 25, 35, 12, 22, 32],
    "num2": [5, 15, 25, 8, 18, 28, 6, 16, 26],
}
df = pd.DataFrame(data)

# Lookup from each category label to the colour its points are drawn in.
colours = dict(A='red', B='green', C='blue')

# `c` is given one colour name per row, obtained by translating the category
# column through the lookup above.
df.plot.scatter(
    x='num1',
    y='num2',
    c=df['cat'].map(colours),
)
plt.savefig('num1vsnum2.png')
# The values of cat cannot be interpreted as colours (confluence)

# Question #16
# It's `hue` for seaborn plots (confluence)

# Question #17
# Answer = All of the above is true (+confluence)
# pandas.plotting = TRUE from Ed
# Histograms = TRUE from Ed
# same plots = TRUE from Ed

# Question #18
s1 = pd.Series([12, 15, 18, 14, 20, 22, 19, 25, 23, 21])
s2 = pd.Series([5.2, 6.1, 5.8, 6.5, 7.0, 6.8, 7.3, 7.5, 7.1, 6.9])

# Series.plot() draws on the current axes whenever a figure is already open,
# so start from an empty figure; otherwise these lines would be added to the
# plot left over from the previous question instead of demonstrating the point.
plt.figure()
s1.plot()
s2.plot()
# Answer = Same axes, same plot with new line

# Question #19
df1 = pd.DataFrame(dict(
    A=[12, 15, 18, 14, 20, 22, 19, 25, 23, 21],
    B=[5.2, 6.1, 5.8, 6.5, 7.0, 6.8, 7.3, 7.5, 7.1, 6.9],
))
df2 = pd.DataFrame(dict(
    C=[8, 9, 7, 10, 11, 12, 9, 13, 10, 11],
    D=[3.5, 3.8, 4.0, 3.9, 4.1, 3.7, 4.2, 4.0, 3.6, 3.8],
))

# Plot each DataFrame in turn, each with two numerical columns.
df1.plot.line()
df2.plot.line()
# Answer is clearly = Another two line plots are created, but on a different figure from the first two line plots

# Question #20
data = {
    'Year': list(range(1995, 2025)),
    'SalesRevenue_Million': [
        50, 53, 55, 58, 60, 62, 65, 67, 70, 72,
        75, 78, 77, 70, 72, 80, 88, 93, 95, 98,
        100, 103, 105, 108, 110, 113, 115, 118, 120, 117,
    ],
    'NPAT_Million': [
        8, 9, 9, 10, 11, 11, 13, 13, 14, 15,
        16, 17, 16, 14, 15, 17, 19, 20, 21, 22,
        22, 23, 24, 25, 25, 26, 27, 28, 28, 27,
    ],
}
df = pd.DataFrame(data)

# figsize is [width, height] in inches; 11.7 x 8.3 is the size found for an
# A4 plot (wider than tall, i.e. landscape).
df.plot.line(
    x='Year',
    y=['SalesRevenue_Million', 'NPAT_Million'],
    style=[':', '--'],
    figsize=[11.7, 8.3],
)

# Source for the A4 dimensions (found via Google):
# https://stackoverflow.com/questions/15571267/python-a4-size-for-a-plot
# Answer = figsize = [11.7, 8.3]

In [None]:
# Assessed Task W5 - Space Missions 

import matplotlib.pyplot as plt
import pandas as pd 

# Load the mission data, stopping with a readable message if that is not possible.
try:
    df = pd.read_csv('data.csv')
except (OSError, ValueError) as err:
    # OSError covers a missing or unreadable file; pandas' parsing and
    # empty-file errors (and text decoding errors) are ValueError subclasses.
    # A bare `except:` would also have swallowed KeyboardInterrupt.
    print(f"Error in reading file: {err}")
    # SystemExit stops this script/cell without shutting down a notebook
    # kernel the way the interactive exit() helper does.
    raise SystemExit(1) from err
else:
    print("Data loaded successfully.\n")

print('Please view images attached.')

# Plot #1
# Share of missions per outcome, drawn as a pie chart.
# NOTE(review): four colours are listed, presumably one per outcome category in
# data.csv - confirm against the data.
colours = ['#000080','#C21807','#4F4F4F','#90EE90']
outcomes = df['Outcome'].value_counts()
percentages = outcomes / outcomes.sum() * 100

# Wedge labels in the form "<outcome> (<share>%)", in the same order as the counts.
labels = [
    f"{label} ({percent:.1f}%)"
    for label, percent in zip(outcomes.index, percentages)
]

# Draw on a dedicated figure so the chart can never share axes with another plot.
fig, ax = plt.subplots(figsize=[12, 8])
outcomes.plot(
    kind = 'pie',
    ax = ax,
    labels = labels,
    colors = colours,
    wedgeprops={'edgecolor': 'black'},
    textprops={'color': 'black'},
)
ax.set_ylabel('')  # remove the y-label pandas adds beside the pie
ax.set_title('Space Mission Outcomes since 1957', color = 'black', fontsize = 24)
plt.show()  # must be *called*; a bare `plt.show` does nothing


# Plot #2
# Mission count for the 30 most active companies, sorted ascending so the
# largest bar ends up at the top of the horizontal bar chart.
companysum = df.groupby('Company').size().nlargest(30)
companysum = companysum.sort_values()

# Use a dedicated figure: Series.plot() would otherwise draw onto whatever
# axes is current, e.g. a previous chart that has not been shown yet.
fig, ax = plt.subplots(figsize=[16, 8])
companysum.plot(
    kind = 'barh',
    ax = ax,
    color = '#000080',
)

ax.set_title('Number of Missions by Company since 1957 (top 30 companies shown)', color = 'black', fontsize = 22)
ax.set_xlabel('Number of Missions', color='black', fontsize = 16)
ax.set_ylabel('Company', color='black', fontsize = 16)
ax.tick_params(axis='both', labelsize=10, labelcolor='black')
plt.show()  # must be *called*; a bare `plt.show` does nothing


# Plot #3
# Collapse every kind of failure into the single label 'Failure'.
# Note: this rewrites df['Outcome'] in place, so the finer-grained outcome
# labels are no longer available from df after this point.
failure_list = ['Prelaunch Failure', 'Partial Failure', 'Failure']
df['Outcome'] = df['Outcome'].where(~df['Outcome'].isin(failure_list), 'Failure')

# Rows = company, columns = outcome, values = number of missions (0 where none).
company_outcome = df.groupby(['Company', 'Outcome']).size().unstack(fill_value=0)

# Keep the 30 companies with the most missions overall; the helper 'Total'
# column is only needed for ranking and is dropped before plotting.
company_outcome['Total'] = company_outcome.sum(axis = 1)
company_outcome = company_outcome.sort_values('Total', ascending = False).head(30)
company_outcome = company_outcome.drop(columns = 'Total')

# NOTE(review): the two colours assume exactly two outcome columns in the
# order Failure, Success - confirm against the data.
fig, ax = plt.subplots(figsize=[16, 8])
company_outcome.plot(
    kind = 'barh',
    stacked = True,  # a real boolean; the string 'True' only worked because it is truthy
    ax = ax,
    color=['#C21807', '#4CAF50'],
)

# Rows are sorted largest-first, so flip the y-axis to put the largest at the top.
ax.invert_yaxis()
# NOTE(review): this title is identical to the one used for Plot #2 - confirm
# whether it should mention the success/failure split.
ax.set_title('Number of Missions by Company since 1957 (top 30 companies shown)', color = 'black', fontsize = 22)
ax.set_xlabel('Number of Missions', color='black', fontsize = 16)
ax.set_ylabel('Company', color='black', fontsize = 16)
ax.tick_params(axis='both', labelsize=10, labelcolor='black')
fig.savefig('companymission_success_or_failure.png')
plt.show()      # both must be *called*; the bare names `plt.show` / `plt.close` do nothing
plt.close(fig)

In [None]:
# Read the CPI history table. Column names come from row 1, the first column
# becomes the index, and rows 2-10 (metadata) are skipped.
df_CPI = pd.read_csv(
    "g01hist.csv",
    header=1,
    index_col=0,
    skiprows=range(2, 11),
)
df_CPI.index.name = None

# Handy while exploring the raw table:
#   print(df_CPI.columns)  # current column names
#   print(df_CPI.shape)    # number of columns, not counting the index column

# Keep the raw columns at positions 1, 3 and 9 under short names, and discard
# rows in which all three are empty.
df_CPI = (
    df_CPI.iloc[:, [1, 3, 9]]
    .set_axis(['CPI_change', 'Core_CPI_Change', 'TM_CPI_Change'], axis=1)
    .dropna(how='all')
)

# Parse the index as day-first dates, then store it back as dd/mm/YYYY text.
df_CPI.index = pd.to_datetime(df_CPI.index, dayfirst=True).strftime('%d/%m/%Y')

print(df_CPI.head(10))

## Australian Quarterly CPI Data Economist Forecast
# Read the forecast table with the same layout rules as the history table:
# column names from row 1, first column as index, rows 2-10 (metadata) skipped.
df_CPI_fore = pd.read_csv(
    "j01hist.csv",
    header=1,
    index_col=0,
    skiprows=range(2, 11),
)
df_CPI_fore.index.name = None

# Handy while exploring the raw table:
#   print(df_CPI_fore.columns)  # current column names
#   print(df_CPI_fore.shape)    # number of columns, not counting the index column

# Keep the first five columns under short names.
df_CPI_fore = df_CPI_fore.iloc[:, :5].set_axis(
    ['CPI_fore_period', 'CPI_med', 'CPI_mean', 'CPI_low', 'CPI_high'],
    axis=1,
)

# The index holds dates in more than one text format: parse each entry on its
# own (day-first), then store the result back as dd-mm-YYYY text.
forecast_dates = pd.to_datetime(df_CPI_fore.index, format='mixed', dayfirst=True)
df_CPI_fore.index = forecast_dates.strftime('%d-%m-%Y')

print(df_CPI_fore.head(5))