## HDB Resale Price Index 

### Aim: What is the HDB Resale Price Index Trend Over The Last 10 Years?

### Dataset 2:

#### This dataset shows the overall price movement of the public residential market. The index is based on the quarterly average resale price by date of registration, and the dataset comprises two variables: quarter and index.

#### Chart Type: Line Graph

#### Source: https://data.gov.sg/dataset/hdb-resale-price-index

### Methodology

#### Step 1: Import the required libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt

#### Step 2: Import the required dataset

In [None]:
# Location of the HDB resale price index CSV on the author's machine.
# A raw string keeps every backslash literal. The earlier literal mixed '\\'
# with bare '\J', '\S' and '\H', which are invalid escape sequences and
# trigger a warning on current Python versions. The resulting path is unchanged.
filename = r'C:\Users\Jeffrey Wong\SP_Assignment_Python\HDB_resale_price_index.csv'

# Load the CSV into a structured array with two named fields:
#   'quarter' - text of up to 10 characters
#   'index'   - 64-bit float
# The first line is skipped as a header. Cells equal to 'na', '-' or ''
# are treated as missing and replaced with 0.
data = np.genfromtxt(filename, skip_header = 1, dtype = [('quarter', 'U10'), ('index', 'f8')], delimiter = ',',
                    missing_values = ['na', '-', ''], filling_values = 0)

#### Step 3: Data Cleaning, Manipulation & Extraction

##### Use subsetting to find the position of specific elements in the dataset and store each position in its own variable.

In [None]:
### locate the row that holds the first quarter of year 2008
# np.nonzero returns one array of matching positions per dimension;
# the first entry of the first array is the row number we need
is_2008_q1 = data['quarter'] == '2008-Q1'
index_2008 = np.nonzero(is_2008_q1)
first_quarter_2008 = index_2008[0][0]
print("The indexing value for the first quarter of 2008 is " + str(first_quarter_2008))

### locate the row that holds the first quarter of year 2019
is_2019_q1 = data['quarter'] == '2019-Q1'
index_2019 = np.nonzero(is_2019_q1)
first_quarter_2019 = index_2019[0][0]
print("The indexing value for the first quarter of 2019 is " + str(first_quarter_2019))

##### Extract the data for the last 10 years (2008-Q1 up to, but not including, 2019-Q1) by slicing and store it in a new variable

In [None]:
# rows from 2008-Q1 up to, but not including, 2019-Q1 (the slice end is exclusive)
study_period = slice(first_quarter_2008, first_quarter_2019)
data_10_years = data[study_period]
print(data_10_years)

##### Extract the quarter and index values from the new dataset and store each in its own variable

In [None]:
# x holds the quarter labels and y the matching index values of the extracted period
x, y = data_10_years['quarter'], data_10_years['index']

#### Step 4: Data Visualization using Matplotlib

##### Define the function to create a line graph

In [None]:
def line_graph(x, y):
    """Plot the resale price index as a line graph, save it and display it.

    The quarter with the highest index and up to the last four quarters are
    annotated. The figure is written to 'linegraph.png' in the current
    working directory and then shown with ``plt.show()``.

    Parameters
    ----------
    x : indexable sequence of str
        Quarter labels in plotting order. Labels are assumed to be unique,
        so that the position of a label in ``x`` is also its position on
        the categorical x-axis.
    y : indexable sequence of float
        Index values, one per entry of ``x``.

    Returns
    -------
    None

    Notes
    -----
    The heights of the annotation texts (110 to 140) are fixed values in
    data coordinates, carried over unchanged from the original layout.
    """
    ### to create line graph
    fig, ax = plt.subplots(1, figsize = (15,7))

    ax.plot(x, y, color = 'cyan', marker = 'o', mfc = 'blue', mec = 'black', markersize = 8, label = 'Resale Price Index')

    # add a title and axes labels
    ax.set_title('HDB Resale Price Index Trend', fontsize = 20, fontweight = 'bold')
    ax.set_xlabel('Quarter', fontsize = 15, fontweight = 'bold')
    ax.set_ylabel('Index', fontsize = 15, fontweight = 'bold')

    # adjust both axes ticks values
    ax.tick_params(axis = "y", labelsize = 12, length = 10, width = 2.0, labelcolor = 'black', colors = 'red')
    ax.tick_params(axis = "x", labelsize = 12, length = 10, width = 2.0, labelcolor = 'black', colors = 'red',
                   labelrotation = 90)

    # removing top and right borders
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # add major gridlines
    ax.grid(axis = 'y', color = 'blue', linestyle = '--', linewidth = 0.8, alpha = 0.5)

    point_count = len(y)

    # add text on the quarter with the max resale price index value
    # a plain integer position is used so the label shows the quarter itself
    # (not a one-element array) and the arrow gets scalar coordinates
    if point_count > 0:
        peak = int(np.argmax(y))
        ax.annotate("Quarter: {}\n Index: {}".format(x[peak], y[peak]), xy = (peak, y[peak]),
                    xytext = (peak, 135), arrowprops = dict(arrowstyle = "-", color = 'r', linewidth = 2),
                    horizontalalignment = 'center', verticalalignment = 'top', fontweight = 'bold')

    # add text on the last four quarters
    # each entry: (quarters back from the end, text x-offset, text height, vertical alignment)
    # positions are counted from the end of the series, so the labels follow
    # the data instead of assuming exactly 44 points
    recent_quarter_labels = (
        (1, 0, 110, 'top'),
        (2, 0, 140, 'bottom'),
        (3, 0, 120, 'top'),
        (4, -5, 125, 'top'),
    )
    for quarters_back, text_dx, text_height, valign in recent_quarter_labels:
        position = point_count - quarters_back
        if position < 0:
            # series is shorter than four points: nothing further back to label
            break
        ax.annotate("Quarter: {}\n Index: {}".format(x[position], y[position]), xy = (position, y[position]),
                    xytext = (position + text_dx, text_height),
                    arrowprops = dict(arrowstyle = "-", color = 'm', linewidth = 2), horizontalalignment = 'right',
                    verticalalignment = valign, fontweight = 'bold')

    ### add legend
    ax.legend(loc = 'lower right', facecolor = 'white', edgecolor = 'black', shadow = True, fontsize = 12)

    fig.savefig('linegraph.png')
    plt.show()

##### Call the function to display the line graph

In [None]:
# draw, save and show the chart for the extracted quarters and index values
line_graph(x = x, y = y)

### Simple Text-Based Analysis Using NumPy

In [None]:
print("***** HDB Resale Price Index Trend Over The Last 10 Years *****")
print()

### number of rows in the full dataset (not only the extracted period)
no_of_rows = len(data)
print("There are " + str(no_of_rows) + " rows in this dataset from " + filename)
print()

### display the classification of the data type structure on each column in this dataset
print("Classification of the Data Type Structure on Each Column: ")
print("---------------------------------------------------------------")
# the type is read from the first row of each column
data_type_1 = type(data['quarter'][0])
data_type_2 = type(data['index'][0])
print("The data type for 'quarter' in column 1 is " + str(data_type_1))
print("The data type for 'index' in column 2 is " + str(data_type_2))
print()

### display the resale price index values for the last 4 quarters
# slicing from the end never wraps around: with fewer than 4 rows each row
# is printed once, where an index loop starting at len(x)-4 would go
# negative and print rows twice
print("The HDB resale price index over the last 4 quarterly are: ")
for quarter, index_value in zip(x[-4:], y[-4:]):
    print("Quarter: " + quarter + " Index: " + str(index_value))
print()

### display the lowest resale price index and the quarter in which it occurs
# NOTE: missing index cells are loaded as 0, so a gap in the period would
# show up here as the minimum
minimum_resale_price_index = y.min()
minimum_resale_price_indexing = y.argmin()
print("The minimum HDB resale price index is " + str(minimum_resale_price_index) + " in the year of " 
      + x[minimum_resale_price_indexing])
print()

### display the highest resale price index and the quarter in which it occurs
maximum_resale_price_index = y.max()
maximum_resale_price_indexing = y.argmax()
print("The maximum HDB resale price index is " + str(maximum_resale_price_index) + " in the year of " 
      + x[maximum_resale_price_indexing])
print()

### compute and display the median resale price index over the last 10 years 
median_resale_price_index = np.median(y)
print("The median HDB resale price index over the last 10 years is " + str(median_resale_price_index))
print()

### compute and display the average resale price index over the last 10 years 
average_resale_price_index = np.mean(y)
print("The average HDB resale price index over the last 10 years is {:.2f}".format(average_resale_price_index))
print()

### compute and display the standard deviation resale price index over the last 10 years
# np.std is called with its defaults here
std_dev_resale_price_index = np.std(y)
print("The standard deviation HDB resale price index over the last 10 years is {:.2f}".format(std_dev_resale_price_index))
print()

### compute and display the 25th and 75th percentile of resale price index over the last 10 years
percentile_resale_price_index = np.percentile(y, [25,75])
print("The 25th and 75th percentile of resale price index over the last 10 years is " + 
      str(percentile_resale_price_index) + " respectively.")