In [None]:
import os
import numpy as np
import panel as pn
pn.extension('plotly')
import plotly.express as px

import matplotlib.pyplot as plt 
import seaborn as sns
import pandas as pd
import holoviews as hv
import hvplot.pandas
from plotly import subplots 
from plotly import tools 
import chart_studio
import chart_studio.plotly as py

import cufflinks as cf
# Notebook-wide cufflinks defaults: 'solar' theme, with charts rendered in offline mode.
cf.set_config_file(theme='solar', 
                   sharing='public', 
                   offline=True,
                   offline_connected=True,
                   offline_show_link=True)
%matplotlib inline 

## 

## Read in all csv files for analysis for Australia

In [None]:
# Load every processed CSV used in the Australian analysis.
# The names on the left line up one-to-one, in order, with the paths below.
(asx,
 btc,
 gdp,
 gdp_bar,
 cash_rate,
 population,
 employment,
 economy,
 household_income,
 sp500) = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../Data/ProcessedData/asx_june_cleaned.csv",
        "../Data/ProcessedData/BTC_USD_df.csv",
        "../Data/ProcessedData/GDP_selected_countries_cleaned.csv",
        # Same file read a second time so gdp and gdp_bar are separate frames.
        "../Data/ProcessedData/GDP_selected_countries_cleaned.csv",
        "../Data/ProcessedData/cashrate_cleaned.csv",
        "../Data/ProcessedData/Population_June_Cleaned.csv",
        "../Data/ProcessedData/employment_cleaned.csv",
        "../Data/ProcessedData/Economy_June_Cleaned.csv",
        "../Data/ProcessedData/Household_Income_cleaned.csv",
        "../Data/ProcessedData/SP500_history_cleaned.csv",
    )
]

## 

## What does the Australian GDP look like? Is AUS GDP similar for the same time-period (1995 - 2020) across countries in similar economic arenas?

In [None]:
# The column read in as "Unnamed: 0" is relabelled "Year" in both frames.
year_label = {"Unnamed: 0": "Year"}

# Frame for the line charts: Year becomes the index.
gdp_new = gdp.rename(columns=year_label).set_index("Year")

# Frame for the bar charts: Year stays a regular column, converted to strings.
gdp_bar_new = gdp.rename(columns=year_label)
gdp_bar_new["Year"] = gdp_bar_new["Year"].astype(str)

In [None]:
# Bar chart of GDP, grouped by the Year column (held as strings in gdp_bar_new)
gdp_hvplot_bar = gdp_bar_new.hvplot.bar(groupby="Year",
                                        title="GDP across all countries")
gdp_hvplot_bar

In [None]:
# Line plot of the Year-indexed GDP frame
gdp_hvplot_line = gdp_new.hvplot.line(title="GDP across all countries")
gdp_hvplot_line

In [None]:
# Combine the two hvplot charts above into one layout, line chart first
comb_gdp = gdp_hvplot_line + gdp_hvplot_bar 
comb_gdp

In [None]:
# Generate the same GDP charts as above, this time with cufflinks.
# Options shared by the line chart and the bar chart.
gdp_chart_opts = dict(dash='dash',
                      title="GDP across major countries",
                      xTitle="Year",
                      yTitle='GDP (% change)')

# Render each chart on its own and export it as an image, exactly as before.
gdp_new.iplot(kind='line', asImage=True, filename='GDP_world_line', **gdp_chart_opts)
gdp_new.iplot(kind='bar', asImage=True, filename='GDP_world_bar', **gdp_chart_opts)

# asFigure=True makes iplot return the figure object. Without it the call only
# renders the chart and returns nothing, so the grid below received no traces.
gdp_fig1 = gdp_new.iplot(kind='line', asFigure=True, **gdp_chart_opts)
gdp_fig2 = gdp_new.iplot(kind='bar', asFigure=True, **gdp_chart_opts)

# Create the plot matrix: line chart in row 1, bar chart in row 2.
fig = subplots.make_subplots(rows=2, cols=1)
for grid_row, chart in enumerate((gdp_fig1, gdp_fig2), start=1):
    for trace in chart.data:
        # Each trace needs an explicit grid position, otherwise they all share one subplot.
        fig.add_trace(trace, row=grid_row, col=1)

# The grid does not inherit the individual charts' layout, so label it here.
fig.update_xaxes(title_text=gdp_chart_opts['xTitle'])
fig.update_yaxes(title_text=gdp_chart_opts['yTitle'])
fig.update_layout(title_text=gdp_chart_opts['title'], height=600, width=600)

# Last expression of the cell, so the combined figure is actually displayed.
fig

## 

### 

## What does Australia's demographic information look like?

In [None]:
# Get demographic data. Every selection is kept as a one-column DataFrame.
year = population[["Year"]]
total_population = population[["EstimatedResidentPopulation"]]

# Employment series, unpacked in the same order as the column names below.
(total_employed,
 total_employed_males,
 total_employed_females,
 total_unemployed,
 total_unemployed_males,
 total_unemployed_females) = [
    employment[[column_name]]
    for column_name in ("TotalEmployed",
                        "TotalEmployed_M",
                        "TotalEmployed_F",
                        "TotalUnemployed",
                        "TotalUnemployed_M",
                        "TotalUnemployed_F")
]

In [None]:
# Put the demographic series side by side in one frame.
# NOTE(review): concat aligns on the row index, so this assumes the population
# and employment files list the same years in the same order - confirm.
demographic_parts = [year,
                     total_population,
                     total_employed,
                     total_employed_males,
                     total_employed_females,
                     total_unemployed,
                     total_unemployed_males,
                     total_unemployed_females]
aus_demographics_df = pd.concat(demographic_parts, axis=1)

# Shorter labels that stay readable on plots and in the correlation matrix.
demographic_short_names = {
    "EstimatedResidentPopulation": "Pop",
    "TotalEmployed": "Tot Emp",
    "TotalEmployed_M": "Tot Emp_M",
    "TotalEmployed_F": "Tot Emp_F",
    "TotalUnemployed": "Tot Unemp",
    "TotalUnemployed_M": "Tot Unemp_M",
    "TotalUnemployed_F": "Tot Unemp_F",
}

# Rename, index by Year, then preview the result.
aus_demographics_renamed_df = (aus_demographics_df
                               .rename(columns=demographic_short_names)
                               .set_index("Year"))
aus_demographics_renamed_df.head()

In [None]:
# Second frame without the total population column, then preview it.
aus_demographics_renamed_d2 = aus_demographics_renamed_df.drop(columns="Pop")
aus_demographics_renamed_d2.head()

In [None]:
# Discard the Year index so it is not part of the correlation.
demographic_corr_df = aus_demographics_renamed_df.reset_index(drop=True)

# Pairwise correlation between the demographic columns, shown as the cell output.
demographic_corr = demographic_corr_df.corr()
demographic_corr

In [None]:
# Generate line graphs using Cufflinks
demographics_titles = ("Aus demographics",
                       "Aus demographics dropped total population")

# asFigure=True makes iplot return the figure object. Without it the call only
# renders the chart and returns nothing, so the grid below received no traces.
# Only the combined grid is displayed now.
aus_demographics_fig1 = aus_demographics_renamed_df.iplot(kind='line',
                                                          title=demographics_titles[0],
                                                          xTitle="Year",
                                                          yTitle='Demographics',
                                                          asFigure=True)

aus_demographics_fig2 = aus_demographics_renamed_d2.iplot(kind='line',
                                                          title=demographics_titles[1],
                                                          xTitle="Year",
                                                          yTitle='Demographics',
                                                          asFigure=True)

# Create the plot matrix: one row per chart (the grid used to reserve an unused third row).
fig = subplots.make_subplots(rows=2, cols=1, subplot_titles=demographics_titles)
for grid_row, chart in enumerate((aus_demographics_fig1, aus_demographics_fig2), start=1):
    for trace in chart.data:
        # Each trace needs an explicit grid position, otherwise they all share one subplot.
        fig.add_trace(trace, row=grid_row, col=1)

# The grid does not inherit the individual charts' axis titles, so set them here.
fig.update_xaxes(title_text="Year")
fig.update_yaxes(title_text='Demographics')
fig.update_layout(height=600, width=600)

# Last expression of the cell, so the combined figure is actually displayed.
fig

In [None]:
# Generate heatmap using Seaborn
# The figure size has to be configured before the axes are created; set
# afterwards it only applies to figures created later.
sns.set(rc={'figure.figsize':(10,7)})

# Hide the diagonal and the upper triangle. The mask is built from the matrix
# shape, not its values, so a correlation of exactly 0 cannot end up unmasked.
matrix = np.triu(np.ones_like(demographic_corr, dtype=bool))
cmap = sns.diverging_palette(230, 20, as_cmap=True)
h = plt.axes()

# Draw on the axes created above; the colour scale is pinned to [-1, 1].
plot = sns.heatmap(demographic_corr,
                   vmin=-1,
                   vmax=1,
                   annot=True,
                   cmap=cmap,
                   mask=matrix,
                   ax=h)

h.set_title('Aus demographic data')

## 

### 

## What do Australia's financials look like?

In [None]:
# Get Australia's financial data. Every selection is kept as a one-column DataFrame.
year = population[["Year"]]
asx_price = asx[["Price"]]

# Target and interbank overnight cash rates.
cash_rate_target, overnight_cash_rate = [
    cash_rate[[column_name]]
    for column_name in ("Cash Rate Target", "Interbank Overnight Cash Rate")
]

# Household income and savings totals.
household_income_total, household_savings_total = [
    household_income[[column_name]]
    for column_name in ("TotalIncome", "TotalSavings")
]

# Economy-wide net savings and gross income.
net_savings, gross_income = [
    economy[[column_name]]
    for column_name in ("Net Savings", "Total Use of Gross Income")
]

In [None]:
# Put the financial series side by side in one frame.
# NOTE(review): concat aligns on the row index, so this assumes every source
# file lists the same years in the same order - confirm.
financial_parts = [year,
                   cash_rate_target,
                   overnight_cash_rate,
                   household_income_total,
                   household_savings_total,
                   net_savings,
                   gross_income]
aus_financials_df = pd.concat(financial_parts, axis=1)

# Shorten the one long column label, index by Year, then preview the result.
aus_financials_renamed_df = (aus_financials_df
                             .rename(columns={"Total Use of Gross Income": "Gross Income"})
                             .set_index("Year"))
aus_financials_renamed_df.head()

In [None]:
# Second frame without the Gross Income column, then preview it.
aus_financials_renamed_d2 = aus_financials_renamed_df.drop(columns=["Gross Income"])
aus_financials_renamed_d2.head()

In [None]:
# Discard the Year index so it is not part of the correlation.
aus_financials_corr_df = aus_financials_renamed_df.reset_index(drop=True)

# Pairwise correlation between the financial columns, shown as the cell output.
aus_financials_corr = aus_financials_corr_df.corr()
aus_financials_corr

In [None]:
# Create line plots using Cufflinks
financials_titles = ("Aus financials",
                     "Aus financials dropped Gross Income")

# asFigure=True makes iplot return the figure object. Without it the call only
# renders the chart and returns nothing, so the grid below received no traces.
# Only the combined grid is displayed now.
# The figures are named after the financial data they hold; they used to be
# stored under the demographics names from the previous section.
aus_financials_fig1 = aus_financials_renamed_df.iplot(kind='line',
                                                      title=financials_titles[0],
                                                      xTitle="Year",
                                                      yTitle='Australian Dollars ($)',
                                                      asFigure=True)

aus_financials_fig2 = aus_financials_renamed_d2.iplot(kind='line',
                                                      title=financials_titles[1],
                                                      xTitle="Year",
                                                      yTitle='Australian Dollars ($)',
                                                      asFigure=True)

# Create the plot matrix: one row per chart.
fig = subplots.make_subplots(rows=2, cols=1, subplot_titles=financials_titles)
for grid_row, chart in enumerate((aus_financials_fig1, aus_financials_fig2), start=1):
    for trace in chart.data:
        # Each trace needs an explicit grid position, otherwise they all share one subplot.
        fig.add_trace(trace, row=grid_row, col=1)

# The grid does not inherit the individual charts' axis titles, so set them here.
fig.update_xaxes(title_text="Year")
fig.update_yaxes(title_text='Australian Dollars ($)')
fig.update_layout(height=600, width=600)

# Last expression of the cell, so the combined figure is actually displayed.
fig

In [None]:
# Create heatmap using Seaborn
# The figure size has to be configured before the axes are created; set
# afterwards it only applies to figures created later.
sns.set(rc={'figure.figsize':(10,7)})

# Hide the diagonal and the upper triangle. The mask is built from the matrix
# shape, not its values, so a correlation of exactly 0 cannot end up unmasked.
matrix = np.triu(np.ones_like(aus_financials_corr, dtype=bool))
cmap = sns.diverging_palette(230, 20, as_cmap=True)
h = plt.axes()

# Draw on the axes created above; the colour scale is pinned to [-1, 1].
plot = sns.heatmap(aus_financials_corr,
                   vmin=-1,
                   vmax=1,
                   annot=True,
                   cmap=cmap,
                   mask=matrix,
                   ax=h)

h.set_title('Aus financials')

## 

## What are the key factors driving GDP? Demonstrated using correlation plots


In [None]:
# Clean population data
# drop(..., errors="ignore") keeps this cell re-runnable: `del` raises a
# KeyError once the column has already been removed.
population = population.drop(columns="Unnamed: 0", errors="ignore")
population.head()

In [None]:
# Get data to run correlation. Every selection is kept as a one-column DataFrame.
year = population[["Year"]]

# Australia's GDP column; the Year index is replaced by a plain 0..n-1 index.
gdp_aus_new = gdp_new[["Australia"]].reset_index(drop=True)

total_population = population[["EstimatedResidentPopulation"]]

# Employment series, unpacked in the same order as the column names below.
(total_employed,
 total_employed_males,
 total_employed_females,
 total_unemployed,
 total_unemployed_males,
 total_unemployed_females) = [
    employment[[column_name]]
    for column_name in ("TotalEmployed",
                        "TotalEmployed_M",
                        "TotalEmployed_F",
                        "TotalUnemployed",
                        "TotalUnemployed_M",
                        "TotalUnemployed_F")
]

# Target and interbank overnight cash rates.
cash_rate_target, overnight_cash_rate = [
    cash_rate[[column_name]]
    for column_name in ("Cash Rate Target", "Interbank Overnight Cash Rate")
]

# Household income and savings totals.
household_income_total, household_savings_total = [
    household_income[[column_name]]
    for column_name in ("TotalIncome", "TotalSavings")
]

# Economy-wide net savings and gross income.
net_savings, gross_income = [
    economy[[column_name]]
    for column_name in ("Net Savings", "Total Use of Gross Income")
]

In [None]:
# Put GDP, demographic and financial series side by side in one frame.
# NOTE(review): concat aligns on the row index, so this assumes every source
# file lists the same years in the same order - confirm.
correlation_parts = [gdp_aus_new,
                     total_population,
                     total_employed,
                     total_employed_males,
                     total_employed_females,
                     total_unemployed,
                     total_unemployed_males,
                     total_unemployed_females,
                     cash_rate_target,
                     overnight_cash_rate,
                     household_income_total,
                     household_savings_total,
                     net_savings,
                     gross_income]
correlation_df = pd.concat(correlation_parts, axis=1)

# Shorter labels that stay readable on the heatmaps.
correlation_short_names = {
    "Australia": "Aus_GDP",
    "EstimatedResidentPopulation": "Pop",
    "TotalEmployed": "Tot Emp",
    "TotalEmployed_M": "Tot Emp_M",
    "TotalEmployed_F": "Tot Emp_F",
    "TotalUnemployed": "Tot Unemp",
    "TotalUnemployed_M": "Tot Unemp_M",
    "TotalUnemployed_F": "Tot Unemp_F",
    "Total Use of Gross Income": "Gross Income",
}
correlation_renamed_df = correlation_df.rename(columns=correlation_short_names)

# Preview the combined dataset.
correlation_renamed_df.head()

In [None]:
# Pairwise correlation across every column of the combined Australian dataset
# (GDP, demographics and financials); displayed as the cell output.
correlation = correlation_renamed_df.corr()
correlation

In [None]:
# Interactive heatmap of the correlation matrix using hvplot, with a colour bar
heatmap_plot = correlation.hvplot.heatmap(title="Aus economy",
                                          ymarks_hover_color='cyan',
                                          hover_color='cyan',
                                          alpha=0.9,
                                          colorbar=True,
                                          cmap='bjy')
heatmap_plot

In [None]:
# Heatmap of the correlation matrix using Cufflinks.
# asFigure=True asks iplot for the figure object, which is then displayed as the cell output.
# NOTE(review): asImage=True is passed together with asFigure=True - confirm
# whether the image export still happens when a figure is requested.
corrplot = correlation.iplot(kind='heatmap',
                             title="Aus economy",
                             asFigure=True,
                             asImage=True,
                             colorscale='rdbu')
corrplot

In [None]:
# Create heatmap using Seaborn
# The figure size has to be configured before the axes are created; set
# afterwards it only applies to figures created later.
sns.set(rc={'figure.figsize':(10,7)})

# Hide the diagonal and the upper triangle. The mask is built from the matrix
# shape, not its values, so a correlation of exactly 0 cannot end up unmasked.
matrix = np.triu(np.ones_like(correlation, dtype=bool))
cmap = sns.diverging_palette(230, 20, as_cmap=True)
h = plt.axes()

# Draw on the axes created above; the colour scale is pinned to [-1, 1].
plot = sns.heatmap(correlation,
                   vmin=-1,
                   vmax=1,
                   annot=True,
                   cmap=cmap,
                   mask=matrix,
                   ax=h)

h.set_title('Aus Economy')

# Write the finished heatmap to the shared images folder.
plot.figure.savefig("../Images/CorrelationPlot_Aus_all.png")

In [None]:
# Pair plot (lower triangle only) of the headline indicators, coloured by year.
pairplot_columns = ['Year','Aus_GDP', 'Pop', 'Tot Emp', 'Tot Unemp', 'Cash Rate Target',
                    'TotalIncome', 'TotalSavings', 'Net Savings', 'Gross Income']

# Year is attached to a copy, so correlation_renamed_df keeps its original
# columns and the correlation cells above give the same result when re-run.
pairplot_df = correlation_renamed_df.assign(Year=population["Year"])

seaborn_corrplot = sns.pairplot(pairplot_df[pairplot_columns],
                                hue='Year', palette='husl', markers="o", diag_kind='kde', corner=True)

# One title only: the former 'Feature Relations' suptitle was replaced
# immediately by this call and never appeared.
seaborn_corrplot.fig.suptitle("Aus Economy", y=1.08)

# Save before showing, so the file is written while the figure is still open.
seaborn_corrplot.savefig("../Images/CorrelationPairPlot_Aus_all.png")
plt.show()

## 

## Read in all csv files for analysis for World

## GDP growth

In [None]:
# Get world data: Australia-vs-world comparison files.
# The names on the left line up one-to-one, in order, with the paths below.
gdp_growth, goods_export, world_inflation, world_population = [
    pd.read_csv(csv_path)
    for csv_path in (
        "../Data/ProcessedData/aus_vs_world_gdp_growth_cleaned.csv",
        "../Data/ProcessedData/aus_vs_world_goods_export_cleaned.csv",
        "../Data/ProcessedData/aus_vs_world_inflation_cleaned.csv",
        "../Data/ProcessedData/aus_vs_world_population_cleaned.csv",
    )
]

In [None]:
# Line plot of GDP growth across all countries.
# Drop the leftover "Unnamed: 0" column and index the frame by Year.
gdp_growth_clean = gdp_growth.drop(columns='Unnamed: 0').set_index("Year")

gdp_growth_line_opts = {"title": 'GDP growth', "ylabel": 'Percentage change'}
gdp_growth_line = gdp_growth_clean.hvplot.line(**gdp_growth_line_opts)
gdp_growth_line

In [None]:
# Bar plot of GDP growth across all countries, grouped by year.
# Year is converted to strings on a copy: rewriting gdp_growth itself would give
# the line-plot cell above a string index if it were re-run afterwards.
gdp_growth_clean2 = (gdp_growth
                     .drop('Unnamed: 0', axis=1)
                     .assign(Year=lambda frame: frame["Year"].astype(str)))
gdp_growth_bar = gdp_growth_clean2.hvplot.bar(groupby="Year")
gdp_growth_bar

In [None]:
# Combine the GDP growth line and bar plots into one layout, line chart first
gdp_growth_comb = gdp_growth_line + gdp_growth_bar
gdp_growth_comb

In [None]:
# Create line plot and bar graph using Cufflinks
# Options shared by both charts. asFigure=True makes iplot return the figure
# object; without it the call only renders the chart and returns nothing, so
# the grid below received no traces. Only the combined grid is displayed now.
gdp_growth_opts = dict(dash='dash',
                       title="GDP growth across major countries",
                       xTitle="Year",
                       yTitle='GDP (% change)',
                       asFigure=True)

gdp_growth_fig1 = gdp_growth_clean.iplot(kind='line', **gdp_growth_opts)
gdp_growth_fig2 = gdp_growth_clean.iplot(kind='bar', **gdp_growth_opts)

# Create the plot matrix: line chart in row 1, bar chart in row 2.
fig = subplots.make_subplots(rows=2, cols=1)
for grid_row, chart in enumerate((gdp_growth_fig1, gdp_growth_fig2), start=1):
    for trace in chart.data:
        # Each trace needs an explicit grid position, otherwise they all share one subplot.
        fig.add_trace(trace, row=grid_row, col=1)

# The grid does not inherit the individual charts' layout, so label it here.
fig.update_xaxes(title_text=gdp_growth_opts['xTitle'])
fig.update_yaxes(title_text=gdp_growth_opts['yTitle'])
fig.update_layout(title_text=gdp_growth_opts['title'], height=600, width=600)

# Last expression of the cell, so the combined figure is actually displayed.
fig

## 

## Goods export

In [None]:
# Line plot of goods export across all countries.
# Drop the leftover "Unnamed: 0" column and index the frame by Year.
goods_export_clean = goods_export.drop(columns='Unnamed: 0').set_index("Year")

goods_export_line_opts = {"title": 'Goods export', "ylabel": 'Percentage change'}
goods_export_line = goods_export_clean.hvplot.line(**goods_export_line_opts)
goods_export_line

In [None]:
# Bar plot of goods export across all countries, grouped by year.
# Year is converted to strings on a copy: rewriting goods_export itself would give
# the line-plot cell above a string index if it were re-run afterwards.
goods_export_clean2 = (goods_export
                       .drop('Unnamed: 0', axis=1)
                       .assign(Year=lambda frame: frame["Year"].astype(str)))
goods_export_bar = goods_export_clean2.hvplot.bar(groupby="Year")
goods_export_bar

In [None]:
# Combine the goods export line and bar plots into one layout, line chart first
goods_export_comb = goods_export_line + goods_export_bar
goods_export_comb

In [None]:
# Create line plot and bar graph using Cufflinks
# Options shared by both charts. asFigure=True makes iplot return the figure
# object; without it the call only renders the chart and returns nothing, so
# the grid below received no traces. Only the combined grid is displayed now.
# The y-axis label matches the hvplot chart of the same data; it used to read
# 'GDP (% change)', copied over from the GDP section.
goods_export_opts = dict(dash='dash',
                         title="Goods export",
                         xTitle="Year",
                         yTitle='Percentage change',
                         asFigure=True)

goods_export_fig1 = goods_export_clean.iplot(kind='line', **goods_export_opts)
goods_export_fig2 = goods_export_clean.iplot(kind='bar', **goods_export_opts)

# Create the plot matrix: line chart in row 1, bar chart in row 2.
fig = subplots.make_subplots(rows=2, cols=1)
for grid_row, chart in enumerate((goods_export_fig1, goods_export_fig2), start=1):
    for trace in chart.data:
        # Each trace needs an explicit grid position, otherwise they all share one subplot.
        fig.add_trace(trace, row=grid_row, col=1)

# The grid does not inherit the individual charts' layout, so label it here.
fig.update_xaxes(title_text=goods_export_opts['xTitle'])
fig.update_yaxes(title_text=goods_export_opts['yTitle'])
fig.update_layout(title_text=goods_export_opts['title'], height=600, width=600)

# Last expression of the cell, so the combined figure is actually displayed.
fig

## 

## World Inflation

In [None]:
# Line plot of inflation across all countries.
# Drop the leftover "Unnamed: 0" column and index the frame by Year.
world_inflation_clean = world_inflation.drop(columns='Unnamed: 0').set_index("Year")

world_inflation_line_opts = {"title": 'World inflation', "ylabel": 'Percentage change'}
world_inflation_line = world_inflation_clean.hvplot.line(**world_inflation_line_opts)
world_inflation_line

In [None]:
# Bar plot of inflation across all countries, grouped by year.
# Year is converted to strings on a copy: rewriting world_inflation itself would give
# the line-plot cell above a string index if it were re-run afterwards.
world_inflation_clean2 = (world_inflation
                          .drop('Unnamed: 0', axis=1)
                          .assign(Year=lambda frame: frame["Year"].astype(str)))
world_inflation_bar = world_inflation_clean2.hvplot.bar(groupby="Year")
world_inflation_bar

In [None]:
# Combine the world inflation line and bar plots into one layout, line chart first
world_inflation_comb = world_inflation_line + world_inflation_bar
world_inflation_comb

In [None]:
# Create line plot and bar graph using Cufflinks
# Options shared by both charts. asFigure=True makes iplot return the figure
# object; without it the call only renders the chart and returns nothing, so
# the grid below received no traces. Only the combined grid is displayed now.
# Both charts now share one title (the line chart used the raw name
# "world_inflation"), and the y-axis label matches the hvplot chart of the
# same data in place of the copied 'GDP (% change)'.
world_inflation_opts = dict(dash='dash',
                            title="World Inflation",
                            xTitle="Year",
                            yTitle='Percentage change',
                            asFigure=True)

world_inflation_fig1 = world_inflation_clean.iplot(kind='line', **world_inflation_opts)
world_inflation_fig2 = world_inflation_clean.iplot(kind='bar', **world_inflation_opts)

# Create the plot matrix: line chart in row 1, bar chart in row 2.
fig = subplots.make_subplots(rows=2, cols=1)
for grid_row, chart in enumerate((world_inflation_fig1, world_inflation_fig2), start=1):
    for trace in chart.data:
        # Each trace needs an explicit grid position, otherwise they all share one subplot.
        fig.add_trace(trace, row=grid_row, col=1)

# The grid does not inherit the individual charts' layout, so label it here.
fig.update_xaxes(title_text=world_inflation_opts['xTitle'])
fig.update_yaxes(title_text=world_inflation_opts['yTitle'])
fig.update_layout(title_text=world_inflation_opts['title'], height=600, width=600)

# Last expression of the cell, so the combined figure is actually displayed.
fig

## 

## World Population

In [None]:
# Line plot of population across all countries.
# Drop the leftover "Unnamed: 0" column and index the frame by Year.
world_population_clean = world_population.drop(columns='Unnamed: 0').set_index("Year")

world_population_line_opts = {"title": 'World population', "ylabel": 'Percentage change'}
world_population_line = world_population_clean.hvplot.line(**world_population_line_opts)
world_population_line

In [None]:
# Bar plot of population across all countries, grouped by year.
# Year is converted to strings on a copy: rewriting world_population itself would give
# the line-plot cell above a string index if it were re-run afterwards.
world_population_clean2 = (world_population
                           .drop('Unnamed: 0', axis=1)
                           .assign(Year=lambda frame: frame["Year"].astype(str)))
world_population_bar = world_population_clean2.hvplot.bar(groupby="Year")
world_population_bar

In [None]:
# Combine the world population line and bar plots into one layout, line chart first
world_population_comb = world_population_line + world_population_bar
world_population_comb

In [None]:
# Create line plot and bar graph using Cufflinks
# Options shared by both charts. asFigure=True makes iplot return the figure
# object; without it the call only renders the chart and returns nothing, so
# the grid below received no traces. Only the combined grid is displayed now.
# The y-axis label matches the hvplot chart of the same data; it used to read
# 'GDP (% change)', copied over from the GDP section.
# NOTE(review): confirm 'Percentage change' is the right unit for this file.
world_population_opts = dict(dash='dash',
                             title="World Population",
                             xTitle="Year",
                             yTitle='Percentage change',
                             asFigure=True)

world_population_fig1 = world_population_clean.iplot(kind='line', **world_population_opts)
world_population_fig2 = world_population_clean.iplot(kind='bar', **world_population_opts)

# Create the plot matrix: line chart in row 1, bar chart in row 2.
fig = subplots.make_subplots(rows=2, cols=1)
for grid_row, chart in enumerate((world_population_fig1, world_population_fig2), start=1):
    for trace in chart.data:
        # Each trace needs an explicit grid position, otherwise they all share one subplot.
        fig.add_trace(trace, row=grid_row, col=1)

# The grid does not inherit the individual charts' layout, so label it here.
fig.update_xaxes(title_text=world_population_opts['xTitle'])
fig.update_yaxes(title_text=world_population_opts['yTitle'])
fig.update_layout(title_text=world_population_opts['title'], height=600, width=600)

# Last expression of the cell, so the combined figure is actually displayed.
fig

## 

## All world data combined

In [None]:
# Read the raw world dataset, discard its "year" column, then preview the result.
world_data = pd.read_csv("../Data/RawData/World_data.csv").drop(columns="year")
world_data.head()

In [None]:
# Pairwise correlation between the world data columns (the "year" column was dropped on load)
world_data_corr = world_data.corr()

In [None]:
# Create heatmap using Seaborn
# The figure size has to be configured before the axes are created; set
# afterwards it only applies to figures created later.
sns.set(rc={'figure.figsize':(10,7)})

# Hide the diagonal and the upper triangle. The mask is built from the matrix
# shape, not its values, so a correlation of exactly 0 cannot end up unmasked.
matrix = np.triu(np.ones_like(world_data_corr, dtype=bool))
cmap = sns.diverging_palette(230, 20, as_cmap=True)
h = plt.axes()

# Draw on the axes created above; the colour scale is pinned to [-1, 1].
# No cell annotations on this one.
plot = sns.heatmap(world_data_corr,
                   vmin=-1,
                   vmax=1,
                   cmap=cmap,
                   mask=matrix,
                   ax=h)

h.set_title('World data')

# Write the finished heatmap to the shared images folder.
plot.figure.savefig("../Images/World_data_corr.png")

## 

# ASX v S&P500

In [None]:
# Get stock market info: the Price column of each index, kept as one-column DataFrames
asx_price = asx[["Price"]]
sp500_price = sp500[["Price"]]

In [None]:
# Both frames share the column name "Price"; relabel each with its index name
# so the two stay distinguishable once they are combined.
asx_price = asx_price.rename(columns={'Price': 'ASX'})
sp500_price = sp500_price.rename(columns={'Price': 'SP500'})

In [None]:
# Put the year next to both index prices.
# Year is read from population directly; this cell used to depend on a `year`
# variable left behind by a much earlier cell.
# NOTE(review): concat aligns on the row index, so this assumes the population,
# ASX and S&P500 files list the same years in the same order - confirm.
stocks_df = pd.concat([population[["Year"]],
                       asx_price,
                       sp500_price], axis=1)

# Index by Year and preview the dataset.
stocks_index_df = stocks_df.set_index('Year')
stocks_index_df.head()

In [None]:
# Create plots using Cufflinks
# Options shared by the line chart and the bar chart. The y-axis is labelled
# 'Price' after the source column; it used to read 'GDP (% change)', copied
# over from the GDP section.
stocks_chart_opts = dict(dash='dash',
                         title="ASX v S&P500",
                         xTitle="Year",
                         yTitle='Price')

# Render each chart on its own with image export, as before.
stocks_index_df.iplot(kind='line', asImage=True, **stocks_chart_opts)
stocks_index_df.iplot(kind='bar', asImage=True, **stocks_chart_opts)

# asFigure=True makes iplot return the figure object. Without it the call only
# renders the chart and returns nothing, so the grid below received no traces.
stocks_df_fig1 = stocks_index_df.iplot(kind='line', asFigure=True, **stocks_chart_opts)
stocks_df_fig2 = stocks_index_df.iplot(kind='bar', asFigure=True, **stocks_chart_opts)

# Create the plot matrix: line chart in row 1, bar chart in row 2.
fig = subplots.make_subplots(rows=2, cols=1)
for grid_row, chart in enumerate((stocks_df_fig1, stocks_df_fig2), start=1):
    for trace in chart.data:
        # Each trace needs an explicit grid position, otherwise they all share one subplot.
        fig.add_trace(trace, row=grid_row, col=1)

# The grid does not inherit the individual charts' layout, so label it here.
fig.update_xaxes(title_text=stocks_chart_opts['xTitle'])
fig.update_yaxes(title_text=stocks_chart_opts['yTitle'])
fig.update_layout(title_text=stocks_chart_opts['title'], height=600, width=600)

# Last expression of the cell, so the combined figure is actually displayed.
fig