# Australia Rainfall Exploratory Data Analysis

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/biodatlab/community-notebooks/blob/main/visualization/visualize_australia_rainfall.ipynb)

Visualizing the Australia rainfall dataset using Matplotlib, seaborn, Plotly, and Dash.

Reference: https://www.kaggle.com/code/zohrehtofighizavareh/australia-rainfall-eda

In [None]:
!pip install dash
!pip install jupyter-dash

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns

In [None]:
# Load the weatherAUS CSV directly from its raw GitHub URL into a DataFrame.
data = pd.read_csv(
    "https://raw.githubusercontent.com/gchoi/Dataset/master/weatherAUS.csv"
)

In [None]:
# List the column labels of the loaded table.
data.columns

In [None]:
# Preview the first five rows.
data.head(5)

In [None]:
# Summary statistics as reported by DataFrame.describe().
data.describe()

In [None]:
# Remove four columns in place before the missing-value imputation below.
# NOTE(review): presumably dropped because they are mostly missing — confirm.
data.drop(columns=["Sunshine", "Evaporation", "Cloud9am", "Cloud3pm"], inplace=True)

In [None]:
# Fill missing numeric values with the column median
def replace_numerical(df1):
    """Fill NaNs in every int/float column of ``df1`` with that column's median.

    The frame is modified in place and is also returned.
    """
    numeric_columns = df1.select_dtypes(['int', 'float']).columns
    for name in numeric_columns:
        column = df1[name]
        df1[name] = column.fillna(column.median())
    return df1

# Forward-fill missing values in object (string) columns
def replace_object(df1):
    """Forward-fill missing values in every object-dtype column of ``df1``.

    Each gap takes the most recent preceding non-missing value in the same
    column; missing values that precede the first valid entry stay missing.
    The frame is modified in place and is also returned.
    """
    for col in df1.select_dtypes('object'):
        # Series.ffill() replaces the deprecated fillna(method='ffill').
        df1[col] = df1[col].ffill()
    return df1

In [None]:
# Impute numeric gaps with medians first, then forward-fill the object columns.
data = replace_object(replace_numerical(data))

In [None]:
# Preview the table again after the missing values have been filled.
data.head()

## Rainfall distribution by month

In [None]:
# Parse the date strings; values that cannot be parsed become NaT.
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
# Extract the month number (1-12) with the vectorised datetime accessor
# rather than calling a Python lambda once per row.
data["Month"] = data["Date"].dt.month

In [None]:
# Bar chart of the total rainfall recorded in each calendar month.
data.groupby("Month")[["Rainfall"]].sum().plot(kind="bar", color="blue")
plt.xticks(rotation=0)
plt.xlabel('Month', fontsize=10)
plt.ylabel('Rainfall (in mm)', fontsize=10)
plt.title('Rainfall distribution in each month', fontsize=15)
plt.show()

In [None]:
# Scatter of every rainfall reading against its location; the alpha makes
# dense clusters of points visible.
plt.figure(figsize=(10, 5))
plt.scatter(x=data['Location'], y=data['Rainfall'], alpha=0.4)
plt.xticks(rotation=80)
plt.xlabel("Location")
plt.ylabel("Rainfall")
plt.show()

## Min and Max Temperature based on location

In [None]:
# Mean MinTemp / MaxTemp per location, each as a one-column frame indexed
# by Location.
temps_by_location = data.groupby('Location')
min_temp_df = temps_by_location[['MinTemp']].mean()
max_temp_df = temps_by_location[['MaxTemp']].mean()
del temps_by_location

In [None]:
# Grouped bar chart: mean MinTemp and MaxTemp side by side for each location.
# Expects min_temp_df / max_temp_df to be indexed by Location (groupby output).
w = 0.4  # two bars per location must fit in the unit gap between ticks
# groupby() sorts its keys, so the tick labels must come from the aggregated
# index. data["Location"].unique() is in order of first appearance and would
# put the wrong city name under most bars.
locations = min_temp_df.index
x = np.arange(len(locations))

fig = plt.figure()
ax = fig.add_axes([0, 0, 1, 1])
ax.bar(x - w/2, min_temp_df['MinTemp'], label='Average MinTemp by Location', color='maroon', width=w)
# Select MaxTemp by the same labels so both series share one ordering.
ax.bar(x + w/2, max_temp_df.loc[locations, 'MaxTemp'], label='Average MaxTemp by Location', color='salmon', width=w)
ax.set_xticks(x)
ax.set_xticklabels(locations, rotation=90)
ax.set_title("Comparing MinTemp and MaxTemp based on location", fontsize=15)
ax.legend(fontsize=10)
plt.show()

In [None]:
# Histogram of the 9 AM humidity readings with a KDE curve overlaid.
sns.histplot(data=data, x="Humidity9am", kde=True)
plt.title('Histograms of humidity at 9 AM')
plt.show()

In [None]:
# Kernel density estimate of the 3 PM wind speed. kind="kde" draws a density
# curve rather than histogram bars, so the title says so.
sns.displot(data["WindSpeed3pm"], kind="kde")
plt.title('Density of wind speed at 3 PM')
plt.show()

## **Plotly**

In [None]:
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Rainfall totals as flat tables: one row per month, and one row per
# (month, location) pair.
rainfall_df = data.groupby("Month")["Rainfall"].sum().reset_index()
rainfall_loc_df = (
    data.groupby(["Month", "Location"])["Rainfall"]
    .sum()
    .reset_index()
)

In [None]:
# Preview the per-month, per-location rainfall totals.
rainfall_loc_df.head()

In [None]:
# Interactive bar chart of total rainfall per month.
fig = px.bar(
    data_frame=rainfall_df,
    x="Month",
    y="Rainfall",
    height=400,
    labels={"Rainfall": "Total rainfall"},
)
fig.show()

In [None]:
# Preview the per-location totals that feed the stacked chart in the next cell.
rainfall_loc_df.head()

In [None]:
# Monthly rainfall totals with one coloured bar segment per location.
fig_rainfall = px.bar(
    data_frame=rainfall_loc_df,
    x="Month",
    y="Rainfall",
    color="Location",
    title="Total rainfall by city",
    labels={"Rainfall": "Total rainfall"},
    height=400,
)
fig_rainfall.show()

In [None]:
# Three-letter month abbreviations, indexed by month number minus one.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# Month comes back as a float (e.g. 3.0) when any date failed to parse
# upstream, and a float cannot index a list, so cast to int first.
# NOTE(review): no later cell in this notebook appears to read "Month_" —
# confirm whether the charts were meant to use it for the x axis.
rainfall_loc_df["Month_"] = rainfall_loc_df.Month.map(
    lambda x: months[int(x) - 1]
)

In [None]:
# Mean MinTemp / MaxTemp per location as flat tables, with Location as an
# ordinary column for Plotly.
min_temp_df = data.groupby('Location')['MinTemp'].mean().reset_index()
max_temp_df = data.groupby('Location')['MaxTemp'].mean().reset_index()

In [None]:
# Preview the per-location mean minimum temperature table.
min_temp_df.head()

In [None]:
# Bar chart of mean minimum and maximum temperature per location, one trace
# for each measure.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=min_temp_df.Location,
    y=min_temp_df.MinTemp,
    name='Minimum temperature',
    marker_color='indianred'
))
fig.add_trace(go.Bar(
    x=max_temp_df.Location,
    y=max_temp_df.MaxTemp,
    name='Maximum temperature',
    marker_color='lightsalmon'
))
# The traces plot MinTemp/MaxTemp, so the title and y axis are labelled as
# temperature, not rainfall.
fig.update_layout(
    title="City and Minimum/Maximum Temperature",
    xaxis_title="City",
    yaxis_title="Minimum/Maximum Temperature"
)

## **Use dash to combine all figures**

In [None]:
import plotly.graph_objects as go
import dash
# dcc ships inside the dash package since Dash 2.0; the standalone
# dash_core_components distribution is deprecated.
from dash import dcc
from dash import html
from jupyter_dash import JupyterDash

In [None]:
# Serve the temperature and rainfall figures from a standalone Dash app.
app = dash.Dash()
app.layout = html.Div([
    dcc.Graph(figure=fig),
    dcc.Graph(figure=fig_rainfall)
])
# Dash.run() supersedes the deprecated run_server() alias. The reloader is
# turned off because the app is started from inside a notebook kernel.
app.run(debug=True, use_reloader=False) # on local computer

In [None]:
# Same two figures under a page heading, rendered inline in the notebook
# output (mode="inline").
# NOTE(review): jupyter-dash appears to be superseded by the Jupyter support
# built into dash itself — confirm before upgrading dependencies.
app = JupyterDash()
app.layout = html.Div([
    html.H1("Australia RainFall Dashboard"),
    dcc.Graph(figure=fig),
    dcc.Graph(figure=fig_rainfall)
])
# host="0.0.0.0" makes the server listen on all network interfaces.
app.run_server(mode="inline", host="0.0.0.0") # on google colab