In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.stats import spearmanr
%matplotlib inline

## Get the data

In [None]:
# Load the weather CSV into the notebook-wide DataFrame `df`.
# The path is relative, so it resolves against the kernel's working directory.
df = pd.read_csv('../../../datasets/parte1/weatherAUS.csv')

In [None]:
# A cell only echoes its final expression, so the column index has to be
# printed explicitly; otherwise it is evaluated and silently discarded.
print(df.columns)
df.shape

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None;
# wrapping it in print() only appended a stray "None" line to the output.
df.info()

In [None]:
# Print the distinct values of each text-valued column, one array per column.
for column in (
    "Location",
    "WindGustDir",
    "WindDir9am",
    "WindDir3pm",
    "RainToday",
    "RainTomorrow",
):
    print(df[column].unique())


In [None]:
# Summary statistics of the numeric columns. Leaving the frame as the cell's
# last expression lets Jupyter render it as a table instead of the wrapped
# plain-text form that print() produces for wide frames.
df.describe()

In [None]:
# Count of missing entries in every column.
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Histogram (with KDE overlay) of every numeric column on a 4x4 grid.
# The list order is the panel order: axs.flat walks the grid row by row.
histogram_columns = [
    "MinTemp", "MaxTemp", "Rainfall", "Evaporation",
    "Sunshine", "WindGustSpeed", "WindSpeed9am", "WindSpeed3pm",
    "Humidity9am", "Humidity3pm", "Pressure9am", "Pressure3pm",
    "Cloud9am", "Cloud3pm", "Temp9am", "Temp3pm",
]

fig, axs = plt.subplots(4, 4, figsize=(12, 15))
fig.suptitle("Histograms")

for ax, column in zip(axs.flat, histogram_columns):
    sns.histplot(df[column], ax=ax, kde=True)

# Ending on plt.show() keeps the cell from echoing the last Axes repr.
plt.show()

In [None]:
# Numeric copy of the data for the correlation analysis: the two Yes/No rain
# flags become 1/0 so that `corr(numeric_only=True)` can include them.
#
# The previous version declared this Yes->1 / No->0 mapping but never used it;
# it applied a mapping derived from the sorted category labels instead
# (No->1, Yes->2) via `replace(..., inplace=True)`, which also depends on
# pandas silently downcasting an object column (deprecated behaviour).
# Series.map applies the intended mapping directly and yields a numeric
# column; values absent from the mapping (e.g. missing entries) become NaN.
RAIN_FLAG_ENCODING = {"Yes": 1, "No": 0}

df_copy = df.copy()
for flag_column in ("RainToday", "RainTomorrow"):
    df_copy[flag_column] = df_copy[flag_column].map(RAIN_FLAG_ENCODING)

In [None]:
# Pearson correlation between the numeric columns of df_copy, drawn as an
# annotated heatmap on a large square figure.
satisfaction_corr = df_copy.corr(method='pearson', numeric_only=True)
fig = plt.figure(figsize=(20, 20))
sns.heatmap(
    data=satisfaction_corr,
    annot=True,
    linewidths=0.5,
    linecolor='black',
)

In [None]:
# Bar chart of how often each RainTomorrow value occurs.
sns.set_style('darkgrid')
raintomorrow_count = df['RainTomorrow'].value_counts()
ax = sns.barplot(x=raintomorrow_count.index, y=raintomorrow_count.values)
ax.set_title('Frequency Distribution of RainTomorrow')
ax.set_ylabel('Number of Occurrences', fontsize=12)
ax.set_xlabel('RainTomorrow', fontsize=12)
plt.show()

In [None]:
# Bar chart of how often each RainToday value occurs.
sns.set_style('darkgrid')
raintoday_count = df['RainToday'].value_counts()
ax = sns.barplot(x=raintoday_count.index, y=raintoday_count.values)
ax.set_title('Frequency Distribution of RainToday')
ax.set_ylabel('Number of Occurrences', fontsize=12)
ax.set_xlabel('RainToday', fontsize=12)
plt.show()

In [None]:
# Pie chart with the share of each RainTomorrow category.
counts = df['RainTomorrow'].value_counts()
labels = list(df['RainTomorrow'].astype('category').cat.categories)
# Line the counts up with the label order used for the wedges.
sizes = counts.loc[labels].tolist()

fig1, ax1 = plt.subplots()
ax1.pie(x=sizes, labels=labels, shadow=True, autopct='%1.1f%%')
ax1.axis('equal')
plt.show()

In [None]:
# Pie chart with the share of each RainToday category.
counts = df['RainToday'].value_counts()
labels = list(df['RainToday'].astype('category').cat.categories)
# Line the counts up with the label order used for the wedges.
sizes = counts.loc[labels].tolist()

fig1, ax1 = plt.subplots()
ax1.pie(x=sizes, labels=labels, shadow=True, autopct='%1.1f%%')
ax1.axis('equal')
plt.show()

In [None]:
# Rainfall against the 9am (blue) and 3pm (red) humidity readings, overlaid
# on the same axes so the two times of day can be compared.
plt.figure(figsize=(8, 5))
for humidity_column, colour in (("Humidity9am", "blue"), ("Humidity3pm", "red")):
    sns.scatterplot(
        x=df[humidity_column],
        y=df['Rainfall'],
        color=colour,
        label=humidity_column,
    )
plt.title("Comparison Humidity with Rain")
plt.xlabel('Humidity')
plt.ylabel('Rain (mm)')
plt.xticks(rotation=45)
plt.legend(loc='best')
# grid() without arguments *toggles* the grid; with the 'darkgrid' seaborn
# style set in an earlier cell that would switch it off, so request it on.
plt.grid(True)
plt.show()

In [None]:
# 9am temperature against 3pm temperature.
plt.figure(figsize=(8, 5))
sns.scatterplot(data=df, x='Temp9am', y='Temp3pm')
plt.title("Temperature")
plt.xlabel('Temp9am')
plt.ylabel('Temp3pm')
plt.xticks(rotation=45)
# grid() without arguments *toggles* the grid; with the 'darkgrid' seaborn
# style set in an earlier cell that would switch it off, so request it on.
plt.grid(True)
plt.show()

In [None]:
# 9am humidity against 3pm humidity.
plt.figure(figsize=(8, 5))
sns.scatterplot(data=df, x='Humidity9am', y='Humidity3pm')
plt.title("Humidity")
plt.xlabel('Humidity9am')
plt.ylabel('Humidity3pm')
plt.xticks(rotation=45)
# grid() without arguments *toggles* the grid; with the 'darkgrid' seaborn
# style set in an earlier cell that would switch it off, so request it on.
plt.grid(True)
plt.show()

In [None]:
# 9am pressure against 3pm pressure.
plt.figure(figsize=(8, 5))
sns.scatterplot(data=df, x='Pressure9am', y='Pressure3pm')
plt.title("Pressure")
plt.xlabel('Pressure9am')
plt.ylabel('Pressure3pm')
plt.xticks(rotation=45)
# grid() without arguments *toggles* the grid; with the 'darkgrid' seaborn
# style set in an earlier cell that would switch it off, so request it on.
plt.grid(True)
plt.show()

In [None]:
# Box plot of every numeric column on a 4x4 grid, to eyeball spread and outliers.
# The list order is the panel order: axs.flat walks the grid row by row.
boxplot_columns = [
    "MinTemp", "MaxTemp", "Rainfall", "Evaporation",
    "Sunshine", "WindGustSpeed", "WindSpeed9am", "WindSpeed3pm",
    "Humidity9am", "Humidity3pm", "Pressure9am", "Pressure3pm",
    "Cloud9am", "Cloud3pm", "Temp9am", "Temp3pm",
]

fig, axs = plt.subplots(4, 4, figsize=(15, 15))
fig.suptitle("Boxplots")

for ax, column in zip(axs.flat, boxplot_columns):
    sns.boxplot(y=df[column], ax=ax)

# Ending on plt.show() keeps the cell from echoing the last Axes repr.
plt.show()

In [22]:
# Pairwise relationship plots over df, coloured by the RainTomorrow label.
# NOTE(review): this runs on the full, unsampled frame and is presumably slow;
# consider plotting a row sample or a column subset — confirm.
sns.pairplot(df, hue="RainTomorrow")

In [None]:
#sns.pairplot(df, hue="RainToday")