In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

***Load and Inspect Data***

In [None]:
# Load the weather CSV into a DataFrame.
# A forward slash is used as the separator: inside an ordinary string literal
# the sequence '\t' is a TAB escape, so a backslash before "test_weather.csv"
# yields a path with a tab character in it rather than a directory separator.
# read_csv already returns a DataFrame, so no extra pd.DataFrame() wrap is needed.
df = pd.read_csv('data/test_weather.csv')

In [None]:
# Preview the first rows of the loaded data.
print('Data Samples: \n', df.head(), sep='')

In [None]:
# Show column dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
print('\n Data info:\n')
df.info()

***Data Cleaning***

In [None]:
# Count the missing entries in every column.
missing_per_column = df.isnull().sum()
print('Missing value count: \n', missing_per_column, sep='\n')

In [None]:
# Fill missing 'Precip Type' entries, then re-check the per-column null counts.
# NOTE(review): the fill value is the integer 0; if 'Precip Type' holds text
# labels this mixes types within the column — confirm the intended placeholder.
print("Replacing the missing values with appropiriate values: \n")
fill_values = {'Precip Type': 0}
df = df.fillna(fill_values)
remaining_missing = df.isnull().sum()
print(remaining_missing)

In [None]:
# Parse the 'Formatted Date' column into pandas datetime values.
# NOTE(review): if the raw strings carry differing UTC offsets, to_datetime
# may need utc=True to yield a proper datetime dtype — confirm against the data.
parsed_dates = pd.to_datetime(df['Formatted Date'])
df['Formatted Date'] = parsed_dates
print(df['Formatted Date'].head())

In [None]:
# Shorten the temperature column name for easier access in later cells.
short_names = {'Temperature (C)': 'Temperature'}
df = df.rename(columns=short_names)
print(df.head())

***Descriptive Statistics with NumPy***

In [None]:
# Basic descriptive statistics for temperature, computed with NumPy.
# The f-strings are delimited with double quotes so that no quote character is
# reused inside the replacement fields; reusing the outer quote there is only
# accepted from Python 3.12 onward and is a SyntaxError on earlier versions.
# np.std is called with its default ddof=0 (population standard deviation).
print("Temperature -->")
temperature_values = df['Temperature'].values
print(f"mean: {np.mean(temperature_values):.2f}")
print(f"median: {np.median(temperature_values):.2f}")
print(f"standard deviation: {np.std(temperature_values):.2f}")

In [None]:
# Basic descriptive statistics for humidity, computed with NumPy.
# The f-strings are delimited with double quotes so that no quote character is
# reused inside the replacement fields; reusing the outer quote there is only
# accepted from Python 3.12 onward and is a SyntaxError on earlier versions.
# np.std is called with its default ddof=0 (population standard deviation).
print("Humidity -->")
humidity_values = df['Humidity'].values
print(f"mean: {np.mean(humidity_values):.2f}")
print(f"median: {np.median(humidity_values):.2f}")
print(f"standard deviation: {np.std(humidity_values):.2f}")

In [None]:
# print("Precipitation -->")
# print(f'mean: {np.mean(df['Precipitation'].values):.2f}')
# print(f'median: {np.median(df['Precipitation'].values):.2f}')
# print(f'standard deviation: {np.std(df['Precipitation'].values):.2f}')

***Monthly and Seasonal Analysis (Using Pandas Group By)***

In [None]:
# Derive calendar month and year columns from each row's timestamp.
# Element-wise attribute access is kept (rather than the .dt accessor) so the
# cell does not depend on the column having a datetime64 dtype.
formatted_dates = df['Formatted Date']
df['Month'] = formatted_dates.apply(lambda stamp: stamp.month)
df['Year'] = formatted_dates.apply(lambda stamp: stamp.year)
print(df.head())

In [None]:
def get_season(month):
  """Return the season name for a month number.

  12, 1, 2 -> 'Winter'; 3-5 -> 'Spring'; 6-8 -> 'Summer'.
  Every other value (including 9, 10, 11) falls through to 'Autumn'.
  """
  season_table = (
    ('Winter', [12, 1, 2]),
    ('Spring', [3, 4, 5]),
    ('Summer', [6, 7, 8]),
  )
  for season_name, season_months in season_table:
    if month in season_months:
      return season_name
  # Anything not matched above is treated as autumn.
  return 'Autumn'
  
# Label every row with the season that its month belongs to.
season_labels = df['Month'].apply(get_season)
df['Season'] = season_labels

In [None]:
# Mean temperature and humidity per calendar month (rows from all years pooled).
metric_columns = ['Temperature', 'Humidity']
monthly_avg = df.groupby('Month')[metric_columns].mean()
print("\nMonthly Averages:", monthly_avg, sep='\n')

In [None]:
# Mean temperature and humidity per season label.
by_season = df.groupby('Season')
seasonal_avg = by_season[['Temperature', 'Humidity']].mean()
print("\nSeasonal Averages:", seasonal_avg, sep='\n')

***Visualizations with Matplotlib***

In [None]:
# Line chart of the mean temperature for each (year, month) pair.
monthly_trend = df.groupby(['Year', 'Month'])['Temperature'].mean().reset_index()
# to_datetime assembles dates from year/month/day columns; the added DAY=1
# column pins each point to the first day of its month.
month_starts = pd.to_datetime(monthly_trend[['Year', 'Month']].assign(DAY=1))

plt.figure(figsize=(10, 5))
plt.plot(month_starts, monthly_trend['Temperature'], marker='o')
plt.xlabel('Date')
plt.ylabel('Temperature (°C)')
plt.title('Average Monthly Temperature Over Time')
plt.grid(True)
plt.show()

In [None]:
# plt.figure(figsize=(8,5))
# plt.bar(monthly_avg.index, monthly_avg['Precipitation'], color='skyblue')
# plt.title('Average Monthly Precipitation')
# plt.xlabel('Month')
# plt.ylabel('Precipitation (mm)')
# plt.show()

In [None]:
# Histogram of all temperature readings, split into 20 bins.
temperature_readings = df['Temperature']
hist_style = dict(bins=20, color='orange', edgecolor='black')

plt.figure(figsize=(8, 5))
plt.hist(temperature_readings, **hist_style)
plt.xlabel('Temperature (°C)')
plt.ylabel('Frequency')
plt.title('Temperature Distribution')
plt.show()

***Identify Extreme Weather***

In [None]:
# Select rows whose temperature lies more than two standard deviations above
# the mean of the whole temperature column.
temperature = df['Temperature']
hot_threshold = temperature.mean() + 2 * temperature.std()
extreme_hot = df[temperature > hot_threshold]
print("\nExtreme Hot Days:")
print(extreme_hot[['Formatted Date', 'Temperature']].head())

In [None]:
# Select rows whose temperature lies more than two standard deviations below
# the mean of the whole temperature column.
temperature = df['Temperature']
cold_threshold = temperature.mean() - 2 * temperature.std()
extreme_cold = df[temperature < cold_threshold]
print("\nExtreme Cold Days:")
print(extreme_cold[['Formatted Date', 'Temperature']].head())

In [None]:
# # finding extreme rainy days
# extreme_rain = df[df['Precipitation'] > df['Precipitation'].mean() + 2 * df['Precipitation'].std()]
# print("\nExtreme Rainy Days:")
# print(extreme_rain[['Formatted Date', 'Precipitation']].head())