In [None]:
# Data acquisition and loading  

import pandas as pd

# Location of the raw CSV export.
# NOTE(review): this is an absolute, machine-specific path; it is kept in one
# named constant so it can be changed in a single place when the project moves.
RAW_CSV_PATH = r"C:\Users\aarav\Desktop\weather-data-visualizer\data\weather_raw.csv"

# Load the raw CSV file
df = pd.read_csv(RAW_CSV_PATH)

# Inspect structure
print(df.head())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit an extra "None" line after the report.
df.info()
print(df.describe())
print(df.columns)   # Check actual column names

In [None]:
# Data cleaning and preprocessing

# Map the raw CSV headers to the short names used by the rest of the notebook.
# Headers that are not present in the file are silently left out by rename().
df = df.rename(columns={
    'Date/Time': 'date',
    'Temp_C': 'temperature',
    'Rel Hum_%': 'humidity',
    'Weather': 'condition',
    'Dew Point Temp_C': 'dew_point',
    'Wind Speed_km/h': 'wind_speed',
    'Visibility_km': 'visibility',
    'Press_kPa': 'pressure'
})

# Convert date column to datetime
df['date'] = pd.to_datetime(df['date'])

# Handle missing values: drop every row that has a NaN in any column
# (alternative: forward-fill the gaps with df.ffill()).
df = df.dropna()

# Keep relevant columns for assignment.
# .copy() makes df_cleaned an independent frame rather than a slice of df, so
# adding columns to it later does not raise SettingWithCopyWarning and can
# never write through to df.
df_cleaned = df[['date', 'temperature', 'humidity', 'condition']].copy()

# Save cleaned dataset
df_cleaned.to_csv(r"C:\Users\aarav\Desktop\weather-data-visualizer\data\weather_cleaned.csv", index=False)

In [None]:
# Statistical analysis with NumPy

import numpy as np

# Daily statistics (overall)
# These two values are computed over every row of df_cleaned at once; no
# per-day grouping is applied, despite the "Daily" wording in the labels.
# ddof=1 requests the sample standard deviation so this figure is comparable
# with the 'std' columns of the grouped tables below (pandas uses ddof=1 for
# 'std', whereas NumPy's default is ddof=0).
daily_mean = np.mean(df_cleaned['temperature'])
daily_std = np.std(df_cleaned['temperature'], ddof=1)
print("Daily Mean Temp:", daily_mean)
print("Daily Std Dev Temp:", daily_std)

# Monthly statistics
# The key is the month number only, so rows from the same calendar month are
# pooled together regardless of year.
monthly_stats = (
    df_cleaned
    .groupby(df_cleaned['date'].dt.month)[['temperature', 'humidity']]
    .agg(['mean', 'min', 'max', 'std'])
)
print(monthly_stats)

# Yearly statistics
yearly_stats = (
    df_cleaned
    .groupby(df_cleaned['date'].dt.year)[['temperature', 'humidity']]
    .agg(['mean', 'min', 'max', 'std'])
)
print(yearly_stats)

In [None]:
# Visualization with Matplotlib

import matplotlib.pyplot as plt

# Output location shared by every figure in this cell.
# NOTE(review): absolute, machine-specific path; kept in one constant so it
# only has to be changed in a single place.
from pathlib import Path

IMAGES_DIR = Path(r"C:\Users\aarav\Desktop\weather-data-visualizer\images")
# savefig() does not create missing folders, so make sure the target exists.
IMAGES_DIR.mkdir(parents=True, exist_ok=True)


def _save_and_close(figure, filename):
    """Write `figure` to IMAGES_DIR/filename, then close it to free its memory."""
    figure.savefig(IMAGES_DIR / filename)
    plt.close(figure)


# Line chart: daily temperature trends
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df_cleaned['date'], df_cleaned['temperature'])
ax.set_title("Daily Temperature Trends")
ax.set_xlabel("Date")
ax.set_ylabel("Temperature (°C)")
_save_and_close(fig, "daily_temperature.png")

# Bar chart: monthly rainfall proxy (count of 'Rain' in condition)
month_of_row = df_cleaned['date'].dt.month
# na=False treats a missing condition as "no rain" instead of producing a NaN
# mask, which boolean indexing would reject.
is_rain = df_cleaned['condition'].str.contains("Rain", na=False)
monthly_rain = (
    df_cleaned[is_rain]
    # Filter the key with the same mask so frame and key line up explicitly
    # instead of relying on implicit index alignment.
    .groupby(month_of_row[is_rain])
    .size()
    # Months that appear in the data but have no rain rows get a bar of 0
    # rather than vanishing from the chart.
    .reindex(sorted(month_of_row.unique()), fill_value=0)
)
fig, ax = plt.subplots(figsize=(8, 5))
monthly_rain.plot(kind='bar', ax=ax)
ax.set_title("Monthly Rainfall Events (Proxy)")
ax.set_xlabel("Month")
# NOTE(review): size() counts rows, so this equals a number of days only if
# the data holds one row per day - confirm against the dataset.
ax.set_ylabel("Rainy Days Count")
_save_and_close(fig, "monthly_rainfall.png")

# Scatter plot: humidity vs temperature
fig, ax = plt.subplots()
ax.scatter(df_cleaned['temperature'], df_cleaned['humidity'])
ax.set_title("Humidity vs Temperature")
ax.set_xlabel("Temperature (°C)")
ax.set_ylabel("Humidity (%)")
_save_and_close(fig, "humidity_vs_temperature.png")

# Combined plots: the trend line on top, the scatter underneath
fig, ax = plt.subplots(2, 1, figsize=(10, 8))
ax[0].plot(df_cleaned['date'], df_cleaned['temperature'], color='red')
ax[0].set_title("Temperature Trend")
ax[0].set_xlabel("Date")
ax[0].set_ylabel("Temperature (°C)")
ax[1].scatter(df_cleaned['temperature'], df_cleaned['humidity'], color='blue')
ax[1].set_title("Humidity vs Temperature")
ax[1].set_xlabel("Temperature (°C)")
ax[1].set_ylabel("Humidity (%)")
fig.tight_layout()
_save_and_close(fig, "combined_plots.png")

In [None]:
# Grouping and aggregation

# Average temperature and humidity for each month number found in 'date'.
monthly_group = (
    df_cleaned[['temperature', 'humidity']]
    .groupby(df_cleaned['date'].dt.month)
    .mean()
)
print(monthly_group)

# Group by season
def get_season(month):
    """Return the season name for a month number.

    12, 1, 2 -> 'Winter'; 3, 4, 5 -> 'Spring'; 6, 7, 8 -> 'Summer'.
    Every other value, including 9, 10, 11, returns 'Autumn'.
    """
    season_table = (
        ('Winter', (12, 1, 2)),
        ('Spring', (3, 4, 5)),
        ('Summer', (6, 7, 8)),
    )
    for season_name, season_months in season_table:
        if month in season_months:
            return season_name
    # Fallback for anything not listed above.
    return 'Autumn'

# Label every row with the season returned by get_season for its month.
# assign() builds a new frame instead of writing a column into df_cleaned in
# place; df_cleaned was produced by selecting columns from another frame, and
# in-place column assignment on such a frame triggers SettingWithCopyWarning.
df_cleaned = df_cleaned.assign(season=df_cleaned['date'].dt.month.apply(get_season))

# Average temperature and humidity per season label.
season_group = df_cleaned.groupby('season')[['temperature','humidity']].mean()
print(season_group)