In [2]:
import pandas as pd

# Define the file path
file_path = 'sample_weather.txt'  # Change this if your file is located elsewhere

# 1. Load the weather data
try:
    weather_data = pd.read_csv(file_path)  # assuming it's a comma-separated file
    print("✅ Data loaded successfully.\n")
except FileNotFoundError:
    print("❌ File not found. Please check the path:", file_path)
    exit()

# 2. Print initial info for debugging
print("📋 Columns:", weather_data.columns.tolist())
print("\n🧾 Data Types:\n", weather_data.dtypes)
print("\n🔍 Sample Data:\n", weather_data.head())

# 3. Ensure 'date' column exists and convert it to datetime
if 'date' not in weather_data.columns:
    print("❌ Column 'date' not found in the dataset.")
    exit()

try:
    weather_data['date'] = pd.to_datetime(weather_data['date'])
    print("\n✅ 'date' column converted to datetime.")
except Exception as e:
    print("❌ Error converting 'date' column:", e)
    exit()

# 4. Extract year and month from the date
weather_data['year'] = weather_data['date'].dt.year
weather_data['month'] = weather_data['date'].dt.month

# 5. Ensure numeric columns exist
required_columns = ['temperature', 'dew_point', 'wind_speed']
missing = [col for col in required_columns if col not in weather_data.columns]

if missing:
    print("❌ Missing required columns:", missing)
    exit()

# 6. Year-wise Averages
yearly_avg = weather_data.groupby('year').agg({
    'temperature': 'mean',
    'dew_point': 'mean',
    'wind_speed': 'mean'
}).reset_index()

print("\n📆 Year-wise Averages:")
print(yearly_avg)

# 7. Month-wise Averages
monthly_avg = weather_data.groupby(['year', 'month']).agg({
    'temperature': 'mean',
    'dew_point': 'mean',
    'wind_speed': 'mean'
}).reset_index()

print("\n🗓️ Month-wise Averages:")
print(monthly_avg)

# 8. Save results to CSV
yearly_avg.to_csv('yearly_avg_weather.csv', index=False)
monthly_avg.to_csv('monthly_avg_weather.csv', index=False)
print("\n💾 Results saved as 'yearly_avg_weather.csv' and 'monthly_avg_weather.csv'")


✅ Data loaded successfully.

📋 Columns: ['Year', 'Month', 'Temperature', 'DewPoint', 'WindSpeed']

🧾 Data Types:
 Year             int64
Month            int64
Temperature    float64
DewPoint       float64
WindSpeed        int64
dtype: object

🔍 Sample Data:
    Year  Month  Temperature  DewPoint  WindSpeed
0  2020      1         15.2       7.4         10
1  2020      2         16.1       8.3         12
2  2020      3         17.5       9.0         15
3  2021      1         18.2      10.1         14
4  2021      2         19.3      11.2         13
❌ Column 'date' not found in the dataset.
❌ Error converting 'date' column: 'date'


KeyError: 'date'