In [2]:
!pip install meteostat

Collecting meteostat
  Downloading meteostat-1.6.8-py3-none-any.whl.metadata (4.6 kB)
Downloading meteostat-1.6.8-py3-none-any.whl (31 kB)
Installing collected packages: meteostat
Successfully installed meteostat-1.6.8


In [3]:
from meteostat import Point, Hourly
from datetime import datetime
import pandas as pd

# Set the start and end date for the data collection
start = datetime(2010, 1, 1)
end = datetime(2025, 4, 22)

# Coordinates for Chennai, India (Latitude: 13.0827, Longitude: 80.2707)
chennai = Point(13.0827, 80.2707)

#other places
#vellore = Point(12.9165, 79.1325)
#mumbai = Point(19.0760, 72.8777)

# Fetch hourly data for Chennai
data = Hourly(chennai, start, end)
data = data.fetch()

# Display the first few rows of the fetched data for inspection
print("Fetched data (first few rows):")
print(data.head())

# Handle missing values using linear interpolation
data = data.interpolate(method='linear', axis=0)

# Verify that missing values have been filled
print("Missing values after interpolation:")
print(data.isnull().sum())

# Save the cleaned data to a CSV file (optional)
data.to_csv('chennai_hourly_weather_data.csv')




Fetched data (first few rows):
                     temp  dwpt  rhum  prcp  snow  wdir  wspd  wpgt    pres  \
time                                                                          
2010-01-01 00:00:00  22.2  21.0  93.0   NaN   NaN   NaN   0.0   NaN  1011.3   
2010-01-01 01:00:00   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN     NaN   
2010-01-01 02:00:00   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN     NaN   
2010-01-01 03:00:00  24.0  21.9  88.0   NaN   NaN   NaN   0.0   NaN  1014.9   
2010-01-01 04:00:00   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN     NaN   

                     tsun  coco  
time                             
2010-01-01 00:00:00   NaN   NaN  
2010-01-01 01:00:00   NaN   NaN  
2010-01-01 02:00:00   NaN   NaN  
2010-01-01 03:00:00   NaN   NaN  
2010-01-01 04:00:00   NaN   NaN  
Missing values after interpolation:
temp         0
dwpt         0
rhum         0
prcp     96449
snow    134161
wdir         6
wspd         0
wpgt    134161
pres         0
tsun  

In [5]:
# Fetch hourly data for Chennai
data = Hourly(chennai, start, end)
data = data.fetch()

# Reset index to move datetime into a column
data.reset_index(inplace=True)

# Display the first few rows of the fetched data for inspection
print("Fetched data (first few rows):")
print(data.head())

# Handle missing values using linear interpolation
data = data.interpolate(method='linear', axis=0)

# Verify that missing values have been filled
print("Missing values after interpolation:")
print(data.isnull().sum())

# Preprocess the fetched data (Renaming columns to match your desired format)
data = data.rename(columns={
    'time': 'date',                 # Rename datetime column to 'date'
    'temp': 'temperature_2m',
    'precip': 'precipitation',
    'wspd': 'wind_speed_10m',
    'rh': 'humidity',
    'slp': 'pressure',
    'wdsp': 'wind_speed_10m',       # Overwrites 'wspd', be cautious
    'weather': 'weather_condition',
    'cloud': 'cloud_cover',
    'dew': 'dew_point',
    'windgust': 'wind_gusts',
})

# Verify the column names after renaming
print("Renamed columns:")
print(data.columns)

# Save the processed data to a new CSV file
data.to_csv('chennai_weather_processed.csv', index=False)

print("✅ Preprocessed Chennai weather data saved to 'chennai_weather_processed.csv'.")




Fetched data (first few rows):
                 time  temp  dwpt  rhum  prcp  snow  wdir  wspd  wpgt    pres  \
0 2010-01-01 00:00:00  22.2  21.0  93.0   NaN   NaN   NaN   0.0   NaN  1011.3   
1 2010-01-01 01:00:00   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN     NaN   
2 2010-01-01 02:00:00   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN     NaN   
3 2010-01-01 03:00:00  24.0  21.9  88.0   NaN   NaN   NaN   0.0   NaN  1014.9   
4 2010-01-01 04:00:00   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN     NaN   

   tsun  coco  
0   NaN   NaN  
1   NaN   NaN  
2   NaN   NaN  
3   NaN   NaN  
4   NaN   NaN  
Missing values after interpolation:
time         0
temp         0
dwpt         0
rhum         0
prcp     96449
snow    134161
wdir         6
wspd         0
wpgt    134161
pres         0
tsun    134161
coco     51622
dtype: int64
Renamed columns:
Index(['date', 'temperature_2m', 'dwpt', 'rhum', 'prcp', 'snow', 'wdir',
       'wind_speed_10m', 'wpgt', 'pres', 'tsun', 'coco'],
      dty