In [10]:
import http.client
import json
import csv
from datetime import datetime, timedelta

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder




import xgboost as xgb
from sklearn.metrics import mean_absolute_error
from urllib.parse import urlencode

In [16]:

def fetch_weather_data(city):
    
    # Get today's date and the date 7 days ago
    today = datetime.today()
    seven_days_ago = today - timedelta(days=6)
    
    # Format dates to strings
    today_str = today.strftime('%Y-%m-%d')
    seven_days_ago_str = seven_days_ago.strftime('%Y-%m-%d')

    # API Connection to RapidAPI Weather API
    conn = http.client.HTTPSConnection("weatherapi-com.p.rapidapi.com")

    headers = {
           'x-rapidapi-key': "4765efd0e4msh0e6f4310f441125p115acdjsnc5b407e62f78",
    'x-rapidapi-host': "weatherapi-com.p.rapidapi.com"
    }
    params =urlencode({"q": city})

    # API URL to fetch historical weather data
    url = f"/history.json?{params}&lang=en&dt={seven_days_ago_str}&end_dt={today_str}"

    # Make the API request
    conn.request("GET", url, headers=headers)

    # Get the response
    res = conn.getresponse()
    data = res.read()
    

    # Parse the JSON response
 
    weather_data = json.loads(data.decode("utf-8"))
    print(weather_data)
   
    return weather_data

In [18]:
fetch_weather_data("New York")

{'location': {'name': 'New York', 'region': 'New York', 'country': 'United States of America', 'lat': 40.7142, 'lon': -74.0064, 'tz_id': 'America/New_York', 'localtime_epoch': 1735640226, 'localtime': '2024-12-31 05:17'}, 'forecast': {'forecastday': [{'date': '2024-12-25', 'date_epoch': 1735084800, 'day': {'maxtemp_c': 2.1, 'maxtemp_f': 35.8, 'mintemp_c': -4.7, 'mintemp_f': 23.5, 'avgtemp_c': -1.6, 'avgtemp_f': 29.2, 'maxwind_mph': 8.1, 'maxwind_kph': 13.0, 'totalprecip_mm': 0.0, 'totalprecip_in': 0.0, 'totalsnow_cm': 0.0, 'avgvis_km': 10.0, 'avgvis_miles': 6.0, 'avghumidity': 77, 'daily_will_it_rain': 0, 'daily_chance_of_rain': 0, 'daily_will_it_snow': 0, 'daily_chance_of_snow': 0, 'condition': {'text': 'Sunny', 'icon': '//cdn.weatherapi.com/weather/64x64/day/113.png', 'code': 1000}, 'uv': 2.0}, 'astro': {'sunrise': '07:19 AM', 'sunset': '04:34 PM', 'moonrise': '02:14 AM', 'moonset': '12:46 PM', 'moon_phase': 'Waning Crescent', 'moon_illumination': 29}, 'hour': [{'time_epoch': 1735102

{'location': {'name': 'New York',
  'region': 'New York',
  'country': 'United States of America',
  'lat': 40.7142,
  'lon': -74.0064,
  'tz_id': 'America/New_York',
  'localtime_epoch': 1735640226,
  'localtime': '2024-12-31 05:17'},
 'forecast': {'forecastday': [{'date': '2024-12-25',
    'date_epoch': 1735084800,
    'day': {'maxtemp_c': 2.1,
     'maxtemp_f': 35.8,
     'mintemp_c': -4.7,
     'mintemp_f': 23.5,
     'avgtemp_c': -1.6,
     'avgtemp_f': 29.2,
     'maxwind_mph': 8.1,
     'maxwind_kph': 13.0,
     'totalprecip_mm': 0.0,
     'totalprecip_in': 0.0,
     'totalsnow_cm': 0.0,
     'avgvis_km': 10.0,
     'avgvis_miles': 6.0,
     'avghumidity': 77,
     'daily_will_it_rain': 0,
     'daily_chance_of_rain': 0,
     'daily_will_it_snow': 0,
     'daily_chance_of_snow': 0,
     'condition': {'text': 'Sunny',
      'icon': '//cdn.weatherapi.com/weather/64x64/day/113.png',
      'code': 1000},
     'uv': 2.0},
    'astro': {'sunrise': '07:19 AM',
     'sunset': '04:34 PM'

In [5]:

def convert_to_csv(weather_data, city):
    header = ["Date", "AvgTemp", "Humidity", "WindSpeed", "Precipitation", "Condition"]
    rows = []

    # Loop through the forecast data and add each day's weather information
    for day in weather_data['forecast']['forecastday']:
        rows.append([
            day['date'],  # Date
            day['day']['avgtemp_c'],  # Average Temperature (Celsius)
            day['day']['avghumidity'],  # Average Humidity
            day['day']['maxwind_kph'],  # Max Wind Speed (kph)
            day['day']['totalprecip_mm'],  # Precipitation (mm)
            day['day']['condition']['text']  # Weather Condition (text)
        ])
    
    # Write the data to a CSV file using the csv module
    csv_filename = f"{city}_weather_data.csv"
    with open(csv_filename, mode="w", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(header)  # Write the header row
        writer.writerows(rows)  # Write the weather data

    print(f"CSV file '{csv_filename}' created successfully.")


def preprocess_data(csv_filename):
     # Load the CSV data into a DataFrame
    df = pd.read_csv(csv_filename)

    # Ensure the 'Date' column is in datetime format (if you need to keep it)
    df['Date'] = pd.to_datetime(df['Date'])

    # Drop the 'Date' column as it's not useful for prediction
    df = df.drop(columns=["Date"])

    # One-hot encode the 'Condition' column (convert categorical to numeric)
    encoder = OneHotEncoder(sparse_output=False,handle_unknown='ignore') 
    condition_encoded = encoder.fit_transform(df[['Condition']])
    condition_df = pd.DataFrame(condition_encoded, columns=encoder.get_feature_names_out(['Condition']))

    # Concatenate the one-hot encoded columns back into the original dataframe (without 'Condition' column)
    df = pd.concat([df.drop(columns=['Condition']), condition_df], axis=1)

    # Handle missing values (simple imputation strategy)
    imputer = SimpleImputer(strategy="mean")
    df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

    # Feature columns (excluding the target column 'AvgTemp')
    X = df.drop(columns=["AvgTemp"])

    # Target column (assuming you're predicting 'AvgTemp')
    y = df["AvgTemp"]
    X = X.values

    # Normalize the features (optional)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    print("Shape of X_scaled:", X_scaled.shape)
    return X_scaled, y,scaler,encoder


def split_data(X, y):
    # Split the data into training and testing sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    return X_train, X_test, y_train, y_test



def train_model(X_train, y_train):
    # Initialize the XGBoost model
    model = xgb.XGBRegressor(objective="reg:squarederror", n_estimators=100, random_state=42)
    
    # Train the model
    model.fit(X_train, y_train)
    
    return model


def evaluate_model(model, X_test, y_test):
    # Make predictions on the test set
    y_pred = model.predict(X_test)

    # Evaluate the model using Mean Absolute Error (MAE)
    mae = mean_absolute_error(y_test, y_pred)
    print(f"Mean Absolute Error: {mae:.2f}")

    # You can also print predicted vs actual values if needed
    comparison = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
    print(comparison.head())


def predict_next_day_temperature(model, input_data):
    # Make a prediction for the next day's temperature
    predicted_temp = model.predict(input_data)
    print(f"Predicted temperature for next day: {predicted_temp[0]:.2f}°C")




In [6]:
city='chennai'
weather_data=fetch_weather_data(city)
convert_to_csv(weather_data,city)
X_scaled, y,scaler,encoder = preprocess_data(f"{city}_weather_data.csv")
X_train, X_test, y_train, y_test = split_data(X_scaled, y)
model = train_model(X_train, y_train)
evaluate_model(model, X_test, y_test)
input_data = [[26.4, 81, 22, 0.05, 6]]  # Example input data for the model (replace with your own)
predict_next_day_temperature(model, input_data)
    

CSV file 'chennai_weather_data.csv' created successfully.
Shape of X_scaled: (7, 5)
Mean Absolute Error: 1.05
   Actual  Predicted
0    25.1  26.300867
1    25.4  26.300867
Predicted temperature for next day: 26.30°C


In [13]:
import random
import numpy as np
def predict_next_7_days(model, last_known_data, scaler, encoder, days=7):
    predictions = []

    # Get the last known input data (scaled and encoded)
    current_input = last_known_data
    print('last known dats is',last_known_data)
    last_known_humidity = current_input[0, 1]  # Assuming 2nd column is Humidity
    last_known_windspeed = current_input[0, 2]  # Assuming 3rd column is WindSpeed
    last_known_precipitation = current_input[0, 3]  # Assuming 4th column is Precipitation
    last_known_condition_encoded = current_input[0, 4]
    # last_known_condition_encoded = current_input[0, -len(encoder.categories_[0]):]  # The encoded "Condition" features from the last data point
    
    # Predict for 7 days
    for day in range(days):
        # Use the model to predict the next day's weather
        prediction = model.predict(current_input)

        # Store the prediction (assuming a single value like AvgTemp)
        predictions.append(prediction[0])  
        
        # Simulate small random changes in weather features
        next_day_humidity = last_known_humidity + np.random.normal(0, 10)  
        next_day_windspeed = last_known_windspeed + np.random.normal(0, 2)  
        next_day_precipitation = last_known_precipitation + np.random.normal(0, 0.1)  

        possible_conditions = ['Clear', 'Cloudy', 'Rainy', 'Windy', 'Snow']
        random_condition = random.choice(possible_conditions)
        
        # Get the one-hot encoded vector for the chosen condition
        next_day_condition = encoder.transform([[random_condition]])  # Encoding the new condition
        next_day_condition_feature = next_day_condition[0, 0] 
        # Prepare the new data point (in the same order as the features used for training)
        # Now, the new data point should have exactly 4 numerical features + 1 encoded condition feature
        new_data_point = np.array([[prediction[0], next_day_humidity, next_day_windspeed, next_day_precipitation]])

        # Concatenate the selected condition feature (just one feature) to the new data point
        new_data_point = np.concatenate([new_data_point, np.array([[next_day_condition_feature]])], axis=1)

        # Ensure the new data point has exactly 5 features (check before scaling)
        print(f"Shape of new_data_point before scaling: {new_data_point.shape}")  # Debugging print
        assert new_data_point.shape[1] == 5, f"Expected 5 features, got {new_data_point.shape[1]}"

        # Scale the entire new data point (numerical features + encoded condition)
        current_input = scaler.transform(new_data_point)  # The input now has the correct number of features (5)

    return predictions


# Example of usage

# Load the data and preprocess it, get the encoder and scaler
X_scaled, y, scaler, encoder = preprocess_data(f"{city}_weather_data.csv")

# Assume the model is already trained, and we have the last known data point (e.g., the last row from the dataset)
last_known_data = X_scaled[-1:]  # Use the last row as the input for prediction

# Make predictions for the next 7 days
predictions = predict_next_7_days(model, last_known_data, scaler, encoder, days=7)

# Print or display the predictions for the next 7 days
for i, prediction in enumerate(predictions, 1):
    print(f"Day {i}: Predicted AvgTemp = {prediction:.2f}°C")


Shape of X_scaled: (7, 5)
last known dats is [[-0.15471319  0.18402945 -0.84310438 -1.15470054  1.15470054]]
Shape of new_data_point before scaling: (1, 5)
Shape of new_data_point before scaling: (1, 5)
Shape of new_data_point before scaling: (1, 5)
Shape of new_data_point before scaling: (1, 5)
Shape of new_data_point before scaling: (1, 5)
Shape of new_data_point before scaling: (1, 5)
Shape of new_data_point before scaling: (1, 5)
Day 1: Predicted AvgTemp = 25.90°C
Day 2: Predicted AvgTemp = 26.57°C
Day 3: Predicted AvgTemp = 26.57°C
Day 4: Predicted AvgTemp = 26.57°C
Day 5: Predicted AvgTemp = 26.57°C
Day 6: Predicted AvgTemp = 26.60°C
Day 7: Predicted AvgTemp = 26.57°C


