In [118]:
import numpy as np
import pandas as pd
import requests as rq
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [119]:
weather_types = { 'NA': 'Not available', '-1': 'Trace rain', '0': 'Clear night', '1': 'Sunny day', '2': 'Partly cloudy (night)', '3': 'Partly cloudy (day)', '4': 'Not used', '5': 'Mist', '6': 'Fog', '7': 'Cloudy', '8': 'Overcast', '9': 'Light rain shower (night)', '10': 'Light rain shower (day)', '11': 'Drizzle', '12': 'Light rain', '13': 'Heavy rain shower (night)', '14': 'Heavy rain shower (day)', '15': 'Heavy rain', '16': 'Sleet shower (night)', '17': 'Sleet shower (day)', '18': 'Sleet', '19': 'Hail shower (night)', '20': 'Hail shower (day)', '21': 'Hail', '22': 'Light snow shower (night)', '23': 'Light snow shower (day)', '24': 'Light snow', '25': 'Heavy snow shower (night)', '26': 'Heavy snow shower (day)', '27': 'Heavy snow', '28': 'Thunder shower (night)', '29': 'Thunder shower (day)', '30': 'Thunder'}

visibility_dict = { 'UN': 'Unknown', 'VP': 'Very poor - Less than 1 km', 'PO': 'Poor - Between 1-4 km', 'MO': 'Moderate - Between 4-10 km', 'GO': 'Good - Between 10-20 km', 'VG': 'Very good - Between 20-40 km', 'EX': 'Excellent - More than 40 km'}
 

In [120]:
def fetch_api_data():
    """
    Fetching the data from the MetOffice API.
    
    Args:
        None
    Returns: 
        JSON object
    Raises:
        None
    """
    data_url = "http://datapoint.metoffice.gov.uk/public/data/val/wxobs/all/json/3772?res=hourly&key=12a3f254-6c4a-4f6d-a464-a7a2290a7226"
    response = rq.get(data_url)
    return response.json()


In [121]:
def get_location(data):
    """
    Returns the location of the weather data.
    
    Args:
        data (JSON): The weather data.
    Returns:
        str: The location of the weather data.
    Raises:
        KeyError: If the location is not found in the data.
    """
    return f"{data['SiteRep']['DV']['Location']['name']}, {data['SiteRep']['DV']['Location']['country']}"

In [122]:
def make_dataframe(data):
    """
    Converts the JSON data into a pandas dataframe.
    
    Args:
        data (JSON): JSON data from the Met Office DataPoint API
    Returns:
        weather_data (pandas dataframe): dataframe containing the weather data
    Raises:
        None
    """

    weather_data = None
    for day in data['SiteRep']['DV']['Location']['Period']:
        df = pd.DataFrame(day['Rep'])
        df['date'] = day['value']
        weather_data = pd.concat([weather_data, df], ignore_index=True)
    return weather_data

In [123]:
def clean_dataframe(weather_data):
    """
    Applies data manipulations to the weather data and prepare dataframe.
    
    Args:
        weather_data (DataFrame): weather data
    Returns:
        weather_data (DataFrame): weather data after manipulations
    Raises:
        None
    """
    headers = {'D':'Wind Direction(compass)', 'G':'Wind Gust(mph)', 'H':'Screen Relative Humidity(%)', 'P':'Pressure(hpa)', 'S':'Wind Speed(mph)', 'T':'Temperature(C)', 'V':'Visibility(m)', 'W':'Weather Type', 'Pt':'Pressure Tendency', 'Dp':'Dew Point(C)', '$':'Minutes Since 12o Clock'}
    weather_data.rename(columns=headers, inplace=True)
    weather_data.dropna(how='any', inplace=True)
    compass_directions_map = { 'N': 1, 'NNE':2, 'NE': 3, 'ENE':4, 'E': 5, 'ESE':6, 'SE': 7, 'SSE':8, 'S': 9, 'SSW':10, 'SW': 11, 'WSW':12, 'W': 13, 'WNW':14, 'NW': 15, 'NNW':16}
    weather_data['Wind Direction(compass)'] = weather_data['Wind Direction(compass)'].map(compass_directions_map)
    Pressure_tendency_map = { 'F': 0, 'R': 1, 'S': 2}   
    weather_data['Pressure Tendency'] = weather_data['Pressure Tendency'].map(Pressure_tendency_map)
    weather_data['date'] = pd.to_datetime(weather_data['date'], format='%Y-%m-%dZ')
    column_datatypes = {'Wind Direction(compass)':int, 'Wind Gust(mph)':float, 'Screen Relative Humidity(%)':float, 'Pressure(hpa)':float, 'Wind Speed(mph)':float, 'Temperature(C)':float, 'Visibility(m)':float, 'Weather Type':int, 'Pressure Tendency':int, 'Dew Point(C)':float, 'Minutes Since 12o Clock':int}
    weather_data = weather_data.astype(column_datatypes)
    weather_data['day'] = weather_data['date'].dt.day
    weather_data['month'] = weather_data['date'].dt.month
    weather_data['year'] = weather_data['date'].dt.year
    weather_data.drop('date', axis=1, inplace=True)
    return weather_data

In [124]:
def prepare_model_data(weather_data):
    """This function prepares the data for the model by sorting the data by date and time and then shifting the output columns by 1 row.
    
    args:
        weather_data: The dataframe containing the weather data.
    returns:
        A dataframe containing the weather data with the output columns shifted by 1 row.
    raises:
        None
    """
    
    weather_data.sort_values(by=['year','month','day', 'Minutes Since 12o Clock'], kind='mergesort', inplace=True)
    output = weather_data[['Wind Direction(compass)', 'Pressure(hpa)', 'Wind Speed(mph)', 'Temperature(C)', 'Visibility(m)', 'Weather Type']].shift(-1)
    output.columns = ['Wind Direction(compass) (t+1)', 'Pressure(hpa) (t+1)', 'Wind Speed(mph) (t+1)', 'Temperature(C) (t+1)', 'Visibility(m) (t+1)', 'Weather Type (t+1)']
    data_for_model = pd.concat([weather_data, output], axis=1)
    data_for_model.dropna(inplace=True)
    return data_for_model
    

In [125]:
data = fetch_api_data()
weather_data = make_dataframe(data)
weather_data = clean_dataframe(weather_data)
weather_data = prepare_model_data(weather_data)

In [126]:
weather_data

Unnamed: 0,Wind Direction(compass),Wind Gust(mph),Screen Relative Humidity(%),Pressure(hpa),Wind Speed(mph),Temperature(C),Visibility(m),Weather Type,Pressure Tendency,Dew Point(C),Minutes Since 12o Clock,day,month,year,Wind Direction(compass) (t+1),Pressure(hpa) (t+1),Wind Speed(mph) (t+1),Temperature(C) (t+1),Visibility(m) (t+1),Weather Type (t+1)
0,9,24.0,57.9,1016.0,15.0,23.6,40000.0,1,0,14.9,1020,11,8,2023,9.0,1016.0,14.0,22.6,45000.0,8.0
1,9,22.0,58.8,1016.0,14.0,22.6,45000.0,8,0,14.2,1080,11,8,2023,10.0,1016.0,15.0,21.6,40000.0,8.0
2,10,23.0,58.2,1016.0,15.0,21.6,40000.0,8,0,13.1,1140,11,8,2023,10.0,1016.0,13.0,20.2,40000.0,8.0
3,10,21.0,64.3,1016.0,13.0,20.2,40000.0,8,0,13.3,1200,11,8,2023,10.0,1016.0,11.0,18.9,30000.0,0.0
4,10,18.0,72.1,1016.0,11.0,18.9,30000.0,0,1,13.8,1260,11,8,2023,10.0,1016.0,9.0,18.1,30000.0,8.0
5,10,17.0,75.8,1016.0,9.0,18.1,30000.0,8,1,13.8,1320,11,8,2023,10.0,1016.0,9.0,17.3,30000.0,8.0
6,10,14.0,78.8,1016.0,9.0,17.3,30000.0,8,0,13.6,1380,11,8,2023,11.0,1016.0,9.0,16.6,30000.0,2.0
7,11,13.0,82.9,1016.0,9.0,16.6,30000.0,2,0,13.7,0,12,8,2023,10.0,1015.0,7.0,15.8,29000.0,0.0
8,10,11.0,86.7,1015.0,7.0,15.8,29000.0,0,0,13.6,60,12,8,2023,11.0,1015.0,7.0,15.0,23000.0,0.0
9,11,11.0,91.3,1015.0,7.0,15.0,23000.0,0,0,13.6,120,12,8,2023,11.0,1015.0,3.0,13.7,19000.0,0.0


In [127]:

class Weather(nn.Module):
    def __init__(self, input_dim, output_dim, hidden_dim):
        super(Weather, self).__init__()

        # Shared hidden layers
        self.shared_layer = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU()
        )

        # Output layers for each output
        self.temparature_layer = nn.Linear(hidden_dim, output_dim)
        self.visibility_layer = nn.Linear(hidden_dim, output_dim)
        self.wind_direction_layer = nn.Linear(hidden_dim, output_dim)
        self.wind_speed_layer = nn.Linear(hidden_dim, output_dim)
        self.weather_type_layer = nn.Linear(hidden_dim, output_dim)
        self.pressure_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, input):

        # Pass through shared hidden layer
        shared_output = self.shared_layer(input)

        # Separate output predictions
        temperature = self.temparature_layer(shared_output)
        visibility = self.visibility_layer(shared_output)
        wind_direction = self.wind_direction_layer(shared_output)
        wind_speed = self.wind_speed_layer(shared_output)
        weather_type = self.weather_type_layer(shared_output)
        pressure = self.pressure_layer(shared_output)
        
        return wind_direction, pressure, wind_speed, temperature, visibility, weather_type

In [128]:
class WeatherData(Dataset):
    def __init__(self, data):
       self.X = torch.tensor(data[['Wind Direction(compass)', 'Wind Gust(mph)',
       'Screen Relative Humidity(%)', 'Pressure(hpa)', 'Wind Speed(mph)',
       'Temperature(C)', 'Visibility(m)', 'Weather Type', 'Pressure Tendency',
       'Dew Point(C)', 'Minutes Since 12o Clock', 'day', 'month', 'year']].values, dtype=torch.float32)
       self.y = torch.tensor(data[['Wind Direction(compass) (t+1)', 'Pressure(hpa) (t+1)',
       'Wind Speed(mph) (t+1)', 'Temperature(C) (t+1)', 'Visibility(m) (t+1)',
       'Weather Type (t+1)']].values, dtype=torch.float32)

    def __len__(self):
       return len(self.X)
    
    def __getitem__(self, idx):
       return self.X[idx], self.y[idx]

In [129]:
batch_size = 1
num_batches = 1
num_epochs = 100

weather_data_class = WeatherData(weather_data)
weather_data_loader = DataLoader(weather_data_class, batch_size=batch_size, shuffle=True)

In [131]:
# Define the input and output dimensions
input_dim = 14
output_dim = 1
hidden_dim = 20

# Create an instance of the model
model = Weather(input_dim, output_dim, hidden_dim)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [205]:
def train_model(model, num_epochs, weather_data_loader, criterion, optimizer):
    for epoch in range(num_epochs):
        for idx, (X, y) in enumerate(weather_data_loader):

            # Zero the gradients
            optimizer.zero_grad()
            # Forward pass
            wind_direction, pressure, wind_speed, temperature, visibility, weather_type = model(X)

            # Compute loss
            loss_wind_direction = criterion(wind_direction[0][0], y[0,0])
            loss_pressure = criterion(pressure[0][0], y[0,1])
            loss_wind_speed = criterion(wind_speed[0][0], y[0,2])
            loss_temperature = criterion(temperature[0][0], y[0,3])
            loss_visibility = criterion(visibility[0][0], y[0,4])
            loss_weather_type = criterion(weather_type[0][0], y[0,5])
            #total loss
            loss = loss_wind_direction + loss_pressure + loss_wind_speed + loss_temperature + loss_visibility + loss_weather_type

            # Backpropagation and optimization
            loss.backward()
            optimizer.step()
            
            # Print progress
            if idx % 100 == 0:
                print(f"Epoch [{epoch+1}/{num_epochs}], Batch [{idx+1}/{num_batches}], Loss: {loss.item():.4f}")


Epoch [1/100], Batch [1/1], Loss: 2797030.0000
Epoch [2/100], Batch [1/1], Loss: 15399.9404
Epoch [3/100], Batch [1/1], Loss: 16308914.0000
Epoch [4/100], Batch [1/1], Loss: 15465727.0000
Epoch [5/100], Batch [1/1], Loss: 4051204.5000
Epoch [6/100], Batch [1/1], Loss: 6076201.0000
Epoch [7/100], Batch [1/1], Loss: 2380452.2500
Epoch [8/100], Batch [1/1], Loss: 7935.5083
Epoch [9/100], Batch [1/1], Loss: 954595.6875
Epoch [10/100], Batch [1/1], Loss: 422191.0625
Epoch [11/100], Batch [1/1], Loss: 542162.6250
Epoch [12/100], Batch [1/1], Loss: 15260878.0000
Epoch [13/100], Batch [1/1], Loss: 3343235.7500
Epoch [14/100], Batch [1/1], Loss: 11945523.0000
Epoch [15/100], Batch [1/1], Loss: 16123149.0000
Epoch [16/100], Batch [1/1], Loss: 9400.1602
Epoch [17/100], Batch [1/1], Loss: 221530784.0000
Epoch [18/100], Batch [1/1], Loss: 3267877.0000
Epoch [19/100], Batch [1/1], Loss: 404810.5938
Epoch [20/100], Batch [1/1], Loss: 1673273.8750
Epoch [21/100], Batch [1/1], Loss: 10903.5088
Epoch [2

In [133]:
# use this to streamline the process
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

# Import your functions
from your_module import fetch_data_using_api, apply_custom_transformations, create_dataframe, train_model

# Wrap your functions with FunctionTransformer
fetch_transformer = FunctionTransformer(fetch_data_using_api, validate=False)
transform_transformer = FunctionTransformer(apply_custom_transformations, validate=False)
df_transformer = FunctionTransformer(create_dataframe, validate=False)
train_transformer = FunctionTransformer(train_model, validate=False)

# Create a pipeline
pipeline = Pipeline([
    ('fetch', fetch_transformer),             # Fetch data using API
    ('transform', transform_transformer),     # Apply custom transformations
    ('create_df', df_transformer),            # Create DataFrame
    ('train', train_transformer)              # Train model
])

# Now, you can use the pipeline to process new data
new_data = ...  # Your new data in the required format

# Fit and transform the new data through the pipeline
pipeline.fit_transform(new_data)


ModuleNotFoundError: No module named 'your_module'