In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [3]:
# Load the dataset
# NOTE(review): absolute, machine-specific path - this cell only runs where this
# exact directory layout exists; consider a path relative to the notebook.
data_path = 'C:\\Users\\jackm\\Desktop\\projects\\fog_predictor\\weather_data.csv'
data = pd.read_csv(data_path, encoding='ISO-8859-1')

# Update the 'Fog' column based on dates: a row is labelled 1 when its
# 'Observation Time' text contains '2024-05-11', otherwise 0.
# astype(str) keeps a missing timestamp from raising TypeError in the
# substring test; such a row simply gets label 0.
data['Fog'] = (
    data['Observation Time']
    .astype(str)
    .str.contains('2024-05-11', regex=False)
    .astype(int)
)

# Convert features
# The pattern keeps an optional leading minus sign and an optional decimal part,
# so readings such as "-3" or "51.5" are not silently turned into 3 / 51.
NUMBER_PATTERN = r'(-?\d+(?:\.\d+)?)'
data['Temperature'] = data['Temperature'].str.extract(NUMBER_PATTERN, expand=False).astype(float)
data['Dew Point'] = data['Dew Point'].str.extract(NUMBER_PATTERN, expand=False).astype(float)
data['Humidity'] = data['Humidity'].str.replace('%', '', regex=False).astype(float)

# Encode categorical data
# Any 'Trend' value other than the two keys below maps to NaN, and the row is
# removed by the NaN filter further down.
trend_mapping = {'rising': 1, 'falling': 0}  # Example encoding
data['Trend'] = data['Trend'].map(trend_mapping).astype(float)

# Shuffle the data (fixed random_state so the train/test split is repeatable)
data = data.sample(frac=1, random_state=420).reset_index(drop=True)

# Prepare feature tensor and labels
feature_columns = ['Temperature', 'Humidity', 'Dew Point', 'Trend']
features = torch.tensor(data[feature_columns].values, dtype=torch.float32)
labels = torch.tensor(data['Fog'].values, dtype=torch.float32)

# Drop rows with NaN values: build the row mask once and apply the same mask to
# both tensors so features and labels stay aligned.
valid_rows = ~torch.isnan(features).any(dim=1)
features, labels = features[valid_rows], labels[valid_rows]

# Split data into train and test (first 80% of the shuffled rows for training)
split_index = int(0.8 * len(features))
train_features, test_features = features[:split_index], features[split_index:]
train_labels, test_labels = labels[:split_index], labels[split_index:]
print(train_features)

tensor([[ 52.0000, 100.0000,  51.0000,   0.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 62.0000,  72.3600,  53.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   0.0000],
        [ 52.0000, 100.0000,  51.0000,   0.0000],
        [ 52.0000, 100.0000,  51.0000,   0.0000],
        [ 52.0000, 100.0000,  51.0000,   0.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],
        [ 52.0000, 100.0000,  51.0000,   1.0000],


In [4]:
# Define a simple MLP model
class MLP(nn.Module):
    """Single-hidden-layer perceptron for binary classification.

    The network is Linear -> ReLU -> Linear -> Sigmoid and emits one value in
    [0, 1] per input row.

    Args:
        in_features: Number of input features per sample (default 4).
        hidden_size: Number of units in the hidden layer (default 10).

    The defaults reproduce the original fixed 4 -> 10 -> 1 architecture, and the
    sub-module layout (and therefore the state_dict keys) is unchanged.
    """

    def __init__(self, in_features=4, hidden_size=10):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(in_features, hidden_size),  # input features to hidden nodes
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
            nn.Sigmoid(),  # Output a single probability for binary classification
        )

    def forward(self, x):
        """Return the sigmoid output of the stacked layers for input ``x``."""
        return self.layers(x)


In [5]:
# Initialize the model
# Seed torch before constructing the network so the random weight
# initialisation is the same on every Restart & Run All. 420 mirrors the
# random_state used for the data shuffle. Outputs recorded before this seed was
# added came from an unseeded run and will not match exactly.
torch.manual_seed(420)
model = MLP()
loss_function = nn.BCELoss()  # Binary cross-entropy loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Adam optimizer

In [6]:
# Training loop
def train(model, features, labels, optimizer, loss_function, epochs):
    """Run full-batch gradient descent on ``model`` for ``epochs`` iterations.

    Every iteration feeds all of ``features`` through the model, computes
    ``loss_function`` against ``labels``, back-propagates and takes one
    optimizer step. The loss is printed on iterations 0, 100, 200, ...
    (shown 1-based as "Epoch 1", "Epoch 101", ...).

    Args:
        model: Module called as ``model(features)``; its output is expected to
            have a trailing dimension of size 1, which is removed before the
            loss is computed.
        features: Input tensor passed to the model unchanged.
        labels: Target tensor handed to ``loss_function``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        loss_function: Callable ``(predictions, labels) -> scalar loss tensor``.
        epochs: Number of optimisation steps to perform.

    Returns:
        None. The model parameters are updated in place.
    """
    # Training mode only needs to be switched on once, not on every iteration.
    model.train()

    for epoch in range(epochs):
        optimizer.zero_grad()

        # Forward pass
        predictions = model(features)
        # squeeze(-1) removes only the trailing output dimension. A bare
        # squeeze() would also drop the batch dimension when there is a single
        # sample, producing a 0-d tensor that no longer matches ``labels``.
        loss = loss_function(predictions.squeeze(-1), labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        if epoch % 100 == 0:
            print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Fit the model on the training split for 500 full-batch steps
train(
    model=model,
    features=train_features,
    labels=train_labels,
    optimizer=optimizer,
    loss_function=loss_function,
    epochs=500,
)

Epoch 1, Loss: 2.3386991024017334
Epoch 101, Loss: 0.6372416615486145
Epoch 201, Loss: 0.540835976600647
Epoch 301, Loss: 0.4577319025993347
Epoch 401, Loss: 0.43126165866851807


In [6]:
def evaluate_model(model, test_features, test_labels):
    """Print accuracy, precision, recall, F1 and ROC AUC for a binary classifier.

    The model is switched to eval mode and run on ``test_features`` without
    gradient tracking. Outputs above 0.5 are counted as class 1 for the
    threshold-based metrics; the raw outputs are used for ROC AUC.

    Args:
        model: Module called as ``model(test_features)``; its output is
            expected to have a trailing dimension of size 1.
        test_features: Input tensor passed to the model unchanged.
        test_labels: Ground-truth 0/1 labels (tensor or array-like).

    Returns:
        None. The metrics are printed, one per line.
    """
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # No need to track gradients
        # squeeze(-1) drops only the trailing output dimension, so a one-row
        # test set stays 1-D instead of collapsing to a 0-d scalar.
        predictions = model(test_features).squeeze(-1)

    # Hand scikit-learn plain CPU NumPy arrays rather than torch tensors; this
    # also works when the tensors live on a non-CPU device.
    probabilities = predictions.detach().cpu().numpy()
    true_labels = torch.as_tensor(test_labels).detach().cpu().numpy()

    # Convert probabilities to binary output
    predicted_classes = (probabilities > 0.5).astype('float32')

    # Calculate metrics
    accuracy = accuracy_score(true_labels, predicted_classes)
    precision = precision_score(true_labels, predicted_classes)
    recall = recall_score(true_labels, predicted_classes)
    f1 = f1_score(true_labels, predicted_classes)
    try:
        roc_auc = roc_auc_score(true_labels, probabilities)  # Use probabilities for ROC AUC
    except ValueError:
        # ROC AUC is undefined when the labels contain a single class; report
        # NaN instead of aborting the whole report.
        roc_auc = float('nan')

    print(f"Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1 Score: {f1}\nROC AUC: {roc_auc}")

# Report the metrics on the held-out test split
evaluate_model(
    model=model,
    test_features=test_features,
    test_labels=test_labels,
)


Accuracy: 0.8888888888888888
Precision: 0.8571428571428571
Recall: 1.0
F1 Score: 0.9230769230769231
ROC AUC: 0.8333333333333334


In [7]:
# Persist only the model's parameters (its state_dict) to mlp_model.pth
trained_weights = model.state_dict()
torch.save(trained_weights, 'mlp_model.pth')