In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 1: Load the dataset
data = pd.read_csv('Weather_data.csv')

# Step 2: Check for the presence of 'precip_mm' (for Rainfall)
if 'precip_mm' in data.columns:
    # Convert 'precip_mm' to binary (0: No Rain, 1: Rain) using threshold of 0.1 mm
    data['Rainfall'] = np.where(data['precip_mm'] > 0.1, 1, 0)
else:
    raise KeyError("'precip_mm' column is missing in the dataset.")

# Step 3: Fill missing values in the dataset (if any)
data.fillna(0, inplace=True)

# Step 4: Define features (X) and target (y)
X = data[['temperature_celsius', 'wind_kph', 'humidity', 'cloud', 'pressure_mb']]  # Use relevant columns from your dataset
y = data['Rainfall']  # Target is Rainfall (0 or 1)

# Step 5: Split the dataset into training (80%) and testing sets (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 7: Initialize and train the Logistic Regression model
logistic_model = LogisticRegression(random_state=42)
logistic_model.fit(X_train_scaled, y_train)

# Step 8: Make predictions on the test data
y_pred = logistic_model.predict(X_test_scaled)

# Step 9: Evaluate the model using accuracy, confusion matrix, and classification report
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Classification Report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Step 10: Function to predict if rainfall will occur based on input weather conditions
def predict_rainfall(weather_data):
    """
    Predict if rainfall will occur based on input weather conditions.
    
    :param weather_data: A list or array of [Temperature, WindSpeed, Humidity, CloudCover, Pressure]
    :return: "Rainfall expected" or "No rainfall expected"
    """
    weather_data_scaled = scaler.transform([weather_data])
    predicted_class = logistic_model.predict(weather_data_scaled)[0]
    
    if predicted_class == 1:
        return "Rainfall expected"
    else:
        return "No rainfall expected"

# Step 11: Make predictions with the trained model using test weather data

# Test Case 1: Predict rainfall for given weather conditions (Rainfall expected)
test_weather_data_1 = [25.0, 15.0, 90, 80, 1008]  # [Temperature, WindSpeed, Humidity, CloudCover, Pressure]
rainfall_prediction_1 = predict_rainfall(test_weather_data_1)
print(f'\nTest case 1 result: {rainfall_prediction_1}')

# Test Case 2: Predict rainfall for given weather conditions (No rainfall expected)
test_weather_data_2 = [30.0, 10.0, 40, 10, 1015]  # [Temperature, WindSpeed, Humidity, CloudCover, Pressure]
rainfall_prediction_2 = predict_rainfall(test_weather_data_2)
print(f'Test case 2 result: {rainfall_prediction_2}')


Accuracy: 0.80

Confusion Matrix:
[[3476  312]
 [ 650  376]]

Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.92      0.88      3788
           1       0.55      0.37      0.44      1026

    accuracy                           0.80      4814
   macro avg       0.69      0.64      0.66      4814
weighted avg       0.78      0.80      0.78      4814


Test case 1 result: No rainfall expected
Test case 2 result: No rainfall expected




In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Assuming 'data' is already loaded and preprocessed
y = data['Rainfall']

# Split the data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model (Mean Squared Error)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

# Define a custom accuracy function for regression
def calculate_accuracy(y_true, y_pred, tolerance=0.1):
    return (abs(y_true - y_pred) <= tolerance).mean()

# Calculate custom accuracy
accuracy = calculate_accuracy(y_test, y_pred)
print(f'Accuracy: {accuracy}')

# Function to predict if rainfall is going to happen (yes/no)
def predict_rainfall(weather_data):
    """
    Predict if rainfall will occur based on input weather conditions.
    
    :param weather_data: A list or array of [Temperature, WindSpeed, Humidity, CloudCover, Pressure]
    :return: "Rainfall expected" or "No rainfall expected"
    """
    weather_data_scaled = scaler.transform([weather_data])
    predicted_rainfall = model.predict(weather_data_scaled)[0]
    
    # If predicted rainfall is greater than 0.1 mm, assume rainfall will occur
    if predicted_rainfall > 0.1:
        return f"Rainfall expected ({predicted_rainfall:.2f} mm)"
    else:
        return "No rainfall expected"

# Test Case 1: Predict rainfall for given weather conditions (Rainfall expected)
test_weather_data_1 = [25.0, 15.0, 90, 80, 1008]  # Example: [Temperature, WindSpeed, Humidity, CloudCover, Pressure]
rainfall_prediction_1 = predict_rainfall(test_weather_data_1)
print(f'Test case 1 result: {rainfall_prediction_1}')

# Test Case 2: Predict rainfall for given weather conditions (No rainfall expected)
test_weather_data_2 = [30.0, 10.0, 40, 10, 1015]  # Example: [Temperature, WindSpeed, Humidity, CloudCover, Pressure]
rainfall_prediction_2 = predict_rainfall(test_weather_data_2)
print(f'Test case 2 result: {rainfall_prediction_2}')

Mean Squared Error: 0.6675940573732939
Accuracy: 0.652471956792688
Test case 1 result: Rainfall expected (0.82 mm)
Test case 2 result: No rainfall expected


