# Load and Explore the Data

In [2]:
import pandas as pd

In [3]:
# Read the weather CSV (path is relative to the notebook's working directory)
# into a DataFrame that the rest of the notebook refers to as `data`.
csv_path = 'generated_weather_data.csv'
data = pd.read_csv(csv_path)

In [4]:
# Preview the top of the table; head() with no argument returns the first 5 rows.
first_rows = data.head()
print(first_rows)

   Temperature (C)  Humidity (%)  Light Intensity (lux)  Weather Label
0        -9.184457     33.371744           35383.408245              1
1        45.066938     52.429304           28623.038540              0
2        21.563390     91.611261           12307.903044              0
3        10.111886     34.520671           61988.810796              1
4       -35.499204     97.138714           59181.337597              0


In [5]:
# Count missing entries in each column (isna() is the same check as isnull()).
missing_per_column = data.isna().sum()
print(missing_per_column)

Temperature (C)          0
Humidity (%)             0
Light Intensity (lux)    0
Weather Label            0
dtype: int64


In [6]:
# Per-column summary: count, mean, std, min, quartiles and max.
summary_stats = data.describe()
print(summary_stats)

       Temperature (C)  Humidity (%)  Light Intensity (lux)  Weather Label
count      5000.000000   5000.000000            5000.000000    5000.000000
mean         -0.279126     49.214984           50101.208136       0.601800
std          28.981263     28.819346           29175.430271       0.489576
min         -54.809389      0.000000               0.000000       0.000000
25%         -25.541465     24.593455           24838.801253       0.000000
50%          -0.421021     48.738539           49761.708000       1.000000
75%          24.908937     73.410699           75558.495881       1.000000
max          54.450128    100.000000          100000.000000       1.000000


# Preprocess the Data

In [7]:
from sklearn.model_selection import train_test_split

In [10]:
# Model inputs: the three measurement columns, selected by name.
feature_columns = ['Temperature (C)', 'Humidity (%)', 'Light Intensity (lux)']
X = data[feature_columns]

# Model output: the binary label column (Good weather = 1, Bad weather = 0).
y = data['Weather Label']

In [11]:
# Hold out 20% of the rows for testing and train on the remaining 80%.
# The fixed random_state makes the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Report the (rows, columns) shape of each feature split.
train_shape, test_shape = X_train.shape, X_test.shape
print(f"Training set shape: {train_shape}")
print(f"Test set shape: {test_shape}")

Training set shape: (4000, 3)
Test set shape: (1000, 3)


# Train a Machine Learning Model

In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [14]:
# Instantiate an (as yet unfitted) random forest classifier:
# 100 trees, with a fixed seed so repeated runs build the same forest.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

In [15]:
# Fit the forest on the training split only; the test split stays unseen
# until evaluation.
rf_model.fit(X=X_train, y=y_train)

In [16]:
# Predict a label for every row of the held-out test features.
y_pred = rf_model.predict(X=X_test)

In [17]:
# Fraction of test rows whose predicted label equals the true label,
# printed rounded to two decimal places.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

Model Accuracy: 1.00


In [18]:
# Per-class precision, recall, F1 and support for the test-set predictions.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       396
           1       1.00      1.00      1.00       604

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



# Deploy the Model for Predictions

In [31]:
# Example: one new reading coming from sensors.
# The keys reuse the exact feature column names selected for training,
# so the resulting one-row frame lines up with what the model was fitted on.
sample_reading = {
    'Temperature (C)': 29.5,
    'Humidity (%)': 50.2,
    'Light Intensity (lux)': 1250,
}
new_data = pd.DataFrame([sample_reading])

In [32]:
# Run the fitted model on the new reading; the result is an array holding
# one predicted label per input row (here a single row).
weather_prediction = rf_model.predict(X=new_data)

In [33]:
# Translate the first (and only) predicted label into a message:
# label 1 is reported as good weather, any other label as bad weather.
is_good = weather_prediction[0] == 1
print("The weather is Good." if is_good else "The weather is Bad.")

The weather is Good.


# Save the Trained Model

In [21]:
!pip install joblib



In [34]:
import joblib

In [35]:
# Persist the fitted model to disk with joblib (written relative to the
# notebook's working directory). joblib.dump returns the list of file names
# it wrote, which the notebook displays as this cell's output.
model_path = 'weather_model.pkl'
joblib.dump(rf_model, model_path)

['weather_model.pkl']