In [6]:
# train_models.py

import joblib
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

# Load the weather records from the CSV file into a DataFrame
dataset = pd.read_csv('year_lahore_weather_data.csv')

# Predictor columns shared by both temperature models
feature_columns = ['Humidity', 'Wind_Speed', 'Weather_Condition']
features = dataset[feature_columns]

# One regression target per model: daily maximum and daily minimum
target_max_t, target_min_t = (
    dataset['Max_Temperature'],
    dataset['Min_Temperature'],
)

# Define the transformer: one-hot encode 'Weather_Condition' and pass every
# other selected column through unchanged.
# handle_unknown='ignore' encodes a category that was not present during fit
# as an all-zero row instead of raising, so a weather condition that only
# shows up in the test split (or at inference time) cannot crash predict().
preprocessor = ColumnTransformer(
    transformers=[
        ('weather', OneHotEncoder(handle_unknown='ignore'), ['Weather_Condition'])
    ],
    remainder='passthrough'
)

# Combine preprocessing and the regressor in one pipeline for Max_Temperature.
# The pipeline gets its own unfitted copy of the preprocessor: Pipeline keeps
# the step objects it is handed, so passing `preprocessor` itself would let
# any later fit of that shared object change this trained model in memory.
max_t_model = Pipeline([
    ('preprocessor', clone(preprocessor)),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs
X_train, X_test, y_train_max_t, y_test_max_t = train_test_split(
    features, target_max_t, test_size=0.2, random_state=42
)

# Fit the encoder and the random forest on the training rows
max_t_model.fit(X_train, y_train_max_t)

# Evaluate on the held-out rows and report the mean squared error
y_pred_max_t = max_t_model.predict(X_test)
mse_max_t = mean_squared_error(y_test_max_t, y_pred_max_t)
print(f'Mean Squared Error for Max_Temperature: {mse_max_t}')

# Persist the whole fitted pipeline (preprocessing + regressor)
joblib.dump(max_t_model, 'max_t_model.joblib')

# Split the data into training and testing sets for min_t (same test size and
# seed as the max_t split)
X_train, X_test, y_train_min_t, y_test_min_t = train_test_split(
    features, target_min_t, test_size=0.2, random_state=42
)

# Build the pipeline for min_t around its own unfitted copy of the
# preprocessor. Reusing the `preprocessor` instance directly would refit an
# object that another pipeline may already hold, silently changing that
# pipeline's fitted state.
min_t_model = Pipeline([
    ('preprocessor', clone(preprocessor)),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Train the model for min_t
min_t_model.fit(X_train, y_train_min_t)

# Evaluate on the held-out rows and report the mean squared error
y_pred_min_t = min_t_model.predict(X_test)
mse_min_t = mean_squared_error(y_test_min_t, y_pred_min_t)
print(f'Mean Squared Error for Min_Temperature: {mse_min_t}')

# Persist the whole fitted pipeline (preprocessing + regressor)
joblib.dump(min_t_model, 'min_t_model.joblib')


Mean Squared Error for Max_Temperature: 23.874669140224587
Mean Squared Error for Min_Temperature: 14.463464927608484


['min_t_model.joblib']