In [2]:
!pip install deap


Collecting deap
  Downloading deap-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading deap-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (135 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.4/135.4 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deap
Successfully installed deap-1.4.1


In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from deap import base, creator, tools, algorithms
import random

In [6]:
# Generate synthetic weather data
def generate_weather_data(region_name, n_samples=300):
    np.random.seed(42 if region_name == "North" else 99)
    dates = pd.date_range(start="2021-11-01", end="2022-01-31", freq='D').tolist()
    temp = np.random.uniform(0 if region_name == "North" else 15,
                             15 if region_name == "North" else 30,
                             size=len(dates))
    humidity = np.random.uniform(40, 90, size=len(dates))
    wind_speed = np.random.uniform(0, 15, size=len(dates))
    data = pd.DataFrame({
        "Date": dates,
        "Temperature": temp,
        "Humidity": humidity,
        "WindSpeed": wind_speed
    })
    return data

In [7]:
# Create datasets for North and South regions
north_data = generate_weather_data("North")
south_data = generate_weather_data("South")

In [8]:
# Preprocess data
def preprocess_data(data):
    X = data[["Humidity", "WindSpeed"]]  # Features
    y = data["Temperature"]  # Target
    return train_test_split(X, y, test_size=0.2, random_state=42)

X_train_north, X_test_north, y_train_north, y_test_north = preprocess_data(north_data)
X_train_south, X_test_south, y_train_south, y_test_south = preprocess_data(south_data)

In [9]:
# Without Genetic Algorithm
def train_model(X_train, y_train, X_test, y_test):
    model = LinearRegression()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    return mse, r2

mse_north, r2_north = train_model(X_train_north, y_train_north, X_test_north, y_test_north)
mse_south, r2_south = train_model(X_train_south, y_train_south, X_test_south, y_test_south)

In [10]:
print("Without GA:")
print(f"North Region - MSE: {mse_north}, R2: {r2_north}")
print(f"South Region - MSE: {mse_south}, R2: {r2_south}")


Without GA:
North Region - MSE: 21.016798201090662, R2: -0.05482277608066788
South Region - MSE: 20.25695821203497, R2: -0.07131426570565158


In [11]:
from sklearn.preprocessing import StandardScaler

def evaluate_model(individual, X_train, y_train, X_test, y_test):
    alpha = individual[0]  # Example hyperparameter
    # Normalize data manually
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = LinearRegression()  # No 'normalize' parameter
    model.fit(X_train_scaled, y_train)
    predictions = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, predictions)
    return mse,

In [12]:
def genetic_algorithm(X_train, y_train, X_test, y_test):
    creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMin)
    toolbox = base.Toolbox()
    toolbox.register("attr_float", random.uniform, 0, 1)
    toolbox.register("individual", tools.initRepeat, creator.Individual, toolbox.attr_float, n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", evaluate_model, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
    toolbox.register("mate", tools.cxBlend, alpha=0.5)
    toolbox.register("mutate", tools.mutGaussian, mu=0, sigma=0.2, indpb=0.2)
    toolbox.register("select", tools.selTournament, tournsize=3)

    pop = toolbox.population(n=10)
    hof = tools.HallOfFame(1)
    algorithms.eaSimple(pop, toolbox, cxpb=0.7, mutpb=0.2, ngen=20, stats=None, halloffame=hof, verbose=False)
    return hof[0]

best_params_north = genetic_algorithm(X_train_north, y_train_north, X_test_north, y_test_north)
best_params_south = genetic_algorithm(X_train_south, y_train_south, X_test_south, y_test_south)



In [13]:
# Retrain models with best parameters
print("With GA:")
mse_north_ga, r2_north_ga = train_model(X_train_north, y_train_north, X_test_north, y_test_north)
mse_south_ga, r2_south_ga = train_model(X_train_south, y_train_south, X_test_south, y_test_south)

print(f"North Region - MSE: {mse_north_ga}, R2: {r2_north_ga}")
print(f"South Region - MSE: {mse_south_ga}, R2: {r2_south_ga}")


With GA:
North Region - MSE: 21.016798201090662, R2: -0.05482277608066788
South Region - MSE: 20.25695821203497, R2: -0.07131426570565158


**Inference**

**The North and South regions:**

**Without Genetic Algorithm (GA):**

**North Region: **

MSE (Mean Squared Error): 21.02

R² (R-squared): -0.055


**South Region:**

MSE: 20.26

R²: -0.071

**With Genetic Algorithm (GA):**

**North Region:**

MSE: 21.02

R²: -0.055

**South Region:**

MSE: 20.26

R²: -0.071

**Analysis of Results:**

**MSE (Mean Squared Error):**

MSE measures the average squared difference between the actual and predicted values. The lower the MSE, the better the model’s performance. In this case, MSE is roughly the same for both regions, indicating that the model is performing similarly in both North and South regions, regardless of whether GA is used or not.

**R² (R-squared):**

R² is a statistical measure that indicates how well the model explains the variability of the target variable. R² values close to 1 indicate a good fit, while values close to 0 suggest that the model doesn't explain the variability well. Here, the R² values are negative, which means that the model is performing worse than a simple mean-based model (which would have an R² of 0). The negative R² values show that the linear regression model is not well suited for this particular weather prediction task.


**With and Without GA:**

The results for both regions, with and without the use of GA, are identical. This suggests that the Genetic Algorithm did not improve the performance of the model in this case. There could be several reasons for this:

The features (e.g., humidity, wind speed) may not have enough complexity to benefit from GA optimization.

The model might already be well-tuned for this specific problem without the need for additional hyperparameter optimization.

The model used (Linear Regression) may not be the best choice for this type of forecasting, and more complex models (e.g., Decision Trees, Random Forest, or Neural Networks) could be more appropriate.

**Conclusion:**

The current model, Linear Regression, does not seem to provide an accurate weather forecast, as indicated by the negative R² and moderate MSE values.
Genetic Algorithm did not improve performance, implying that the optimization of the hyperparameters wasn't effective for this specific problem.
It would be advisable to experiment with different models or additional features to better capture the complexity of weather prediction.