In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pygad
import warnings
warnings.filterwarnings("ignore")

# Parameters: size of the synthetic dataset and the time of its first reading
num_entries = 10000
start_date = datetime(2024, 1, 1)

# Generate timestamps, one reading every 15 minutes starting at start_date
timestamps = [start_date + i * timedelta(minutes=15) for i in range(num_entries)]

# Generate dummy data.
# NOTE: the draws below share one seeded global RNG stream, so their order
# determines the generated values; keep the sequence as-is.
np.random.seed(42)
temperature = np.random.uniform(low=15, high=35, size=num_entries)  # Temperature in °C
humidity = np.random.uniform(low=30, high=70, size=num_entries)  # Humidity in %
production_level = np.random.uniform(low=50, high=500, size=num_entries)  # Production level in units/hour
machine_status = np.random.choice(['On', 'Off'], size=num_entries)  # Machine status
# Energy consumption in kWh: a linear mix of the three continuous signals
# plus Gaussian noise (mean 0, standard deviation 10).
energy_consumption = (
    temperature * 0.5
    + humidity * 0.3
    + production_level * 0.2
    + np.random.normal(loc=0, scale=10, size=num_entries)
)

# Create DataFrame
data = pd.DataFrame(
    {
        'Timestamp': timestamps,
        'Temperature': temperature,
        'Humidity': humidity,
        'ProductionLevel': production_level,
        'MachineStatus': machine_status,
        'EnergyConsumption': energy_consumption,
    }
)

# Convert MachineStatus to binary: 'On' -> 1, anything else -> 0
data['MachineStatus'] = data['MachineStatus'].eq('On').astype('int64')

# Display the first few rows of the data
print(data.head(n=5))

# Data Preprocessing: pick the model inputs and the regression target
features = ['Temperature', 'Humidity', 'ProductionLevel', 'MachineStatus']
X = data[features]
y = data['EnergyConsumption']

# Train-test split: hold out 20% of the rows, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature scaling: statistics are fitted on the training rows only and then
# applied to both partitions
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Model Training
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Predictions on the rows the model has seen and on the held-out rows
y_train_pred = model.predict(X_train_scaled)
y_test_pred = model.predict(X_test_scaled)

# Evaluation: mean absolute error and mean squared error per partition
train_mae = mean_absolute_error(y_train, y_train_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)

print(f"Train MAE: {train_mae}, Train MSE: {train_mse}")
print(f"Test MAE: {test_mae}, Test MSE: {test_mse}")

# Define the fitness function for Genetic Algorithm
def fitness_func(ga_instance, solution, solution_idx):
    """Score a candidate operating point by its predicted energy consumption.

    Args:
        ga_instance: The ``pygad.GA`` instance invoking this callback
            (unused here; required by the callback signature).
        solution: Four gene values, in the order temperature, humidity,
            production level, machine status.
        solution_idx: Supplied by pygad as part of the callback signature
            (unused here).

    Returns:
        The negated model prediction, so that a higher fitness corresponds
        to a lower predicted energy consumption.

    Raises:
        ValueError: If ``solution`` does not hold exactly one value per
            entry of ``features``.
    """
    # The genes are raw feature values (not a binary encoding). Label them
    # with the training column names: the scaler was fitted on a DataFrame,
    # so passing a bare list would rely on positional order and trigger a
    # feature-name warning (hidden by the global warnings filter).
    candidate = pd.DataFrame([list(solution)], columns=features)
    energy_consumption_pred = model.predict(scaler.transform(candidate))

    # Fitness function: minimize energy consumption
    return -energy_consumption_pred[0]

# Genetic Algorithm parameters
ga_instance = pygad.GA(
    num_generations=100,
    num_parents_mating=5,
    fitness_func=fitness_func,
    sol_per_pop=20,
    num_genes=4,
    # Constraints for genes (temperature, humidity, production level, machine
    # status). range() excludes its stop value, so each stop is one past the
    # intended upper bound to keep 35, 70 and 500 inside the search space.
    gene_space=[range(15, 36), range(30, 71), range(50, 501), [0, 1]],
    mutation_percent_genes=10,
    # Seed the optimizer so the search is reproducible, consistent with the
    # seeded data generation, train/test split and model in this file.
    random_seed=42,
)

# Run the Genetic Algorithm
ga_instance.run()

# Best solution
solution, solution_fitness, solution_idx = ga_instance.best_solution()
print(f"Best solution: {solution}, Fitness: {solution_fitness}")


            Timestamp  Temperature   Humidity  ProductionLevel  MachineStatus  \
0 2024-01-01 00:00:00    22.490802  44.945633       378.499240              1   
1 2024-01-01 00:15:00    34.014286  43.316484       133.030398              1   
2 2024-01-01 00:30:00    29.639879  37.046157       205.987862              0   
3 2024-01-01 00:45:00    26.973170  54.290667       348.476287              1   
4 2024-01-01 01:00:00    18.120373  49.064966       266.940205              0   

   EnergyConsumption  
0          95.922748  
1          57.677237  
2          59.377569  
3         101.590360  
4          69.171702  
Train MAE: 3.1898012889522067, Train MSE: 16.27137009978264
Test MAE: 8.292881741172028, Test MSE: 109.75283077612548
Best solution: [17. 40. 54.  0.], Fitness: -18.5661293890132
