In [1]:
# Install/upgrade third-party packages for this notebook session.
# The leading `!` makes the notebook run the line as a shell command; it is
# not valid in a plain .py file.
# NOTE(review): nest_asyncio is not referenced in the code visible here — confirm it is still needed.
!pip install nest_asyncio scikit-learn --upgrade

# Mount Google Drive at /content/drive; the output directory used below
# lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

import os
import itertools

# Output directory on the mounted Drive; created on first use.
base_dir = "/content/drive/MyDrive/TestPrograms"
os.makedirs(base_dir, exist_ok=True)

# Continue numbering after the highest existing program<N>.py so that files
# from earlier runs are never overwritten. Names whose middle part is not
# purely digits are ignored.
existing_files = [
    entry
    for entry in os.listdir(base_dir)
    if entry.startswith("program") and entry.endswith(".py")
]
numbers = sorted(
    int(stem)
    for stem in (entry[len("program"):-len(".py")] for entry in existing_files)
    if stem.isdigit()
)
next_number = 1 if not numbers else numbers[-1] + 1

# Search grid for KNeighborsRegressor: each key is a constructor keyword and
# each list holds the values to try. Key order matters because it fixes the
# order of keyword arguments in the generated programs.
hyperparams = {
    "n_neighbors": [3, 5, 7, 10],                              # neighbours consulted per prediction
    "weights": ["uniform", "distance"],                        # how neighbour votes are weighted
    "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],    # neighbour-search strategy
    "leaf_size": [10, 20, 30],                                 # leaf size for the tree-based searches
    "p": [1, 2],                                               # Minkowski power (1 = Manhattan, 2 = Euclidean)
}

# Source text of every generated program. `{params}` is the only placeholder:
# it is filled through str.format() with the keyword arguments passed to
# KNeighborsRegressor. Because the text goes through str.format(), any literal
# braces added to it later must be doubled ({{ and }}).
# Each generated script defines CalculateRMSE(), which reads the CSV at the
# URL below, keeps May-August rows where Source == 'LAKE-1', fits the
# regressor on an 80/20 train/test split (random_state=42), prints RMSE and
# R², and returns the RMSE.
template_code = '''
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score

def CalculateRMSE():
    url = "https://raw.githubusercontent.com/apownukepcc/ForecastingDailyEmissions/refs/heads/main/SO2TONS_dataset.csv"
    df = pd.read_csv(url)

    df['date'] = pd.to_datetime(df['date'])
    df = df[(df['date'].dt.month >= 5) & (df['date'].dt.month <= 8) & (df['Source'] == 'LAKE-1')]
    df = df[['tavg', 'tmin', 'tmax', 'prcp', 'snow', 'wdir', 'wspd', 'pres', 'Emissions_Load']].dropna()

    X = df.drop('Emissions_Load', axis=1)
    y = df['Emissions_Load']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = KNeighborsRegressor({params})
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print("RMSE:", rmse)
    print("R² score:", r2)

    return rmse

if __name__ == "__main__":
    CalculateRMSE()
'''

# Expand the grid into every combination of option values (Cartesian product).
# Materialised as a list because its length is reported at the end.
param_names = list(hyperparams)
combinations = list(itertools.product(*hyperparams.values()))

# Write one standalone script per combination, numbered consecutively from
# next_number.
for offset, combo in enumerate(combinations):
    program_number = next_number + offset

    # Render the combination as "name=value, ..." keyword arguments; !r keeps
    # string values quoted so the generated call is valid Python.
    keyword_args = [f"{name}={value!r}" for name, value in zip(param_names, combo)]
    source = template_code.format(params=', '.join(keyword_args))

    file_name = os.path.join(base_dir, f"program{program_number}.py")
    with open(file_name, 'w', encoding='utf-8') as out:
        out.write(source)

    print(f"Generated {file_name}")

print(f"\nGenerated {len(combinations)} new programs starting from program{next_number}.py")


Mounted at /content/drive
Generated /content/drive/MyDrive/TestPrograms/program109.py
Generated /content/drive/MyDrive/TestPrograms/program110.py
Generated /content/drive/MyDrive/TestPrograms/program111.py
Generated /content/drive/MyDrive/TestPrograms/program112.py
Generated /content/drive/MyDrive/TestPrograms/program113.py
Generated /content/drive/MyDrive/TestPrograms/program114.py
Generated /content/drive/MyDrive/TestPrograms/program115.py
Generated /content/drive/MyDrive/TestPrograms/program116.py
Generated /content/drive/MyDrive/TestPrograms/program117.py
Generated /content/drive/MyDrive/TestPrograms/program118.py
Generated /content/drive/MyDrive/TestPrograms/program119.py
Generated /content/drive/MyDrive/TestPrograms/program120.py
Generated /content/drive/MyDrive/TestPrograms/program121.py
Generated /content/drive/MyDrive/TestPrograms/program122.py
Generated /content/drive/MyDrive/TestPrograms/program123.py
Generated /content/drive/MyDrive/TestPrograms/program124.py
Generated /con