# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.metrics import mean_absolute_error

# Train a random-forest regressor for 'yield', tune it with a randomized
# hyperparameter search, report the validation MAE, and write predictions for
# the test set to a CSV file.

# Locations, column names and the shared seed, gathered in one place so they
# are easy to change when the data moves.
TRAIN_CSV = "C:\\Users\\apotl\\OneDrive\\Desktop\\train.csv (1)\\train.csv"
TEST_CSV = "C:\\Users\\apotl\\OneDrive\\Desktop\\test.csv\\test.csv"
PREDICTIONS_CSV = "predictions3.csv"
TARGET_COLUMN = 'yield'
ID_COLUMN = 'id'
RANDOM_STATE = 42

# Read the training data
train_data = pd.read_csv(TRAIN_CSV)

# Separate features (X) and target variable (y).
# The identifier column is dropped when present so that it cannot be picked
# as a predictive feature; errors='ignore' keeps this working if the training
# file has no such column.
y = train_data[TARGET_COLUMN]
X = train_data.drop(columns=[TARGET_COLUMN, ID_COLUMN], errors='ignore')

# Split BEFORE feature selection: fitting the selector on all rows would let
# the validation rows influence which features are kept and make the
# validation score optimistic.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Perform feature selection using SelectKBest and f_regression,
# fitted on the training split only.
k = 5  # Number of top features to select
selector = SelectKBest(score_func=f_regression, k=k)
X_train = selector.fit_transform(X_train_raw, y_train)
X_val = selector.transform(X_val_raw)

# Selected features for the full training table, kept under its original name.
X_selected = selector.transform(X)

# Define the parameter grid for RandomizedSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [5, 10, 15],
    'min_samples_split': [2, 5, 10],
}

# Initialize a random forest regressor model
model = RandomForestRegressor(random_state=RANDOM_STATE)

# Perform hyperparameter tuning using RandomizedSearchCV.
# random_state fixes which 10 parameter combinations are sampled, so repeated
# runs select the same model.
# NOTE: the selector above is fitted once on the whole training split, so the
# cross-validation folds inside the search share it; putting the selector and
# the model in a Pipeline would remove that remaining overlap.
search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=10,
    cv=5,
    scoring='neg_mean_absolute_error',
    random_state=RANDOM_STATE,
)
search.fit(X_train, y_train)

# Get the best model from the search (refitted on the full training split)
best_model = search.best_estimator_

# Evaluate the best model on the validation set
predictions = best_model.predict(X_val)
mae = mean_absolute_error(y_val, predictions)
print("Mean Absolute Error on validation set:", mae)

# Read the separate test dataset
test_data = pd.read_csv(TEST_CSV)

# Perform feature selection on the test data.
# Indexing by X.columns passes exactly the training feature columns, in the
# training order, and raises a KeyError naming any column the test file lacks.
X_test_selected = selector.transform(test_data[X.columns])

# Make predictions on the test data using the best model
test_predictions = best_model.predict(X_test_selected)

# Create a DataFrame with 'id' and 'yield_prediction'
output = pd.DataFrame({'id': test_data[ID_COLUMN], 'yield_prediction': test_predictions})

# Save the predictions to a CSV file
output.to_csv(PREDICTIONS_CSV, index=False)