# Opt-RandomForest-CARSDMG


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_percentage_error

# ---------------------------------------------------------------------------
# Random-forest regression of CARSDMG with an exhaustive grid search.
#
# Pipeline: load CSV -> select features/target -> 70/30 train/test split ->
# 5-fold GridSearchCV over a RandomForestRegressor -> evaluate the refit best
# estimator on the held-out test set and print the metrics.
# ---------------------------------------------------------------------------

DATA_PATH = 'datawithTime.csv'

# Fraction of the file's leading rows to use. 1 keeps every row; lower it
# (e.g. 0.1) for a quicker experiment on a prefix of the data.
SAMPLE_FRACTION = 1

# Parse the CSV exactly once. The row count needed to apply SAMPLE_FRACTION
# comes from the loaded frame instead of a second full read of the file.
data = pd.read_csv(DATA_PATH)
data = data.head(int(SAMPLE_FRACTION * len(data)))

target_col = 'CARSDMG'

# NOTE(review): 'STATE ' carries a trailing space. It is kept verbatim because
# it presumably matches the CSV header; confirm against the file.
all_feature_cols = ['RAILROAD', 'YEAR', 'MONTH', 'DAY', 'CARS', 'STATE ', 'TEMP', 'VISIBLTY', 'WEATHER',
                    'TRNSPD', 'TONS', 'TYPEQ', 'TRKCLAS', 'TYPTRK', 'POSITON1',
                    'HEADEND1', 'LOADF1', 'EMPTYF1', 'CAUSE', 'ACCTRK',
                    'HIGHSPD', 'hour', 'minute']

# Hold out 30% of the rows for the final evaluation; the fixed seed makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data[all_feature_cols],
    data[target_col],
    test_size=0.3,
    random_state=42,
)

# 3 * 3 * 3 * 3 = 81 hyper-parameter combinations, each fitted on 5 folds.
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

rf_regressor = RandomForestRegressor(random_state=42)

# Candidates are ranked by negated MAE (scikit-learn maximises scores, so the
# error is negated).
grid_search = GridSearchCV(rf_regressor, param_grid, scoring='neg_mean_absolute_error', cv=5)

grid_search.fit(X_train, y_train)

# Best parameter combination found by the search.
best_rf_regressor = grid_search.best_estimator_

y_pred = best_rf_regressor.predict(X_test)

# Held-out test-set metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
# Scaled to percent.
# NOTE(review): MAPE can become arbitrarily large when targets are at or near
# zero; verify that CARSDMG has no such rows before relying on this number.
mape = mean_absolute_percentage_error(y_test, y_pred) * 100

print("Best Parameters:", grid_search.best_params_)
print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r2)
print("Mean Absolute Percentage Error:", mape)
