# opt- KNN-CASINJ


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_percentage_error

# Load the whole dataset with a single parse. The previous form read the CSV
# twice: once just to count rows and again with nrows set to that same count.
# That returns the same rows as an unrestricted read at double the I/O cost.
data = pd.read_csv('datawithTime.csv')

# Column the regressor is trained to predict.
target_col = 'CASINJ'

# Predictor columns fed to the model.
all_feature_cols = [
    'RAILROAD', 'YEAR', 'MONTH', 'DAY', 'CARS',
    'STATE ',  # NOTE(review): trailing space is kept as-is; presumably matches the CSV header — confirm
    'TEMP', 'VISIBLTY', 'WEATHER',
    'TRNSPD', 'TONS', 'TYPEQ', 'TRKCLAS', 'TYPTRK', 'POSITON1',
    'HEADEND1', 'LOADF1', 'EMPTYF1', 'CAUSE', 'ACCTRK',
    'HIGHSPD', 'hour', 'minute',
]

# Hold out 30% of the rows for final evaluation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    data[all_feature_cols],
    data[target_col],
    test_size=0.3,
    random_state=42,
)

# Base estimator whose hyper-parameters are tuned below.
knn_regressor = KNeighborsRegressor()

# Search space: neighbourhood size, vote weighting, and Minkowski power
# (p=1 is Manhattan distance, p=2 is Euclidean distance).
param_grid = dict(
    n_neighbors=[3, 5, 7, 9],
    weights=['uniform', 'distance'],
    p=[1, 2],
)

# Exhaustive 5-fold cross-validated search, ranking candidates by
# (negated) mean absolute error.
grid_search = GridSearchCV(
    estimator=knn_regressor,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
    cv=5,
)
grid_search.fit(X_train, y_train)

# Best configuration found by the search, used to predict the held-out rows.
best_knn_regressor = grid_search.best_estimator_
y_pred = best_knn_regressor.predict(X_test)

# Evaluate the tuned model on the held-out test set.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# scikit-learn returns MAPE as a fraction; scale it to a percentage.
# NOTE(review): if the target contains zeros, MAPE can become extremely
# large because the denominator is clamped to a tiny epsilon — confirm this
# metric is meaningful for this target.
mape = mean_absolute_percentage_error(y_test, y_pred) * 100
# Share of test rows whose prediction lies within 5% of the true value.
# The tolerance is relative to each row's own true value; the previous code
# used 0.05 * y_test.size, i.e. 5% of the number of test rows, so the
# threshold scaled with dataset size rather than with the target.
# Rows whose true value is 0 only count when the prediction is exactly 0.
percent_within_5 = np.mean(np.abs(y_test - y_pred) <= 0.05 * np.abs(y_test)) * 100

# Print the tuned hyper-parameters followed by each test-set metric,
# one "label value" pair per line.
report = (
    ("Best Parameters:", grid_search.best_params_),
    ("Mean Squared Error:", mse),
    ("Root Mean Squared Error:", rmse),
    ("Mean Absolute Error:", mae),
    ("R-squared:", r2),
    ("Mean Absolute Percentage Error:", mape),
    ("Percent Within 5%:", percent_within_5),
)
for label, value in report:
    print(label, value)
