In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

In [2]:
# Read the training and test tables from CSV files in the working directory.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('Train.csv', 'Test.csv')
)

In [3]:
def _add_calendar_parts(frame):
    """Parse ``frame['date']`` to datetimes in place and add year/month/day columns."""
    frame['date'] = pd.to_datetime(frame['date'])
    stamps = frame['date'].dt
    frame['year'] = stamps.year
    frame['month'] = stamps.month
    frame['day'] = stamps.day


# Both frames are modified in place so later cells see the new columns.
for dataset in (train_data, test_data):
    _add_calendar_parts(dataset)

In [4]:
# Columns removed from both frames before the feature matrices are built.
drop_columns = [
    'id',
    'site_id',
    'date',
    'city',
    'country',
]

In [5]:
# Fill gaps in every numeric column with that column's own median, computed
# separately for each frame.
# NOTE(review): this loop also covers the target column 'pm2_5' in train_data
# if it is numeric and has missing values — confirm that is intended.
for data in [train_data, test_data]:
    for col in data.select_dtypes(include=np.number).columns:
        if data[col].isnull().any():
            # Assign the filled column back instead of calling
            # fillna(inplace=True) on the selection: the chained in-place form
            # warns on pandas 2.x and does not modify the frame under
            # Copy-on-Write.
            data[col] = data[col].fillna(data[col].median())

In [6]:
# The target is the 'pm2_5' column; the features are whatever remains once the
# dropped columns and the target itself are removed.
y_train = train_data['pm2_5']
non_feature_columns = [*drop_columns, 'pm2_5']
X_train = train_data.drop(columns=non_feature_columns)

In [7]:
# Keep the ids for the output file, then strip the dropped columns from the
# test frame to obtain its feature matrix.
ids_test = test_data['id']
X_test = test_data.drop(columns=drop_columns)

In [8]:
# Learn the scaling statistics from the training features only, then apply
# the same transformation to both feature matrices.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Hyper-parameter grid explored for the RBF-kernel SVR.
param_grid = {
    'C': [0.1, 1, 10],
    'epsilon': [0.01, 0.1, 1],
    'gamma': ['scale', 'auto', 0.1, 1]
}

# 5-fold cross-validated grid search, scored by negative mean squared error
# and run on all available cores (n_jobs=-1).
# NOTE(review): X_train_scaled was scaled with statistics from the full
# training set, so each validation fold has already influenced the scaling;
# consider a Pipeline(StandardScaler, SVR) if unbiased CV scores matter.
grid_search = GridSearchCV(
    SVR(kernel='rbf'),
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)

# Fit the search on the scaled training data.
grid_search.fit(X_train_scaled, y_train)

# Report the best parameter combination found.
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# GridSearchCV (default refit=True) has already refitted an SVR with the best
# parameters on the whole training set, so reuse that fitted estimator instead
# of constructing and training an identical model a second time.
svr_best = grid_search.best_estimator_

# Predict the PM2.5 values for the test set
predictions = svr_best.predict(X_test_scaled)

Best Parameters: {'C': 10, 'epsilon': 1, 'gamma': 'auto'}


In [18]:
# Pair each test id with its predicted pm2_5 value.
submission_columns = {'id': ids_test, 'pm2_5': predictions}
predictions_df = pd.DataFrame(submission_columns)

# Write the table to CSV without the index column.
predictions_df.to_csv('test_predictions_svr_optimized.csv', index=False)