# Imports

In [54]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from sklearn.svm import SVR

# Load datasets

In [55]:

# Read both CSV files in one pass: 'H1.csv' becomes the training frame and
# 'H2.csv' the test frame. Paths are relative to the notebook's working directory.
train_data, test_data = (pd.read_csv(csv_path) for csv_path in ('H1.csv', 'H2.csv'))

In [56]:
# Plain-text preview of the first five rows of the training frame.
print(train_data.head(n=5))

   IsCanceled  LeadTime  ArrivalDateYear ArrivalDateMonth  \
0           0       342             2015             July   
1           0       737             2015             July   
2           0         7             2015             July   
3           0        13             2015             July   
4           0        14             2015             July   

   ArrivalDateWeekNumber  ArrivalDateDayOfMonth  StaysInWeekendNights  \
0                     27                      1                     0   
1                     27                      1                     0   
2                     27                      1                     0   
3                     27                      1                     0   
4                     27                      1                     0   

   StaysInWeekNights  Adults  Children  ...  ReservedRoomType  \
0                  0       2         0  ...  C                  
1                  0       2         0  ...  C                  

# Extract features and target variable

In [57]:

# 'ADR' is the regression target; every other column is kept as a feature.
# The source frames are left untouched (drop returns a new frame).
y_train, y_test = train_data['ADR'], test_data['ADR']
X_train = train_data.drop(columns='ADR')
X_test = test_data.drop(columns='ADR')

# Encode categorical variables

In [58]:
# Encode the categorical feature columns as integer codes.
#
# Train and test rows are encoded together so both splits share a single
# category -> code mapping per column.
#
# The combined frame is rebuilt from train_data/test_data rather than from
# X_train/X_test: this cell overwrites X_train/X_test, so reading them back on a
# re-run would cast the already-encoded integers to strings and re-encode them
# in lexicographic order ('10' sorts before '2'), silently scrambling the codes.
label_encoder = LabelEncoder()
n_train_rows = len(train_data)
combined_data = pd.concat(
    [train_data.drop(columns='ADR'), test_data.drop(columns='ADR')],
    axis=0,
)

# Columns treated as categorical. They are cast to str first so each column
# holds a single type before it is handed to the encoder.
mixed_data_cols = ['ArrivalDateYear', 'ArrivalDateMonth','Meal', 'Country', 'MarketSegment', 'DistributionChannel', 'ReservedRoomType', 'AssignedRoomType', 'DepositType', 'CustomerType', 'ReservationStatus']
combined_data[mixed_data_cols] = combined_data[mixed_data_cols].astype(str)

# The same encoder object is refitted for every column, so only the encoded
# values are kept; the per-column mappings are not retained.
# NOTE(review): integer codes impose an arbitrary ordering on nominal categories
# such as 'Country'; consider one-hot encoding if model quality matters.
for col in mixed_data_cols:
    combined_data[col] = label_encoder.fit_transform(combined_data[col])

# Split back by position using the row count captured above, and copy so that
# X_train / X_test are independent frames rather than slices of combined_data.
X_train = combined_data.iloc[:n_train_rows].copy()
X_test = combined_data.iloc[n_train_rows:].copy()

print(X_train.head(2))

   IsCanceled  LeadTime  ArrivalDateYear  ArrivalDateMonth  \
0           0       342                0                 5   
1           0       737                0                 5   

   ArrivalDateWeekNumber  ArrivalDateDayOfMonth  StaysInWeekendNights  \
0                     27                      1                     0   
1                     27                      1                     0   

   StaysInWeekNights  Adults  Children  ...  PreviousBookingsNotCanceled  \
0                  0       2       0.0  ...                            0   
1                  0       2       0.0  ...                            0   

   ReservedRoomType  AssignedRoomType  BookingChanges  DepositType  \
0                 2                 2               3            0   
1                 2                 2               4            0   

   DaysInWaitingList  CustomerType  RequiredCarParkingSpaces  \
0                  0             2                         0   
1                  0     


# Train SVR model

In [59]:

# Fit a support-vector regressor on the encoded training features, predict ADR
# for the test rows, report the error, and save the per-row results.
#
# SVR is not scale invariant, so features are standardised before fitting;
# without this, wide-range columns such as LeadTime dominate the kernel distance.
# The imputer guards against missing feature values, which SVR rejects.
# NOTE(review): 'Children' turns from int to float once the test rows are
# concatenated (see the encoded preview), which suggests NaNs in H2 — confirm.
# Both preprocessing steps are fitted on the training rows only (inside the
# pipeline), so no test-set statistics leak into the model.
svr = make_pipeline(
    SimpleImputer(strategy='median'),
    StandardScaler(),
    SVR(),
)
svr.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = svr.predict(X_test)

print("y_pred", y_pred)

# Summarise prediction quality with a single number (mean absolute error, in ADR units)
mae = mean_absolute_error(y_test, y_pred)
print("MAE", mae)

# Per-row signed error: actual minus predicted
difference = y_test - y_pred

# Save the results to a CSV file (row index is not written)
results_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred, 'Difference': difference})
results_df.to_csv('prediction_results.csv', index=False)