In [None]:
import pandas as pd

# Read the source dataset from disk
df = pd.read_csv('converted.csv')

# Pick the test-case rows by integer position (iloc is position-based, not label-based)
row_positions = [91, 33, 27, 56]
selected_rows = df.iloc[row_positions]

# Show the chosen rows
print(selected_rows)

   host_is_superhost          cancellation_policy instant_bookable  \
91                 f                     flexible                t   
33                 t  strict_14_with_grace_period                f   
27                 t  strict_14_with_grace_period                f   
56                 f                     moderate                f   

    host_total_listings_count neighbourhood_cleansed  latitude  longitude  \
91                       20.0     Financial District  37.79447 -122.40450   
33                        3.0       Western Addition  37.78895 -122.43031   
27                        2.0           Potrero Hill  37.75645 -122.39910   
56                        4.0    Castro/Upper Market  37.76068 -122.43331   

   property_type        room_type  accommodates  ...  bedrooms_na  \
91        Hostel      Shared room           1.0  ...          0.0   
33         House     Private room           2.0  ...          0.0   
27     Apartment     Private room           2.0  ...    

In [44]:
import numpy as np

# Build the model-ready frame for the selected listings: two room-type indicator columns,
# the log-transformed price, and the feature columns in a fixed order.
# NOTE: this cell rebinds `selected_rows` to the reduced frame, so the cell that selects the
# rows must be re-run before this one is executed again.

# Encode 'room_type' as explicit 0/1 indicators rather than pd.get_dummies(..., drop_first=True):
# get_dummies only sees the categories present in this handful of rows, so which level gets
# dropped (and which columns exist) would depend on the sample instead of on the training data.
required_columns = ['room_type_Private room', 'room_type_Shared room']
room_type = selected_rows['room_type']
indicator_columns = {
    col: (room_type == col[len('room_type_'):]).astype(int)
    for col in required_columns
}

# Log transform the 'price'
log_price = np.log(selected_rows['price'])

# Attach the derived columns and keep only the needed columns, in the correct order
selected_rows = selected_rows.assign(log_price=log_price, **indicator_columns)[
    ['bedrooms', 'accommodates', 'review_scores_rating',
     'room_type_Private room', 'bathrooms', 'room_type_Shared room', 'log_price']
]

# Display the preprocessed data
print(selected_rows)

    bedrooms  accommodates  review_scores_rating  room_type_Private room  \
91       1.0           1.0                  92.0                       0   
33       1.0           2.0                  92.0                       1   
27       1.0           2.0                  98.0                       1   
56       0.0           2.0                  94.0                       0   

    bathrooms  room_type_Shared room  log_price  
91        1.0                      1   3.806662  
33        1.5                      0   4.700480  
27        1.0                      0   5.043425  
56        1.0                      0   4.753590  


In [45]:
import joblib

# Restore the fitted model that was saved with joblib
final_gb = joblib.load('final_gradient_boosting_model.pkl')

# Split the preprocessed rows into the target (log price) and the model inputs
actual_log_prices = selected_rows['log_price']
X_test_cases = selected_rows.drop(columns='log_price')

# Predict on the log scale, then exponentiate to return to the price scale
predicted_log_prices = final_gb.predict(X_test_cases)
predicted_prices = np.exp(predicted_log_prices)
actual_prices = np.exp(actual_log_prices)

# Side-by-side table of actual vs. predicted values on both scales
comparison = pd.DataFrame(
    {
        'Actual Log Price': actual_log_prices,
        'Predicted Log Price': predicted_log_prices,
        'Actual Price': actual_prices,
        'Predicted Price': predicted_prices,
        'Difference (Price Scale)': actual_prices - predicted_prices,
    }
)

# Show the table
print(comparison)

    Actual Log Price  Predicted Log Price  Actual Price  Predicted Price  \
91          3.806662             3.911337          45.0        49.965711   
33          4.700480             4.482176         110.0        88.426899   
27          5.043425             4.752407         155.0       115.862809   
56          4.753590             4.787325         116.0       119.980046   

    Difference (Price Scale)  
91                 -4.965711  
33                 21.573101  
27                 39.137191  
56                 -3.980046  


In [30]:
import pickle
import numpy as np
import pandas as pd

# Restore the Gradient Boosting model from its pickle file.
# NOTE: pickle.load can execute arbitrary code, so only load files from a trusted source.
with open('final_gradient_boosting_model_pickle2.pkl', 'rb') as model_file:
    final_gb = pickle.load(model_file)

# Only proceed when the unpickled object exposes `predict` (i.e. it is not just an array)
if not hasattr(final_gb, 'predict'):
    print("Loaded object is not a model.")
else:
    # Split the preprocessed rows into the target (log price) and the model inputs
    actual_log_prices = selected_rows['log_price']
    X_test_cases = selected_rows.drop(columns='log_price')

    # Predict on the log scale, then exponentiate to return to the price scale
    predicted_log_prices = final_gb.predict(X_test_cases)
    predicted_prices = np.exp(predicted_log_prices)
    actual_prices = np.exp(actual_log_prices)

    # Side-by-side table of actual vs. predicted values on both scales
    comparison = pd.DataFrame(
        {
            'Actual Log Price': actual_log_prices,
            'Predicted Log Price': predicted_log_prices,
            'Actual Price': actual_prices,
            'Predicted Price': predicted_prices,
            'Difference (Price Scale)': actual_prices - predicted_prices,
        }
    )

    # Show the table
    print(comparison)


    Actual Log Price  Predicted Log Price  Actual Price  Predicted Price  \
91          3.806662             3.911337          45.0        49.965711   
93          4.382027             4.793975          80.0       120.780500   
98          4.787492             5.121646         120.0       167.610989   
99          3.688879             4.435821          40.0        84.421421   

    Difference (Price Scale)  
91                 -4.965711  
93                -40.780500  
98                -47.610989  
99                -44.421421  
