In [67]:
import os
import pandas as pd
import matplotlib.pyplot as plt

In [68]:
# Read the four CSV inputs in one pass; the order of the paths matches the
# order of the names on the left-hand side.
long_apart_df, long_room_df, hotel_df, airbnb_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/long_term_housing.csv',
        'data/apartments_detail.csv',
        'data/hotel.csv',
        'data/airbnb.csv',
    )
)

In [69]:
# Output directory for the saved figures.
static_folder = 'static'
# exist_ok=True creates the directory when missing and is a no-op when it is
# already there, without a separate exists() check that could race with
# another process. If the path exists but is not a directory, this raises
# FileExistsError here instead of failing later when a figure is saved.
os.makedirs(static_folder, exist_ok=True)

In [70]:
# In airbnb_df, turn Price from "$1,000" to 1000.0
# - astype(str) first, so the cell can be re-run after the column has already
#   been converted (the .str accessor raises AttributeError on a float column).
# - regex=False makes '$' and ',' literal characters whatever the pandas
#   version; the default for `regex` changed in pandas 2.0, and as a regex
#   '$' is the end-of-string anchor rather than a dollar sign.
airbnb_df['Price'] = (
    airbnb_df['Price']
    .astype(str)
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

In [80]:
# Histogram of the Airbnb 'Price' column, written to airbnb_price.png inside static_folder
fig, ax = plt.subplots(figsize=(6, 5))
ax.hist(airbnb_df['Price'], bins=30, edgecolor='black')
ax.set_xlabel('Price($/night)')
ax.set_ylabel('Frequency')

airbnb_png = os.path.join(static_folder, 'airbnb_price.png')
fig.savefig(airbnb_png, dpi=500)
# Release the figure once it is on disk
plt.close(fig)

In [72]:
# In hotel_df, turn Price from "$1,000" to 1000.0
# - astype(str) first, so the cell can be re-run after the column has already
#   been converted (the .str accessor raises AttributeError on a float column).
# - regex=False makes '$' and ',' literal characters whatever the pandas
#   version; the default for `regex` changed in pandas 2.0, and as a regex
#   '$' is the end-of-string anchor rather than a dollar sign.
hotel_df['Price'] = (
    hotel_df['Price']
    .astype(str)
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(float)
)

In [78]:
# Histogram of the hotel 'Price' column, written to hotel_price.png inside static_folder
fig, ax = plt.subplots(figsize=(6, 5))
ax.hist(hotel_df['Price'], bins=30, edgecolor='black')
ax.set_xlabel('Price($/night)')
ax.set_ylabel('Frequency')

hotel_png = os.path.join(static_folder, 'hotel_price.png')
fig.savefig(hotel_png, dpi=500)
# Release the figure once it is on disk
plt.close(fig)

In [74]:
# Build an independent, filtered copy so long_room_df itself is never modified.
# Rows are dropped when 'Unit Price' is missing or its text contains
# 'Call for Rent' or 'Person'.
# - na=False keeps the mask strictly boolean; without it a missing price puts
#   NaN into the mask and the `~` negation fails.
# - The trailing .copy() makes the result a standalone frame, so the column
#   assignment below does not raise SettingWithCopyWarning.
unit_price_text = long_room_df['Unit Price']
has_no_price = unit_price_text.isna() | unit_price_text.str.contains('Call for Rent|Person', regex=True, na=False)
long_room_df_copy = long_room_df[~has_no_price].copy()

# Strip '$' and ',' so "$1,395" becomes "1395" (still a string at this point)
long_room_df_copy['Unit Price'] = long_room_df_copy['Unit Price'].str.replace(r'[\$,]', '', regex=True)

# Collapse a price string into one number; ranges such as '2449 – 3375' are averaged
def calculate_average(price_range):
    """Convert a cleaned price string to a float.

    A string without an en dash ('–') is parsed directly with float().
    Otherwise it is split on every en dash, each piece is stripped of
    surrounding whitespace and parsed, and the arithmetic mean of the pieces
    is returned.

    Raises ValueError (from float()) when a piece is not a valid number.
    """
    if '–' not in price_range:
        return float(price_range)

    bounds = [float(piece.strip()) for piece in price_range.split('–')]
    return sum(bounds) / len(bounds)

# Replace every 'Unit Price' string with the number calculate_average returns for it
unit_price_strings = long_room_df_copy['Unit Price']
long_room_df_copy['Unit Price'] = unit_price_strings.apply(calculate_average)

In [75]:
# Divide a row's 'Unit Price' by the bedroom count named in its 'Room Type'
def adjust_price_based_on_room_type(row):
    """Return the row's 'Unit Price', split across bedrooms where applicable.

    The 'Room Type' text is lower-cased and searched for '2 bed', '3 bed' and
    '4 bed', in that order. The first substring found decides the divisor
    (2, 3 or 4). If none is found, the price is returned unchanged.

    `row` only needs to support lookup by the keys 'Room Type' and
    'Unit Price'.
    """
    label = row['Room Type'].lower()  # lower-case so matching ignores case
    price = row['Unit Price']

    # Order matters: the first matching marker wins.
    divisors = (('2 bed', 2), ('3 bed', 3), ('4 bed', 4))
    for marker, bedrooms in divisors:
        if marker in label:
            return price / bedrooms
    return price

# Run the room-type adjustment row by row (axis=1), then store the result
# as integers; astype(int) drops any fractional part.
adjusted_unit_price = long_room_df_copy.apply(adjust_price_based_on_room_type, axis=1)
long_room_df_copy['Unit Price'] = adjusted_unit_price.astype(int)

0       1395
1       1415
2       1440
3       1450
4       1500
        ... 
1626     460
1627     645
1628     695
1629     475
1630     441
Name: Unit Price, Length: 1541, dtype: int32

In [79]:
# Histogram of the long-term room 'Unit Price' column, written to long_price.png inside static_folder
fig, ax = plt.subplots(figsize=(6, 5))
ax.hist(long_room_df_copy['Unit Price'], bins=30, edgecolor='black')
ax.set_xlabel('Price($/month)')
ax.set_ylabel('Frequency')

long_png = os.path.join(static_folder, 'long_price.png')
fig.savefig(long_png, dpi=500)
# Release the figure once it is on disk
plt.close(fig)