In [3]:
def get_views(df_housing):
    """Split scenic listings by waterfront status and report median prices.

    A listing counts as "scenic" when its ``view`` value is greater than 0.
    Scenic listings are then separated into those with ``waterfront == 1``
    and those with ``waterfront == 0``.

    Note: the printed messages say "average", but the statistic computed
    for each group is the median of the ``price`` column.

    Args:
        df_housing: DataFrame providing ``waterfront``, ``view`` and
            ``price`` columns.

    Returns:
        A 4-tuple ``(avg_price_water, avg_price_no_water, views_water,
        views_no_water)``: the two median prices followed by the two
        filtered DataFrames they were computed from.
    """
    # Narrow to scenic listings once, then split on the waterfront flag.
    scenic = df_housing.query('view > 0')
    scenic_waterfront = scenic.query('waterfront == 1')
    scenic_inland = scenic.query('waterfront == 0')

    avg_price_water = scenic_waterfront['price'].median()
    avg_price_no_water = scenic_inland['price'].median()

    # Display only; the values are also handed back to the caller below.
    print(f'The average price for a scenic property with a waterfront view is ${avg_price_water:.2f}')
    print(f'The average price for a scenic property without a waterfront view is ${avg_price_no_water:.2f}')

    return avg_price_water, avg_price_no_water, scenic_waterfront, scenic_inland



In [6]:
def plot_the_views(df_housing, views_water, views_no_water):
    """Draw a boxplot of price per view score, split by waterfront subset.

    The ``view`` values of the two subsets are placed side by side in a
    temporary frame, melted to long format, and passed to a seaborn boxplot
    with the view score on the x-axis, ``price`` on the y-axis and the
    subset name as hue.

    The caller's ``df_housing`` is not modified: the helper columns are
    added to a separate plotting frame instead of being written back into
    the input.

    Args:
        df_housing: DataFrame providing the ``price`` column; its index is
            what the subset ``view`` values are aligned against.
        views_water: DataFrame with a ``view`` column for the waterfront
            subset.
        views_no_water: DataFrame with a ``view`` column for the
            non-waterfront subset.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    # Assigning a Series aligns on the index, so rows of df_housing that are
    # absent from a subset receive NaN in that subset's column.
    plot_frame = df_housing[['price']].assign(
        views_water=views_water['view'],
        views_no_water=views_no_water['view'],
    )

    # Long format: one row per (price, category, view score) for seaborn.
    df_long = plot_frame.melt(
        id_vars=['price'],
        value_vars=['views_water', 'views_no_water'],
        var_name='Categories',
        value_name='Values',
    )

    sns.boxplot(x='Values', y='price', hue='Categories', data=df_long)
    plt.title('Price by View Category')
    plt.show()

In [8]:
def housing_6_lower(df_housing):
    """Report the share of grade-6 listings among the lower-priced listings.

    "Lower half" here means every row whose ``price`` is less than or equal
    to the mean of the ``price`` column (the cutoff is the mean, not the
    median).

    Args:
        df_housing: DataFrame providing ``price`` and ``grade`` columns.

    Returns:
        A 3-tuple ``(grade_6_percentage, lower_half_df, total_lower_half)``:
        the percentage of rows with ``grade == 6`` in the filtered frame
        (0 when the filtered frame is empty), the filtered frame itself,
        and its row count.
    """
    price_cutoff = df_housing['price'].mean()

    # Keep only the rows priced at or below the cutoff.
    at_or_below_cutoff = df_housing['price'] <= price_cutoff
    lower_half_df = df_housing[at_or_below_cutoff]
    total_lower_half = lower_half_df.shape[0]

    is_grade_6 = lower_half_df['grade'] == 6
    grade_6_count = is_grade_6.sum()

    # Guard against dividing by zero when nothing passes the price filter.
    if total_lower_half > 0:
        grade_6_percentage = grade_6_count / total_lower_half * 100
    else:
        grade_6_percentage = 0

    print(f"Percentage of grade 6 entries in the lower half of prices: {grade_6_percentage:.2f}%")

    return grade_6_percentage, lower_half_df, total_lower_half


In [None]:
def hist_grades(lower_half_df):
    """Plot how often each grade occurs in the given DataFrame.

    Draws a seaborn histogram of the ``grade`` column, coloured by grade
    with dodged bars, using one bin per distinct grade value, then titles
    and labels the axes and shows the figure.

    Args:
        lower_half_df: DataFrame providing a ``grade`` column.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    # One bin per distinct grade value; missing values count as a distinct
    # value, matching the length of ``unique()``.
    grade_bin_count = lower_half_df['grade'].nunique(dropna=False)

    histogram_options = {
        'x': 'grade',
        'hue': 'grade',
        'multiple': 'dodge',
        'bins': grade_bin_count,
        'shrink': 0.8,  # leave a gap between neighbouring bars
    }
    sns.histplot(data=lower_half_df, **histogram_options)

    plt.title("Frequency of Grades")
    plt.xlabel("Grade")
    plt.ylabel("Frequency")
    plt.show()

In [None]:
def plot_bathrooms_boxplot(df_housing):
    """Compare bathroom counts between waterfront and non-waterfront rows.

    Rows with a missing ``waterfront`` value are excluded, then a seaborn
    boxplot of ``bathrooms`` grouped by ``waterfront`` is drawn, labelled
    and shown.

    Args:
        df_housing: DataFrame providing ``waterfront`` and ``bathrooms``
            columns.

    Returns:
        None. The figure is shown via ``plt.show()``.
    """
    # Rows without a waterfront value cannot be placed in either group.
    known_waterfront = df_housing.dropna(subset=['waterfront'])

    axes = sns.boxplot(
        data=known_waterfront,
        x='waterfront',
        y='bathrooms',
        palette='Accent_r',
    )

    # Label the axes seaborn drew on (the current axes).
    axes.set(
        title='Comparison of Bathrooms in Waterfront vs Non-Waterfront Properties',
        xlabel='Waterfront Status (0 = Non-waterfront, 1 = Waterfront)',
        ylabel='Number of Bathrooms',
    )

    plt.show()