In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D  # For creating custom legend handles

# File path for the dataset
# NOTE(review): absolute, machine-specific path; this cell only runs where the file exists.
file_path = "/Users/toyguntez/Downloads/EU_Airbnb_Prices/airbnb_data_geo.csv"

# Load the dataset (adjust the delimiter if needed)
data = pd.read_csv(file_path, delimiter=',')
print("File loaded successfully!")

# Inspect the first few rows and column names
print("Columns in dataset:", data.columns)
print(data.head())

# Check if required columns exist
if "DIST" in data.columns and "REALSUM" in data.columns:
    # Drop rows missing either plotted column. The explicit copy makes the
    # result an independent frame, so adding DIST_BIN below never writes
    # through to (or warns about) the frame returned by read_csv.
    data = data.dropna(subset=["DIST", "REALSUM"]).copy()
    print("Missing values removed from required columns.")

    # Create bins for distances.
    # pd.cut builds right-closed intervals and, by default, leaves the lowest
    # edge open, so a listing with DIST == 0 would receive NaN instead of
    # "0-2 km". include_lowest=True closes the first interval on the left.
    data['DIST_BIN'] = pd.cut(
        data['DIST'],
        bins=[0, 2, 5, 10, 20, 50, 100],
        labels=["0-2 km", "2-5 km", "5-10 km", "10-20 km", "20-50 km", "50-100 km"],
        include_lowest=True
    )
    print("Distance bins created.")

    # Rows outside the 0-100 km range still get no bin; report them instead of
    # letting them disappear silently from every distance range.
    unbinned_count = int(data['DIST_BIN'].isna().sum())
    if unbinned_count:
        print(f"Warning: {unbinned_count} listings fall outside the 0-100 km bins and have no distance range.")

    # Plot histogram
    plt.figure(figsize=(14, 8))
    hist = sns.histplot(
        data=data,
        x="REALSUM", # Earnings
        hue="DIST_BIN", # Distance bins
        multiple="stack", # Stack bars for different distance ranges
        bins=30, # Number of bins
        palette="coolwarm", # Color palette
        edgecolor="black" # Edge color for bars
    )

    # Build the legend by hand: one thick line per distance category, colored
    # from the same "coolwarm" palette (same number of colors, same order as
    # the categories) that was passed to histplot above.
    legend_labels = list(data['DIST_BIN'].cat.categories)  # Convert to a list
    legend_colors = sns.color_palette("coolwarm", n_colors=len(legend_labels))  # Match colors to categories
    legend_handles = [Line2D([0], [0], color=color, lw=10) for color in legend_colors]

    plt.legend(
        handles=legend_handles,
        labels=legend_labels,
        title="Distance Range (km)",
        fontsize=12,
        title_fontsize=14,
        loc='upper right'
    )

    # Add titles and labels
    plt.title("Histogram of Earnings Grouped by Distance Ranges", fontsize=16, fontweight='bold')
    plt.xlabel("Earnings (€)", fontsize=14)
    plt.ylabel("Number of Listings", fontsize=14)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()
else:
    print("Required columns 'DIST' and 'REALSUM' are missing from the dataset.")


In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt

# Define the file path to the dataset
# NOTE(review): absolute, machine-specific path; this cell only runs where the file exists.
file_path = "/Users/toyguntez/Downloads/EU_Airbnb_Prices/airbnb_data_geo.csv"

# Load the dataset into a pandas DataFrame (no cleaning is applied in this cell)
data_cleaned = pd.read_csv(file_path)

# Reference coordinates (longitude, latitude) of the 8 cities marked on the map
city_locations = {
    "Amsterdam": {"lng": 4.8952, "lat": 52.3676},
    "Paris": {"lng": 2.3522, "lat": 48.8566},
    "Barcelona": {"lng": 2.1734, "lat": 41.3851},
    "Rome": {"lng": 12.4964, "lat": 41.9028},
    "London": {"lng": -0.1276, "lat": 51.5074},
    "Brussels": {"lng": 4.3517, "lat": 50.8503},
    "Prague": {"lng": 14.4378, "lat": 50.0755},
    "Istanbul": {"lng": 28.9784, "lat": 41.0082},
}

# Assign unique colors to each city for distinction
city_colors = {
    "Amsterdam": "red",
    "Paris": "blue",
    "Barcelona": "green",
    "Rome": "purple",
    "London": "orange",
    "Brussels": "brown",
    "Prague": "pink",
    "Istanbul": "cyan",
}

# Start creating the scatter plot
plt.figure(figsize=(14, 10))

# Create the scatter plot for all listings
scatter = plt.scatter(
    data_cleaned['LNG'],  # x-axis: Longitude
    data_cleaned['LAT'],  # y-axis: Latitude
    c=data_cleaned['REALSUM'],  # Color intensity based on earnings
    cmap='viridis',  # Colormap for visual appeal
    alpha=0.8,  # Transparency level
    s=10  # Size of each point
)

# Add a colorbar
plt.colorbar(scatter, label='Earnings (€)')

# Draw one large, black-edged marker per city; the label feeds the legend below
for city, coords in city_locations.items():
    plt.scatter(
        coords["lng"],
        coords["lat"],
        color=city_colors[city],
        s=100,
        edgecolor='black',
        label=city
    )  # City marker

# Add labels and a title
plt.title('Geographical Distribution: Longitude vs Latitude (Hue: Earnings)', fontsize=16)
plt.xlabel('Longitude', fontsize=14)
plt.ylabel('Latitude', fontsize=14)

# Add a legend to show which color corresponds to each city
plt.legend(title="Cities", fontsize=12, title_fontsize=14, loc="upper right")

# Adjust plot limits so that the listings AND every city marker stay visible.
# Limits derived from the listings alone would clip any city whose coordinates
# lie outside the listings' extent, while that city still showed up in the
# legend. When all markers already fall inside the data extent, these limits
# are identical to the data-only ones.
lng_extent = pd.concat(
    [data_cleaned['LNG'], pd.Series([coords["lng"] for coords in city_locations.values()])],
    ignore_index=True
)
lat_extent = pd.concat(
    [data_cleaned['LAT'], pd.Series([coords["lat"] for coords in city_locations.values()])],
    ignore_index=True
)
plt.xlim(lng_extent.min() - 1, lng_extent.max() + 1)
plt.ylim(lat_extent.min() - 1, lat_extent.max() + 1)

# Display the plot
plt.show()


In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt

# File path for the dataset
# NOTE(review): absolute, machine-specific path; this cell only runs where the file exists.
file_path = "/Users/toyguntez/Downloads/EU_Airbnb_Prices/airbnb_data_geo.csv"

# Load the dataset (no cleaning is applied in this cell)
data_cleaned = pd.read_csv(file_path)

# Reference coordinates (longitude, latitude) of the 8 cities marked on the map
city_locations = {
    "Amsterdam": {"lng": 4.8952, "lat": 52.3676},
    "Paris": {"lng": 2.3522, "lat": 48.8566},
    "Barcelona": {"lng": 2.1734, "lat": 41.3851},
    "Rome": {"lng": 12.4964, "lat": 41.9028},
    "London": {"lng": -0.1276, "lat": 51.5074},
    "Brussels": {"lng": 4.3517, "lat": 50.8503},
    "Prague": {"lng": 14.4378, "lat": 50.0755},
    "Istanbul": {"lng": 28.9784, "lat": 41.0082},
}

# Assign unique colors to each city for distinction
city_colors = {
    "Amsterdam": "red",
    "Paris": "blue",
    "Barcelona": "green",
    "Rome": "purple",
    "London": "orange",
    "Brussels": "brown",
    "Prague": "pink",
    "Istanbul": "cyan",
}

# Start creating the scatter plot
plt.figure(figsize=(14, 10))

# Scatter plot for all listings
scatter = plt.scatter(
    data_cleaned['LNG'],  # x-axis: Longitude
    data_cleaned['LAT'],  # y-axis: Latitude
    c=data_cleaned['GUEST_SATISFACTION_OVERALL'],  # Color intensity based on Guest Satisfaction
    cmap='coolwarm',  # Colormap for visual appeal
    alpha=0.8,  # Transparency level
    s=10  # Size of each point
)

# Add a colorbar
plt.colorbar(scatter, label='Guest Satisfaction')

# Draw one large, black-edged marker per city; the label feeds the legend below
for city, coords in city_locations.items():
    plt.scatter(
        coords["lng"],
        coords["lat"],
        color=city_colors[city],
        s=100,
        edgecolor='black',
        label=city  # Add label for the legend
    )  # City marker

# Add labels and a title
plt.title('Geographical Distribution: Longitude vs Latitude (Hue: Guest Satisfaction)', fontsize=16)
plt.xlabel('Longitude', fontsize=14)
plt.ylabel('Latitude', fontsize=14)

# Add a legend to identify which color corresponds to each city
plt.legend(title="Cities", fontsize=12, title_fontsize=14, loc="upper right")

# Adjust plot limits so that the listings AND every city marker stay visible.
# Limits derived from the listings alone would clip any city whose coordinates
# lie outside the listings' extent, while that city still showed up in the
# legend. When all markers already fall inside the data extent, these limits
# are identical to the data-only ones.
lng_extent = pd.concat(
    [data_cleaned['LNG'], pd.Series([coords["lng"] for coords in city_locations.values()])],
    ignore_index=True
)
lat_extent = pd.concat(
    [data_cleaned['LAT'], pd.Series([coords["lat"] for coords in city_locations.values()])],
    ignore_index=True
)
plt.xlim(lng_extent.min() - 1, lng_extent.max() + 1)
plt.ylim(lat_extent.min() - 1, lat_extent.max() + 1)

# Display the plot
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Where the dataset lives on disk
file_path = "/Users/toyguntez/Downloads/EU_Airbnb_Prices/airbnb_data_geo.csv"

# Read the CSV into a DataFrame
data = pd.read_csv(file_path)

# Both panels need the distance column plus one y-variable each
if not all(
    column in data.columns
    for column in ("DIST", "GUEST_SATISFACTION_OVERALL", "REALSUM")
):
    print("Required columns 'DIST', 'GUEST_SATISFACTION_OVERALL', or 'REALSUM' are missing from the dataset.")
else:
    # Keep only rows where all three plotted columns are present
    data = data.dropna(subset=["DIST", "GUEST_SATISFACTION_OVERALL", "REALSUM"])

    # One row, two independent panels (y-axes are not shared)
    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 6), sharey=False)

    # Per-panel settings: (axes, y column, marker color, title, y-axis label)
    panel_specs = [
        (axes[0], "GUEST_SATISFACTION_OVERALL", "blue",
         "Distance vs Guest Satisfaction", "Guest Satisfaction Level"),
        (axes[1], "REALSUM", "green",
         "Distance vs Earnings", "Earnings (€)"),
    ]

    # Draw each panel with the same x-variable and shared styling
    for panel, y_column, marker_color, panel_title, y_axis_label in panel_specs:
        panel.scatter(data["DIST"], data[y_column], alpha=0.6, color=marker_color)
        panel.set_title(panel_title, fontsize=14)
        panel.set_xlabel("Distance from Landmark (km)", fontsize=12)
        panel.set_ylabel(y_axis_label, fontsize=12)
        panel.grid(alpha=0.4)

    # Tidy up spacing, then render
    plt.tight_layout()
    plt.show()
