In [39]:
import dash
import dash_bootstrap_components as dbc
from dash import html, dcc
import sqlite3
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
import scipy.stats as stats
import plotly.express as px

# Load the three tables into DataFrames. The ID column is read as str in every
# table (dtype={"ID": str}) rather than letting pandas infer a numeric type.
_conn = sqlite3.connect("airbnb_cartagena.sqlite")
try:
    df_attr = pd.read_sql_query("SELECT * FROM Attributes", _conn, dtype={"ID": str})
    df_ts = pd.read_sql_query("SELECT * FROM TimeSeriesRaw", _conn, dtype={"ID": str})
    df_ts_interp = pd.read_sql_query("SELECT * FROM TimeSeriesInterpolated", _conn, dtype={"ID": str})
finally:
    # Close the connection even when a query raises (e.g. a missing table), so
    # re-running this cell does not leave open connections behind in the kernel.
    _conn.close()

In [None]:
# Dark red colour for plots, given as a hex RGB string.
red = "#7e0d24"

In [3]:
import re
# Names of the raw time-series columns whose header is entirely a date of the
# form D/M/YYYY or DD/MM/YYYY (one or two digits, slash-separated, 4-digit year).
# The pattern alone does not tell day-first from month-first headers.
dates = list(filter(re.compile(r"\d{1,2}/\d{1,2}/\d{4}").fullmatch, df_ts.columns))

In [118]:
def price_volatility_heatmap():
    """
    Build a heatmap of each listing's relative deviation from its own mean price.

    Reads the module-level ``df_ts_interp`` frame and the ``dates`` column list.
    Every cell is ``(price - mean_price) / mean_price``, where ``mean_price`` is
    the listing's mean over all date columns. Listings with any missing
    deviation are dropped, and the remaining rows are ordered by their largest
    absolute deviation, ascending.

    Returns
    -------
    plotly.graph_objects.Figure
        Heatmap with X = dates and Y = listing IDs.
    """
    # Relative deviation of every price from its listing's mean
    prices = df_ts_interp.set_index("ID")[dates].astype(float)
    row_means = prices.mean(axis=1)
    dev = prices.sub(row_means, axis=0).div(row_means, axis=0)

    # Keep fully populated rows only, then order by largest absolute deviation
    dev = dev[~dev.isna().any(axis=1)]
    dev = dev.loc[dev.abs().max(axis=1).sort_values(ascending=True).index]

    # Column headers are parsed as day/month/year
    date_index = pd.to_datetime(dates, dayfirst=True, format="%d/%m/%Y")
    ids = dev.index.astype(str)

    # Create figure
    fig = go.Figure(
        go.Heatmap(
            z=dev.values,
            x=date_index,
            y=ids,
            colorscale='oxy',
            zmid=0,
            colorbar=dict(title="Volatility"),
            hovertemplate="Listing: %{y}<br>Date: %{x|%Y-%m-%d}<br>Dev: %{z:.2f}<extra></extra>"
        )
    )
    fig.update_layout(
        title="Price Volatility (Deviation from Listing Mean)",
        xaxis_title="Date",
        yaxis_title="Listing ID",
        # Listing IDs are labels, not quantities. Without an explicit category
        # axis plotly converts numeric-looking strings to numbers, which places
        # rows by ID value and throws away the ordering computed above.
        yaxis=dict(type="category"),
        template="plotly_dark",
        height=700
    )
    return fig

price_volatility_heatmap()

In [None]:
def interpolation_adjustment_histogram():
    """
    Histogram of how far imputed prices sit from each listing's observed mean.

    Reads the module-level ``df_ts`` (raw), ``df_ts_interp`` (interpolated) and
    ``dates``. A cell counts as imputed when it is missing in the raw table but
    present in the interpolated one. Such a cell has no raw value, so
    ``interp - raw`` is NaN there and would leave the histogram empty. The
    adjustment is therefore measured against the listing's mean of its observed
    raw prices instead.

    Returns
    -------
    plotly.graph_objects.Figure
        Histogram of ``interp - listing mean raw`` over the imputed cells.
    """
    raw = df_ts.set_index("ID")[dates].astype(float)
    interp = df_ts_interp.set_index("ID")[dates].astype(float)

    # Imputed cells: raw data is NaN and interp is not NaN
    imputed = raw.isna() & interp.notna()

    # Per-listing baseline from observed raw prices (mean skips NaN); it is NaN
    # for a listing with no observed price, and those cells are dropped below.
    baseline = raw.mean(axis=1)
    adjustments = interp.sub(baseline, axis=0).where(imputed)

    # Long format, one row per imputed cell. The explicit dropna keeps the
    # result the same whether or not stack() itself discards NaN entries.
    diffs = adjustments.stack().dropna().reset_index()
    diffs.columns = ["ID", "Date", "Diff"]

    # Create histogram
    fig = px.histogram(
        diffs,
        x="Diff",
        nbins=50,
        title="Distribution of Interpolation Adjustments (interp − listing mean raw)",
        labels={"Diff": "Price Difference (USD)"},
        template="plotly_dark"
    )
    return fig

interpolation_adjustment_histogram()