# 1. Setup and Imports

In [36]:
import pandas as pd
import psycopg2
from psycopg2 import sql
from dotenv import load_dotenv
import os
from typing import Tuple, Optional
import warnings
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation and dtype warnings.
# Presumably it targets the pandas warning about non-SQLAlchemy DBAPI
# connections -- confirm, and narrow the filter to that category if so.
warnings.filterwarnings('ignore')

# Load environment variables
# (python-dotenv; the DB_* values are read later through os.getenv in
# create_connection).
load_dotenv()

# Visible confirmation that the setup cell ran to completion.
print("‚úì Libraries imported successfully")

‚úì Libraries imported successfully


# 2. Database Connection

In [37]:
def create_connection() -> Optional[psycopg2.extensions.connection]:
    """
    Open a PostgreSQL connection configured from environment variables.

    Environment variables read (with the fallback used when unset):
    - DB_HOST: server host (fallback: localhost)
    - DB_PORT: server port (fallback: 5432)
    - DB_NAME: database name (fallback: airbnb_dimensional)
    - DB_USER: login user (fallback: postgres)
    - DB_PASSWORD: password (no fallback)

    Returns
    -------
    psycopg2.connection or None
        The open connection on success; None if connecting raised any
        exception (the error is printed rather than propagated).

    Example
    -------
    >>> conn = create_connection()
    >>> if conn:
    ...     print("Connected!")
    """
    # Resolve every setting up front so the connect call and the success
    # message share the same database name.
    db_name = os.getenv('DB_NAME', 'airbnb_dimensional')
    settings = {
        'host': os.getenv('DB_HOST', 'localhost'),
        'port': os.getenv('DB_PORT', '5432'),
        'database': db_name,
        'user': os.getenv('DB_USER', 'postgres'),
        'password': os.getenv('DB_PASSWORD'),
    }

    try:
        link = psycopg2.connect(**settings)
        print(f"‚úì Connected to database: {db_name}")
        return link
    except Exception as exc:
        # Best-effort by design: report the failure and let callers test for None.
        print(f"‚úó Connection failed: {exc}")
        return None

# Create connection
# `conn` is shared by the cells below; it is None when create_connection()
# failed, which is why those cells guard their work with `if conn:`.
conn = create_connection()

‚úì Connected to database: airbnb_dimensional


# 3. Helper Functions

In [38]:
def get_table_info(
    conn: psycopg2.extensions.connection,
    table_name: str,
    sample_limit: Optional[int] = None,
) -> Tuple[Optional[pd.DataFrame], int, Optional[pd.DataFrame]]:
    """
    Retrieve column metadata, row count, and row data for a database table.

    Parameters
    ----------
    conn : psycopg2.connection
        Active database connection
    table_name : str
        Name of the table or view to query. It may be schema-qualified
        ("schema.table"). The name is quoted as an SQL identifier, so it is
        matched case-sensitively and cannot inject SQL.
    sample_limit : int, optional
        Maximum number of rows to return as sample data. The default (None)
        returns every row, which is what this function has always done.

    Returns
    -------
    tuple of (DataFrame or None, int, DataFrame or None)
        - Column metadata (name, type, nullable, default) from
          information_schema.columns
        - Total row count
        - Sample data (all rows, or at most `sample_limit` rows)
        If any query fails, the error is printed and (None, 0, None) is
        returned instead.

    Raises
    ------
    ValueError
        If `sample_limit` is given and is negative.

    Example
    -------
    >>> columns, count, sample = get_table_info(conn, 'dim_host')
    >>> print(f"Table has {count} rows and {len(columns)} columns")
    """
    if sample_limit is not None and int(sample_limit) < 0:
        raise ValueError("sample_limit must be a non-negative integer or None")

    try:
        # Get column information (the table name is a bound parameter here).
        column_query = """
        SELECT 
            column_name,
            data_type,
            is_nullable,
            column_default
        FROM information_schema.columns
        WHERE table_name = %s
        ORDER BY ordinal_position;
        """
        columns_df = pd.read_sql_query(column_query, conn, params=(table_name,))

        # Identifiers cannot be bound as query parameters, so compose them with
        # psycopg2.sql instead of string formatting. Splitting on "." keeps
        # schema-qualified names working ("schema"."table").
        table_ident = sql.Identifier(*table_name.split('.'))

        # Get row count. pandas needs a plain string for DBAPI connections,
        # hence as_string().
        count_query = sql.SQL("SELECT COUNT(*) AS count FROM {};").format(
            table_ident
        ).as_string(conn)
        count_df = pd.read_sql_query(count_query, conn)
        # Convert the numpy scalar so the value matches the annotated `int`.
        row_count = int(count_df['count'].iloc[0])

        # Get sample data (optionally capped).
        sample_stmt = sql.SQL("SELECT * FROM {}").format(table_ident)
        if sample_limit is not None:
            sample_stmt = sample_stmt + sql.SQL(" LIMIT {}").format(
                sql.Literal(int(sample_limit))
            )
        sample_query = (sample_stmt + sql.SQL(";")).as_string(conn)
        sample_df = pd.read_sql_query(sample_query, conn)

        return columns_df, row_count, sample_df

    except Exception as e:
        # Best-effort helper: report the problem and return the sentinel triple
        # that display_table_summary checks for.
        print(f"‚úó Error querying {table_name}: {e}")
        return None, 0, None


def display_table_summary(conn: psycopg2.extensions.connection, table_name: str, description: str = "") -> None:
    """
    Print a formatted overview of one database table.

    The overview is a banner with the table name (and description, if given),
    the row count, the column structure, and the rows returned by
    get_table_info.

    Parameters
    ----------
    conn : psycopg2.connection
        Active database connection
    table_name : str
        Name of the table to summarise
    description : str, optional
        Short note on what the table is for; omitted from the banner when empty

    Returns
    -------
    None
        Everything is written to the cell output

    Example
    -------
    >>> display_table_summary(conn, 'dim_host', 'Host quality metrics')
    """
    # Banner
    print(f"\n{'='*80}")
    print(f"TABLE: {table_name}")
    if description:
        print(f"Description: {description}")
    print(f"{'='*80}\n")

    column_meta, total_rows, table_rows = get_table_info(conn, table_name)

    # get_table_info signals failure with None for the column metadata.
    if column_meta is None:
        print(f"‚ö†Ô∏è  Could not retrieve table information")
        return

    print(f"üìä Row Count: {total_rows:,}\n")
    print(f"üìã Column Structure ({len(column_meta)} columns):\n")
    print(column_meta.to_string(index=False))
    print(f"\nüìÑ Sample Data (all rows):\n")

    has_rows = table_rows is not None and len(table_rows) > 0
    if not has_rows:
        print("‚ö†Ô∏è  No data available")
        return

    # `display` is not imported in this notebook; presumably it is the rich
    # display helper that IPython provides in notebook sessions.
    display(table_rows)

# Visible confirmation that this cell ran and the helper functions above are defined.
print("‚úì Helper functions defined")

‚úì Helper functions defined


---
# DIMENSION TABLES

Dimension tables provide descriptive attributes for analysis:
- **dim_host**: Host quality and reputation metrics
- **dim_property**: Physical property characteristics
- **dim_location**: Geographic positioning and clustering
- **dim_category_ratings**: Guest experience quality metrics
- **dim_date**: Time intelligence for temporal analysis

## 4. Dimension Table: dim_host

Host dimension containing quality and reputation metrics for property managers.

In [39]:
# Summarise dim_host, then keep the whole table in memory for later cells.
if conn:
    display_table_summary(
        conn,
        table_name='dim_host',
        description='Host quality and reputation metrics (superhost status, ratings, experience)',
    )

    # Full copy of the table as a DataFrame
    dim_host = pd.read_sql_query(sql="SELECT * FROM dim_host;", con=conn)
    print(f"\n‚úì Loaded {len(dim_host)} host records into DataFrame 'dim_host'")


TABLE: dim_host
Description: Host quality and reputation metrics (superhost status, ratings, experience)

üìä Row Count: 65

üìã Column Structure (12 columns):

           column_name                   data_type is_nullable                             column_default
              host_key                     integer          NO nextval('dim_host_host_key_seq'::regclass)
               host_id                        text          NO                                       None
             host_name                        text         YES                                       None
           host_rating                     numeric         YES                                       None
host_number_of_reviews                     integer         YES                                          0
    host_response_rate                     integer         YES                                       None
    host_years_hosting                     integer         YES                                

Unnamed: 0,host_key,host_id,host_name,host_rating,host_number_of_reviews,host_response_rate,host_years_hosting,is_superhost,host_tier,experience_level,created_at,updated_at
0,1,276673304,Marc & Elizabeth,5.00,5.0,100.0,2.0,False,Premium,New,2025-11-13 16:01:11.990990,2025-11-13 16:01:11.990990
1,2,48803539,Beth - Your Key Rental Management,4.85,9267.0,100.0,10.0,True,Elite,Expert,2025-11-13 16:01:11.990990,2025-11-13 16:01:11.990990
2,3,508453135,Chloe And Jason,4.83,1060.0,100.0,3.0,True,Elite,Experienced,2025-11-13 16:01:11.990990,2025-11-13 16:01:11.990990
3,4,151562562,Aaron,4.76,1171.0,100.0,8.0,False,Premium,Expert,2025-11-13 16:01:11.990990,2025-11-13 16:01:11.990990
4,5,413977704,Kyle,4.87,5491.0,99.0,4.0,True,Elite,Experienced,2025-11-13 16:01:11.990990,2025-11-13 16:01:11.990990
...,...,...,...,...,...,...,...,...,...,...,...,...
60,61,521014624,Cody,4.88,34.0,100.0,2.0,False,Premium,New,2025-11-13 16:01:11.990990,2025-11-13 16:01:11.990990
61,62,511591130,Amanda,4.85,142.0,100.0,2.0,True,Elite,New,2025-11-13 16:01:11.990990,2025-11-13 16:01:11.990990
62,63,517561329,Sarah,4.50,4.0,100.0,2.0,False,Standard,New,2025-11-13 16:01:11.990990,2025-11-13 16:01:11.990990
63,64,50018005,Virginia,,,,,False,Standard,New,2025-11-13 16:01:11.990990,2025-11-13 16:01:11.990990



‚úì Loaded 65 host records into DataFrame 'dim_host'


## 5. Dimension Table: dim_property

Property dimension containing physical characteristics and capacity information.

In [40]:
# Summarise dim_property, then keep the whole table in memory for later cells.
if conn:
    display_table_summary(
        conn,
        table_name='dim_property',
        description='Physical property characteristics (bedrooms, beds, baths, guest capacity)',
    )

    # Full copy of the table as a DataFrame
    dim_property = pd.read_sql_query(sql="SELECT * FROM dim_property;", con=conn)
    print(f"\n‚úì Loaded {len(dim_property)} property records into DataFrame 'dim_property'")


TABLE: dim_property
Description: Physical property characteristics (bedrooms, beds, baths, guest capacity)

üìä Row Count: 100

üìã Column Structure (16 columns):

            column_name                   data_type is_nullable                                     column_default
           property_key                     integer          NO nextval('dim_property_property_key_seq'::regclass)
            property_id                        text          NO                                               None
           listing_name                        text         YES                                               None
          listing_title                        text         YES                                               None
               category                        text         YES                                               None
        guests_capacity                     integer         YES                                               None
               bedrooms     

Unnamed: 0,property_key,property_id,listing_name,listing_title,category,guests_capacity,bedrooms,beds,baths,pets_allowed,is_guest_favorite,property_size_tier,guest_per_bedroom_ratio,bath_to_bedroom_ratio,created_at,updated_at
0,1,1426378005713860735,"Entire condo in Calgary, Canada","Modern 1br Apt, 17th Ave, Downtown & Stampede",Stays,4,1.0,2,1.0,False,False,Small,4.0,1.0,2025-11-13 16:01:12.001663,2025-11-13 16:01:12.001663
1,2,779862525321826168,"Entire rental unit in Calgary, Canada","Heart of the City - 2BR, Prk, AC, Mountain Views",Stays,6,2.0,3,1.0,False,True,Medium,3.0,0.5,2025-11-13 16:01:12.001663,2025-11-13 16:01:12.001663
2,3,1375556219860316591,"Entire rental unit in Calgary, Canada",SUB-PENT House Beltline Oasis 2BR with Panoram...,Stays,4,2.0,2,1.0,False,True,Medium,2.0,0.5,2025-11-13 16:01:12.001663,2025-11-13 16:01:12.001663
3,4,1404688484861443653,"Entire condo in Calgary, Canada","Fabulous 2BR w AC, Parking near BMO, Stampede!",Stays,4,2.0,1,1.0,False,True,Medium,2.0,0.5,2025-11-13 16:01:12.001663,2025-11-13 16:01:12.001663
4,5,21869477,"Entire rental unit in Calgary, Canada",Rare! South Facing Boutique 1Bdrm in Mission!,Stays,2,1.0,1,1.0,True,True,Small,2.0,1.0,2025-11-13 16:01:12.001663,2025-11-13 16:01:12.001663
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,1385246479518368787,"Entire rental unit in Calgary, Canada",beautiful 1 bed 1 den,Stays,5,,1,0.0,False,False,Studio,,,2025-11-13 16:01:12.001663,2025-11-13 16:01:12.001663
96,97,903645870803612120,"Entire rental unit in Calgary, Canada",Open concept studio with style,Stays,2,,1,1.0,False,False,Studio,,,2025-11-13 16:01:12.001663,2025-11-13 16:01:12.001663
97,98,53541236,"Entire condo in Calgary, Canada","Spotless, steps to top restaurants + free park...",Stays,2,1.0,1,1.0,False,False,Small,2.0,1.0,2025-11-13 16:01:12.001663,2025-11-13 16:01:12.001663
98,99,927801207937272334,"Entire rental unit in Calgary, Canada",Tranquil Coastal Colour Inspired 2Bdrm w Parking,Stays,4,2.0,2,2.0,False,True,Medium,2.0,1.0,2025-11-13 16:01:12.001663,2025-11-13 16:01:12.001663



‚úì Loaded 100 property records into DataFrame 'dim_property'


## 6. Dimension Table: dim_location

Location dimension with geographic coordinates, clustering, and neighborhood information.

In [41]:
# Summarise dim_location, then keep the whole table in memory for later cells.
if conn:
    display_table_summary(
        conn,
        table_name='dim_location',
        description='Geographic positioning, location clusters, and distance metrics',
    )

    # Full copy of the table as a DataFrame
    dim_location = pd.read_sql_query(sql="SELECT * FROM dim_location;", con=conn)
    print(f"\n‚úì Loaded {len(dim_location)} location records into DataFrame 'dim_location'")


TABLE: dim_location
Description: Geographic positioning, location clusters, and distance metrics

üìä Row Count: 86

üìã Column Structure (12 columns):

            column_name                   data_type is_nullable                                     column_default
           location_key                     integer          NO nextval('dim_location_location_key_seq'::regclass)
                   city                        text         YES                                               None
               province                        text         YES                                               None
                country                        text         YES                                               None
               latitude                     numeric         YES                                               None
              longitude                     numeric         YES                                               None
           neighborhood                

Unnamed: 0,location_key,city,province,country,latitude,longitude,neighborhood,location_cluster_id,distance_to_downtown_km,location_tier,created_at,updated_at
0,1,Calgary,Alberta,Canada,51.04291,-114.07589,,7,0.34,Urban Core,2025-11-13 16:01:13.464632,2025-11-13 16:01:13.464632
1,2,Calgary,Alberta,Canada,51.03126,-114.07150,,2,1.49,Downtown Adjacent,2025-11-13 16:01:13.464632,2025-11-13 16:01:13.464632
2,3,Calgary,Alberta,Canada,51.04014,-114.06605,,0,0.65,Urban Core,2025-11-13 16:01:13.464632,2025-11-13 16:01:13.464632
3,4,Calgary,Alberta,Canada,51.04242,-114.07550,,1,0.36,Urban Core,2025-11-13 16:01:13.464632,2025-11-13 16:01:13.464632
4,5,Calgary,Alberta,Canada,51.04060,-114.06530,,0,0.65,Urban Core,2025-11-13 16:01:13.464632,2025-11-13 16:01:13.464632
...,...,...,...,...,...,...,...,...,...,...,...,...
81,82,Calgary,Alberta,Canada,51.03951,-114.06603,,0,0.71,Urban Core,2025-11-13 16:01:13.464632,2025-11-13 16:01:13.464632
82,83,Calgary,Alberta,Canada,51.04419,-114.07540,,7,0.25,Urban Core,2025-11-13 16:01:13.464632,2025-11-13 16:01:13.464632
83,84,Calgary,Alberta,Canada,51.04416,-114.07527,,7,0.24,Urban Core,2025-11-13 16:01:13.464632,2025-11-13 16:01:13.464632
84,85,Calgary,Alberta,Canada,51.04140,-114.06640,,0,0.53,Urban Core,2025-11-13 16:01:13.464632,2025-11-13 16:01:13.464632



‚úì Loaded 86 location records into DataFrame 'dim_location'


## 7. Dimension Table: dim_category_ratings

Category ratings dimension containing detailed guest experience quality metrics.

In [42]:
# Summarise dim_category_ratings, then keep the whole table in memory for later cells.
if conn:
    display_table_summary(
        conn,
        table_name='dim_category_ratings',
        description='Detailed ratings (cleanliness, accuracy, check-in, communication, location, value)',
    )

    # Full copy of the table as a DataFrame
    dim_category_ratings = pd.read_sql_query(sql="SELECT * FROM dim_category_ratings;", con=conn)
    print(f"\n‚úì Loaded {len(dim_category_ratings)} rating records into DataFrame 'dim_category_ratings'")


TABLE: dim_category_ratings
Description: Detailed ratings (cleanliness, accuracy, check-in, communication, location, value)

üìä Row Count: 93

üìã Column Structure (12 columns):

          column_name                   data_type is_nullable                                           column_default
           rating_key                     integer          NO nextval('dim_category_ratings_rating_key_seq'::regclass)
   cleanliness_rating                     numeric         YES                                                     None
      accuracy_rating                     numeric         YES                                                     None
       checkin_rating                     numeric         YES                                                     None
 communication_rating                     numeric         YES                                                     None
      location_rating                     numeric         YES                                          

Unnamed: 0,rating_key,cleanliness_rating,accuracy_rating,checkin_rating,communication_rating,location_rating,value_rating,overall_quality_score,quality_tier,value_index,created_at,updated_at
0,1,4.0,4.8,4.4,4.6,4.0,4.6,4.37,Good,1.05,2025-11-13 16:01:13.479713,2025-11-13 16:01:13.479713
1,2,5.0,5.0,5.0,5.0,4.9,5.0,4.98,Exceptional,1.00,2025-11-13 16:01:13.479713,2025-11-13 16:01:13.479713
2,3,4.8,4.9,4.8,4.9,4.8,4.8,4.83,Exceptional,0.99,2025-11-13 16:01:13.479713,2025-11-13 16:01:13.479713
3,4,4.7,4.6,4.8,4.8,4.9,4.5,4.70,Excellent,0.96,2025-11-13 16:01:13.479713,2025-11-13 16:01:13.479713
4,5,4.8,4.9,4.8,4.9,4.9,4.8,4.85,Exceptional,0.99,2025-11-13 16:01:13.479713,2025-11-13 16:01:13.479713
...,...,...,...,...,...,...,...,...,...,...,...,...
88,89,5.0,5.0,5.0,5.0,5.0,4.7,4.94,Exceptional,0.95,2025-11-13 16:01:13.479713,2025-11-13 16:01:13.479713
89,90,4.9,5.0,4.9,5.0,5.0,4.9,4.95,Exceptional,0.99,2025-11-13 16:01:13.479713,2025-11-13 16:01:13.479713
90,91,4.9,5.0,4.9,4.9,4.9,4.8,4.90,Exceptional,0.98,2025-11-13 16:01:13.479713,2025-11-13 16:01:13.479713
91,92,5.0,5.0,4.9,5.0,4.9,4.9,4.96,Exceptional,0.99,2025-11-13 16:01:13.479713,2025-11-13 16:01:13.479713



‚úì Loaded 93 rating records into DataFrame 'dim_category_ratings'


## 8. Dimension Table: dim_date

Date dimension for time intelligence and seasonality analysis (Calgary-specific seasons).

In [43]:
if conn:
    display_table_summary(
        conn, 
        'dim_date',
        'Time intelligence (year, quarter, month, season, weekday/weekend)'
    )
    
    # Load a capped slice of the table (at most 1000 rows), NOT the full table.
    # ORDER BY date_key makes the slice deterministic: with LIMIT alone the
    # database is free to return any 1000 rows, in any order.
    # NOTE(review): the recorded output shows 1,096 rows in dim_date, so the
    # cap drops the latest dates -- raise or remove it if later cells need them.
    dim_date = pd.read_sql_query("SELECT * FROM dim_date ORDER BY date_key LIMIT 1000;", conn)
    print(f"\n‚úì Loaded {len(dim_date)} date records into DataFrame 'dim_date' (limited to 1000)")


TABLE: dim_date
Description: Time intelligence (year, quarter, month, season, weekday/weekend)

üìä Row Count: 1,096

üìã Column Structure (11 columns):

 column_name data_type is_nullable column_default
    date_key   integer          NO           None
   full_date      date          NO           None
        year   integer          NO           None
     quarter   integer          NO           None
       month   integer          NO           None
  month_name      text          NO           None
week_of_year   integer          NO           None
 day_of_week   integer          NO           None
    day_name      text          NO           None
  is_weekend   boolean          NO           None
      season      text         YES           None

üìÑ Sample Data (all rows):



Unnamed: 0,date_key,full_date,year,quarter,month,month_name,week_of_year,day_of_week,day_name,is_weekend,season
0,20240101,2024-01-01,2024,1,1,January,1,1,Monday,False,Winter
1,20240102,2024-01-02,2024,1,1,January,1,2,Tuesday,False,Winter
2,20240103,2024-01-03,2024,1,1,January,1,3,Wednesday,False,Winter
3,20240104,2024-01-04,2024,1,1,January,1,4,Thursday,False,Winter
4,20240105,2024-01-05,2024,1,1,January,1,5,Friday,False,Winter
...,...,...,...,...,...,...,...,...,...,...,...
1091,20261227,2026-12-27,2026,4,12,December,52,0,Sunday,True,Winter
1092,20261228,2026-12-28,2026,4,12,December,53,1,Monday,False,Winter
1093,20261229,2026-12-29,2026,4,12,December,53,2,Tuesday,False,Winter
1094,20261230,2026-12-30,2026,4,12,December,53,3,Wednesday,False,Winter



‚úì Loaded 1000 date records into DataFrame 'dim_date' (limited to 1000)


---
# FACT TABLES

Fact tables contain measurable business metrics:
- **fact_listing_metrics**: Central fact table with pricing and performance metrics
- **fact_listing_amenities_summary**: Aggregate amenity metrics per listing
- **fact_competitor_pricing_analysis**: Aggregated competitor pricing statistics

## 9. Fact Table: fact_listing_metrics

Central fact table containing listing performance metrics with dimensional context.

In [44]:
# Summarise fact_listing_metrics, then keep the whole table in memory for later cells.
if conn:
    display_table_summary(
        conn,
        table_name='fact_listing_metrics',
        description='Core metrics: price, ratings, reviews, competitiveness scores',
    )

    # Full copy of the table as a DataFrame
    fact_listing_metrics = pd.read_sql_query(sql="SELECT * FROM fact_listing_metrics;", con=conn)
    print(f"\n‚úì Loaded {len(fact_listing_metrics)} listing metric records into DataFrame 'fact_listing_metrics'")


TABLE: fact_listing_metrics
Description: Core metrics: price, ratings, reviews, competitiveness scores

üìä Row Count: 100

üìã Column Structure (21 columns):

          column_name                   data_type is_nullable                                            column_default
          listing_key                     integer          NO nextval('fact_listing_metrics_listing_key_seq'::regclass)
          property_id                        text          NO                                                      None
             host_key                     integer         YES                                                      None
         property_key                     integer         YES                                                      None
         location_key                     integer         YES                                                      None
           rating_key                     integer         YES                                                      No

Unnamed: 0,listing_key,property_id,host_key,property_key,location_key,rating_key,date_key,price_per_night,listing_rating,number_of_reviews,...,price_per_guest,price_per_bedroom,price_per_bed,review_velocity,competitiveness_score,value_score,popularity_index,snapshot_date,created_at,updated_at
0,1,1426378005713860735,1,1,41,89.0,20251113,181.50,5.00,3,...,45.38,181.50,90.75,1.00,30.75,100.00,1.50,2025-11-13,2025-11-13 16:01:13.491582,2025-11-13 16:01:13.491582
1,2,779862525321826168,2,2,68,44.0,20251113,,4.85,151,...,,,,50.33,64.10,,73.23,2025-11-13,2025-11-13 16:01:13.491582,2025-11-13 16:01:13.491582
2,3,1375556219860316591,3,3,40,83.0,20251113,147.50,4.95,20,...,36.88,73.75,73.75,6.67,44.70,100.00,9.90,2025-11-13,2025-11-13 16:01:13.491582,2025-11-13 16:01:13.491582
3,4,1404688484861443653,2,4,11,14.0,20251113,196.50,4.94,18,...,49.13,98.25,196.50,6.00,44.14,100.00,8.89,2025-11-13,2025-11-13 16:01:13.491582,2025-11-13 16:01:13.491582
4,5,21869477,4,5,63,91.0,20251113,135.62,4.93,113,...,67.81,135.62,135.62,37.67,64.58,100.00,55.71,2025-11-13,2025-11-13 16:01:13.491582,2025-11-13 16:01:13.491582
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,1385246479518368787,6,96,61,,20251113,204.27,5.00,1,...,40.85,,204.27,0.33,30.25,97.91,0.50,2025-11-13,2025-11-13 16:01:13.491582,2025-11-13 16:01:13.491582
96,97,903645870803612120,63,97,53,37.0,20251113,142.08,4.50,4,...,71.04,,142.08,1.33,28.00,100.00,1.80,2025-11-13,2025-11-13 16:01:13.491582,2025-11-13 16:01:13.491582
97,98,53541236,64,98,55,48.0,20251113,166.63,4.72,138,...,83.32,166.63,166.63,46.00,53.32,100.00,65.14,2025-11-13,2025-11-13 16:01:13.491582,2025-11-13 16:01:13.491582
98,99,927801207937272334,2,99,21,52.0,20251113,172.50,4.83,98,...,43.13,86.25,86.25,32.67,63.48,100.00,47.33,2025-11-13,2025-11-13 16:01:13.491582,2025-11-13 16:01:13.491582



‚úì Loaded 100 listing metric records into DataFrame 'fact_listing_metrics'


## 10. Fact Table: fact_listing_amenities_summary

Aggregate fact table quantifying amenity counts and scores per listing.

In [45]:
# Summarise fact_listing_amenities_summary, then keep the whole table in memory for later cells.
if conn:
    display_table_summary(
        conn,
        table_name='fact_listing_amenities_summary',
        description='Amenity counts (total, essential, luxury, safety) and calculated scores',
    )

    # Full copy of the table as a DataFrame
    fact_listing_amenities_summary = pd.read_sql_query(sql="SELECT * FROM fact_listing_amenities_summary;", con=conn)
    print(f"\n‚úì Loaded {len(fact_listing_amenities_summary)} amenity summary records into DataFrame 'fact_listing_amenities_summary'")


TABLE: fact_listing_amenities_summary
Description: Amenity counts (total, essential, luxury, safety) and calculated scores

üìä Row Count: 100

üìã Column Structure (10 columns):

              column_name                   data_type is_nullable                                                              column_default
      amenity_summary_key                     integer          NO nextval('fact_listing_amenities_summary_amenity_summary_key_seq'::regclass)
              listing_key                     integer         YES                                                                        None
    total_amenities_count                     integer         YES                                                                           0
essential_amenities_count                     integer         YES                                                                           0
   luxury_amenities_count                     integer         YES                                          

Unnamed: 0,amenity_summary_key,listing_key,total_amenities_count,essential_amenities_count,luxury_amenities_count,safety_amenities_count,amenity_score,amenity_tier,created_at,updated_at
0,1,1,50,7,0,4,18,Standard,2025-11-13 16:01:13.506500,2025-11-13 16:01:13.506500
1,2,2,53,4,0,4,12,Basic,2025-11-13 16:01:13.506500,2025-11-13 16:01:13.506500
2,3,3,44,6,0,4,16,Standard,2025-11-13 16:01:13.506500,2025-11-13 16:01:13.506500
3,4,4,51,8,0,4,20,Standard,2025-11-13 16:01:13.506500,2025-11-13 16:01:13.506500
4,5,5,39,6,1,3,18,Standard,2025-11-13 16:01:13.506500,2025-11-13 16:01:13.506500
...,...,...,...,...,...,...,...,...,...,...
95,96,96,17,6,0,2,14,Basic,2025-11-13 16:01:13.506500,2025-11-13 16:01:13.506500
96,97,97,39,5,0,2,12,Basic,2025-11-13 16:01:13.506500,2025-11-13 16:01:13.506500
97,98,98,57,4,2,3,17,Standard,2025-11-13 16:01:13.506500,2025-11-13 16:01:13.506500
98,99,99,59,4,2,4,18,Standard,2025-11-13 16:01:13.506500,2025-11-13 16:01:13.506500



‚úì Loaded 100 amenity summary records into DataFrame 'fact_listing_amenities_summary'


## 11. Fact Table: fact_competitor_pricing_analysis

Aggregated competitor pricing statistics and price recommendations.

In [46]:
# Summarise fact_competitor_pricing_analysis, then keep the whole table in memory for later cells.
if conn:
    display_table_summary(
        conn,
        table_name='fact_competitor_pricing_analysis',
        description='Competitor pricing statistics (avg, median, percentiles) and recommendations',
    )

    # Full copy of the table as a DataFrame
    fact_competitor_pricing_analysis = pd.read_sql_query(sql="SELECT * FROM fact_competitor_pricing_analysis;", con=conn)
    print(f"\n‚úì Loaded {len(fact_competitor_pricing_analysis)} pricing analysis records into DataFrame 'fact_competitor_pricing_analysis'")


TABLE: fact_competitor_pricing_analysis
Description: Competitor pricing statistics (avg, median, percentiles) and recommendations

üìä Row Count: 100

üìã Column Structure (17 columns):

              column_name                   data_type is_nullable                                                                 column_default
     pricing_analysis_key                     integer          NO nextval('fact_competitor_pricing_analysis_pricing_analysis_key_seq'::regclass)
              listing_key                     integer         YES                                                                           None
        analysis_date_key                     integer         YES                                                                           None
         competitor_count                     integer         YES                                                                             25
     avg_competitor_price                     numeric         YES                    

Unnamed: 0,pricing_analysis_key,listing_key,analysis_date_key,competitor_count,avg_competitor_price,min_competitor_price,max_competitor_price,median_competitor_price,percentile_25_price,percentile_75_price,weighted_avg_price,price_premium_discount,recommended_price_lower,recommended_price_upper,recommended_optimal_price,created_at,updated_at
0,1,1,20251113,25,169.80,123.08,250.49,169.75,160.00,179.00,169.60,7.02,152.00,187.95,188.44,2025-11-13 16:01:13.755790,2025-11-13 16:01:13.755790
1,2,2,20251113,25,192.41,144.33,297.84,182.69,163.12,214.46,122.76,,154.97,225.18,132.31,2025-11-13 16:01:13.755790,2025-11-13 16:01:13.755790
2,3,3,20251113,25,177.98,144.33,216.83,177.25,163.12,193.20,114.00,29.38,154.97,202.86,125.41,2025-11-13 16:01:13.755790,2025-11-13 16:01:13.755790
3,4,4,20251113,25,180.57,144.33,237.00,177.25,160.38,194.40,131.26,49.70,152.36,204.12,144.09,2025-11-13 16:01:13.755790,2025-11-13 16:01:13.755790
4,5,5,20251113,25,163.69,115.67,269.16,163.94,145.50,179.00,136.81,-0.87,138.22,187.95,149.88,2025-11-13 16:01:13.755790,2025-11-13 16:01:13.755790
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,96,20251113,25,172.81,142.08,204.20,172.00,162.00,183.32,145.90,40.01,153.90,192.49,162.11,2025-11-13 16:01:13.755790,2025-11-13 16:01:13.755790
96,97,97,20251113,25,159.39,115.67,204.27,162.03,147.78,170.95,153.33,-7.34,140.39,179.49,153.33,2025-11-13 16:01:13.755790,2025-11-13 16:01:13.755790
97,98,98,20251113,25,167.15,123.08,269.16,167.79,146.93,179.00,154.02,8.18,139.58,187.95,161.56,2025-11-13 16:01:13.755790,2025-11-13 16:01:13.755790
98,99,99,20251113,25,181.90,144.33,237.00,182.69,160.38,196.12,131.33,31.35,152.36,205.93,140.96,2025-11-13 16:01:13.755790,2025-11-13 16:01:13.755790



‚úì Loaded 100 pricing analysis records into DataFrame 'fact_competitor_pricing_analysis'


---
# BRIDGE TABLE

Bridge table managing many-to-many relationships:
- **bridge_listing_competitors**: Pre-computed top 25 competitors per listing with similarity scores

## 12. Bridge Table: bridge_listing_competitors

Many-to-many relationship table linking listings to their top 25 competitors.

In [47]:
# Summarise bridge_listing_competitors, then keep the whole table in memory for later cells.
if conn:
    display_table_summary(
        conn,
        table_name='bridge_listing_competitors',
        description='Top 25 competitors per listing with similarity scores and rankings',
    )

    # Full copy of the table as a DataFrame
    bridge_listing_competitors = pd.read_sql_query(sql="SELECT * FROM bridge_listing_competitors;", con=conn)
    print(f"\n‚úì Loaded {len(bridge_listing_competitors)} competitor relationship records into DataFrame 'bridge_listing_competitors'")


TABLE: bridge_listing_competitors
Description: Top 25 competitors per listing with similarity scores and rankings

üìä Row Count: 2,500

üìã Column Structure (13 columns):

             column_name                   data_type is_nullable                                                 column_default
              bridge_key                     integer          NO nextval('bridge_listing_competitors_bridge_key_seq'::regclass)
             listing_key                     integer         YES                                                           None
  competitor_listing_key                     integer         YES                                                           None
         similarity_rank                     integer          NO                                                           None
overall_similarity_score                     numeric          NO                                                           None
     location_similarity                     numeric    

Unnamed: 0,bridge_key,listing_key,competitor_listing_key,similarity_rank,overall_similarity_score,location_similarity,property_similarity,quality_similarity,amenity_similarity,price_similarity,weight,is_active,last_updated
0,1,1,92,1,97.00,100.00,95.0,97.6,100.0,87.31,0.047,True,2025-11-13 16:01:13.532857
1,2,1,30,2,86.09,68.06,95.0,100.0,88.0,97.25,0.042,True,2025-11-13 16:01:13.532857
2,3,1,85,3,85.75,67.06,95.0,100.0,88.0,97.25,0.042,True,2025-11-13 16:01:13.532857
3,4,1,52,4,84.85,100.00,60.0,96.6,100.0,55.31,0.041,True,2025-11-13 16:01:13.532857
4,5,1,20,5,83.02,58.71,95.0,99.6,90.0,97.99,0.040,True,2025-11-13 16:01:13.532857
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,2496,100,95,21,82.32,60.93,100.0,97.6,82.0,82.69,0.039,True,2025-11-13 16:01:13.532857
2496,2497,100,27,22,82.02,83.32,90.0,98.2,76.0,31.18,0.039,True,2025-11-13 16:01:13.532857
2497,2498,100,57,23,81.67,61.20,95.0,97.6,90.0,79.83,0.038,True,2025-11-13 16:01:13.532857
2498,2499,100,63,24,80.16,59.81,95.0,99.6,96.0,59.58,0.038,True,2025-11-13 16:01:13.532857



‚úì Loaded 2500 competitor relationship records into DataFrame 'bridge_listing_competitors'


---
# VIEWS

Pre-built views for common queries:
- **view_listing_summary**: Denormalized view combining fact and dimensions
- **view_top_competitors**: Materialized view of the top 25 competitors per listing (pre-filtered)
- **view_price_recommendations**: Pricing analysis with competitive context

## 13. View: view_listing_summary

Denormalized view combining fact_listing_metrics with all dimension tables.

In [48]:
# Only touch the database when a connection object is available.
if conn:
    # Print the helper's summary for the denormalized listing view.
    display_table_summary(
        conn,
        'view_listing_summary',
        'Complete listing profile: combines metrics with property, location, host, and rating details',
    )

    # Pull every row of the view into a DataFrame for analysis.
    view_listing_summary = pd.read_sql_query(
        sql="SELECT * FROM view_listing_summary;",
        con=conn,
    )
    print(f"\n‚úì Loaded {len(view_listing_summary)} listing summary records into DataFrame 'view_listing_summary'")


TABLE: view_listing_summary
Description: Complete listing profile: combines metrics with property, location, host, and rating details

üìä Row Count: 100

üìã Column Structure (44 columns):

            column_name data_type is_nullable column_default
            listing_key   integer         YES           None
            property_id      text         YES           None
           listing_name      text         YES           None
          listing_title      text         YES           None
               category      text         YES           None
        guests_capacity   integer         YES           None
               bedrooms   integer         YES           None
                   beds   integer         YES           None
                  baths   integer         YES           None
           pets_allowed   boolean         YES           None
      is_guest_favorite   boolean         YES           None
     property_size_tier      text         YES           None
             

Unnamed: 0,listing_key,property_id,listing_name,listing_title,category,guests_capacity,bedrooms,beds,baths,pets_allowed,...,price_per_guest,price_per_bedroom,competitiveness_score,value_score,popularity_index,total_amenities_count,amenity_tier,amenity_score,snapshot_date,is_available
0,1,1426378005713860735,"Entire condo in Calgary, Canada","Modern 1br Apt, 17th Ave, Downtown & Stampede",Stays,4,1.0,2,1.0,False,...,45.38,181.50,30.75,100.00,1.50,50,Standard,18,2025-11-13,True
1,2,779862525321826168,"Entire rental unit in Calgary, Canada","Heart of the City - 2BR, Prk, AC, Mountain Views",Stays,6,2.0,3,1.0,False,...,,,64.10,,73.23,53,Basic,12,2025-11-13,False
2,3,1375556219860316591,"Entire rental unit in Calgary, Canada",SUB-PENT House Beltline Oasis 2BR with Panoram...,Stays,4,2.0,2,1.0,False,...,36.88,73.75,44.70,100.00,9.90,44,Standard,16,2025-11-13,True
3,4,1404688484861443653,"Entire condo in Calgary, Canada","Fabulous 2BR w AC, Parking near BMO, Stampede!",Stays,4,2.0,1,1.0,False,...,49.13,98.25,44.14,100.00,8.89,51,Standard,20,2025-11-13,True
4,5,21869477,"Entire rental unit in Calgary, Canada",Rare! South Facing Boutique 1Bdrm in Mission!,Stays,2,1.0,1,1.0,True,...,67.81,135.62,64.58,100.00,55.71,39,Standard,18,2025-11-13,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,1385246479518368787,"Entire rental unit in Calgary, Canada",beautiful 1 bed 1 den,Stays,5,,1,0.0,False,...,40.85,,30.25,97.91,0.50,17,Basic,14,2025-11-13,True
96,97,903645870803612120,"Entire rental unit in Calgary, Canada",Open concept studio with style,Stays,2,,1,1.0,False,...,71.04,,28.00,100.00,1.80,39,Basic,12,2025-11-13,True
97,98,53541236,"Entire condo in Calgary, Canada","Spotless, steps to top restaurants + free park...",Stays,2,1.0,1,1.0,False,...,83.32,166.63,53.32,100.00,65.14,57,Standard,17,2025-11-13,True
98,99,927801207937272334,"Entire rental unit in Calgary, Canada",Tranquil Coastal Colour Inspired 2Bdrm w Parking,Stays,4,2.0,2,2.0,False,...,43.13,86.25,63.48,100.00,47.33,59,Standard,18,2025-11-13,True



‚úì Loaded 100 listing summary records into DataFrame 'view_listing_summary'


## 14. Materialized View: view_top_competitors

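Pre-computed and pre-filtered view of the top 25 competitors per listing, with comparison metrics.

Note: the "Column Structure" section in the output below reports 0 columns. This is presumably because the summary helper reads column metadata from `information_schema.columns`, which does not list PostgreSQL materialized views (TODO: confirm against the helper's query). The loaded DataFrame still contains all columns.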

In [49]:
# Only touch the database when a connection object is available.
if conn:
    # Print the helper's summary for the competitor materialized view.
    display_table_summary(
        conn,
        'view_top_competitors',
        'Pre-computed top 25 competitors with similarity scores and comparison metrics',
    )

    # Pull every row of the materialized view into a DataFrame for analysis.
    view_top_competitors = pd.read_sql_query(
        sql="SELECT * FROM view_top_competitors;",
        con=conn,
    )
    print(f"\n‚úì Loaded {len(view_top_competitors)} competitor comparison records into DataFrame 'view_top_competitors'")


TABLE: view_top_competitors
Description: Pre-computed top 25 competitors with similarity scores and comparison metrics

üìä Row Count: 2,500

üìã Column Structure (0 columns):

Empty DataFrame
Columns: [column_name, data_type, is_nullable, column_default]
Index: []

üìÑ Sample Data (all rows):



Unnamed: 0,listing_key,competitor_listing_key,similarity_rank,overall_similarity_score,location_similarity,property_similarity,quality_similarity,amenity_similarity,price_similarity,weight,source_property_id,source_price,source_rating,competitor_property_id,competitor_price,competitor_rating,source_cluster,competitor_cluster,distance_km
0,1,92,1,97.00,100.00,95.0,97.6,100.0,87.31,0.047,1426378005713860735,181.5,5.00,917958382556358309,169.98,4.88,3,3,0.437099
1,1,30,2,86.09,68.06,95.0,100.0,88.0,97.25,0.042,1426378005713860735,181.5,5.00,52441913,179.00,5.00,3,8,0.769676
2,1,85,3,85.75,67.06,95.0,100.0,88.0,97.25,0.042,1426378005713860735,181.5,5.00,52442502,179.00,5.00,3,8,0.799130
3,1,52,4,84.85,100.00,60.0,96.6,100.0,55.31,0.041,1426378005713860735,181.5,5.00,1326868547821997402,140.94,4.83,3,3,0.476124
4,1,20,5,83.02,58.71,95.0,99.6,90.0,97.99,0.040,1426378005713860735,181.5,5.00,551855963871300920,183.32,4.98,3,1,1.065286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,100,95,21,82.32,60.93,100.0,97.6,82.0,82.69,0.039,1124683783587183030,151.4,4.78,976167921108948089,164.50,4.90,4,0,0.990840
2496,100,27,22,82.02,83.32,90.0,98.2,76.0,31.18,0.039,1124683783587183030,151.4,4.78,622426811556635844,203.50,4.87,4,7,0.364922
2497,100,57,23,81.67,61.20,95.0,97.6,90.0,79.83,0.038,1124683783587183030,151.4,4.78,641534531346733649,166.67,4.66,4,0,0.982106
2498,100,63,24,80.16,59.81,95.0,99.6,96.0,59.58,0.038,1124683783587183030,151.4,4.78,1263079511324144495,182.00,4.76,4,0,1.028012



‚úì Loaded 2500 competitor comparison records into DataFrame 'view_top_competitors'


## 15. View: view_price_recommendations

Pricing analysis view combining current pricing with competitive intelligence.

In [50]:
# Only touch the database when a connection object is available.
if conn:
    # Print the helper's summary for the pricing-recommendation view.
    display_table_summary(
        conn,
        'view_price_recommendations',
        'Price recommendations with competitor statistics and pricing status',
    )

    # Pull every row of the view into a DataFrame for analysis.
    view_price_recommendations = pd.read_sql_query(sql="SELECT * FROM view_price_recommendations;", con=conn)
    print(f"\n‚úì Loaded {len(view_price_recommendations)} price recommendation records into DataFrame 'view_price_recommendations'")


TABLE: view_price_recommendations
Description: Price recommendations with competitor statistics and pricing status

üìä Row Count: 100

üìã Column Structure (24 columns):

              column_name data_type is_nullable column_default
              listing_key   integer         YES           None
              property_id      text         YES           None
             listing_name      text         YES           None
            current_price   numeric         YES           None
           listing_rating   numeric         YES           None
        number_of_reviews   integer         YES           None
                     city      text         YES           None
            location_tier      text         YES           None
                 bedrooms   integer         YES           None
          guests_capacity   integer         YES           None
       property_size_tier      text         YES           None
         competitor_count   integer         YES           None
     a

Unnamed: 0,listing_key,property_id,listing_name,current_price,listing_rating,number_of_reviews,city,location_tier,bedrooms,guests_capacity,...,weighted_avg_price,percentile_25_price,percentile_75_price,recommended_optimal_price,recommended_price_lower,recommended_price_upper,price_premium_discount,price_difference,pricing_status,analysis_date
0,1,1426378005713860735,"Entire condo in Calgary, Canada",181.50,5.00,3,Calgary,Downtown Adjacent,1.0,4,...,169.60,160.00,179.00,188.44,152.00,187.95,7.02,-6.94,OPTIMAL,2025-11-13
1,2,779862525321826168,"Entire rental unit in Calgary, Canada",,4.85,151,Calgary,Urban Core,2.0,6,...,122.76,163.12,214.46,132.31,154.97,225.18,,,OPTIMAL,2025-11-13
2,3,1375556219860316591,"Entire rental unit in Calgary, Canada",147.50,4.95,20,Calgary,Urban Core,2.0,4,...,114.00,163.12,193.20,125.41,154.97,202.86,29.38,22.09,UNDERPRICED,2025-11-13
3,4,1404688484861443653,"Entire condo in Calgary, Canada",196.50,4.94,18,Calgary,Urban Core,2.0,4,...,131.26,160.38,194.40,144.09,152.36,204.12,49.70,52.41,OPTIMAL,2025-11-13
4,5,21869477,"Entire rental unit in Calgary, Canada",135.62,4.93,113,Calgary,Downtown Adjacent,1.0,2,...,136.81,145.50,179.00,149.88,138.22,187.95,-0.87,-14.26,UNDERPRICED,2025-11-13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,1385246479518368787,"Entire rental unit in Calgary, Canada",204.27,5.00,1,Calgary,Urban Core,,5,...,145.90,162.00,183.32,162.11,153.90,192.49,40.01,42.16,OVERPRICED,2025-11-13
96,97,903645870803612120,"Entire rental unit in Calgary, Canada",142.08,4.50,4,Calgary,Urban Core,,2,...,153.33,147.78,170.95,153.33,140.39,179.49,-7.34,-11.25,OPTIMAL,2025-11-13
97,98,53541236,"Entire condo in Calgary, Canada",166.63,4.72,138,Calgary,Downtown Adjacent,1.0,2,...,154.02,146.93,179.00,161.56,139.58,187.95,8.18,5.07,OPTIMAL,2025-11-13
98,99,927801207937272334,"Entire rental unit in Calgary, Canada",172.50,4.83,98,Calgary,Urban Core,2.0,4,...,131.33,160.38,196.12,140.96,152.36,205.93,31.35,31.54,OPTIMAL,2025-11-13



‚úì Loaded 100 price recommendation records into DataFrame 'view_price_recommendations'


---
## 16. Summary Statistics

Quick overview of all loaded DataFrames.

In [51]:
# Build a one-row-per-object overview (rows, columns, memory) of the
# DataFrames that the preceding cells are expected to have created.
print("\n" + "="*80)
print("DATABASE EXPLORATION SUMMARY")
print("="*80 + "\n")

# (DataFrame variable name, object type) pairs; each name is looked up in the
# notebook namespace below, so it must match the variable assigned when loading.
tables = [
    ('dim_host', 'Dimension'),
    ('dim_property', 'Dimension'),
    ('dim_location', 'Dimension'),
    ('dim_category_ratings', 'Dimension'),
    ('dim_date', 'Dimension'),
    ('fact_listing_metrics', 'Fact'),
    ('fact_listing_amenities_summary', 'Fact'),
    ('fact_competitor_pricing_analysis', 'Fact'),
    ('bridge_listing_competitors', 'Bridge'),
    ('view_listing_summary', 'View'),
    ('view_top_competitors', 'Materialized View'),
    ('view_price_recommendations', 'View')
]

summary_data = []
missing_tables = []  # names that are undefined or not bound to a DataFrame
notebook_namespace = globals()
for table_name, table_type in tables:
    # Plain dictionary lookup instead of eval(): no code execution, and a
    # missing name is an ordinary None rather than a swallowed exception.
    frame = notebook_namespace.get(table_name)
    if isinstance(frame, pd.DataFrame):
        summary_data.append({
            'Table': table_name,
            'Type': table_type,
            'Rows': len(frame),
            'Columns': len(frame.columns),
            # deep=True also counts the contents of object (string) columns.
            'Memory (MB)': round(frame.memory_usage(deep=True).sum() / 1024 / 1024, 2)
        })
    else:
        # Keep a placeholder row so every expected object appears in the table.
        missing_tables.append(table_name)
        summary_data.append({
            'Table': table_name,
            'Type': table_type,
            'Rows': 'N/A',
            'Columns': 'N/A',
            'Memory (MB)': 'N/A'
        })

summary_df = pd.DataFrame(summary_data)
display(summary_df)

# Report the real outcome instead of unconditionally claiming success.
if missing_tables:
    print(f"\n‚úó {len(missing_tables)} of {len(tables)} tables were not loaded: {', '.join(missing_tables)}")
else:
    print("\n‚úì All tables successfully loaded and available as DataFrames")


DATABASE EXPLORATION SUMMARY



Unnamed: 0,Table,Type,Rows,Columns,Memory (MB)
0,dim_host,Dimension,65,12,0.02
1,dim_property,Dimension,100,16,0.05
2,dim_location,Dimension,86,12,0.03
3,dim_category_ratings,Dimension,93,12,0.01
4,dim_date,Dimension,1000,11,0.27
5,fact_listing_metrics,Fact,100,21,0.02
6,fact_listing_amenities_summary,Fact,100,10,0.01
7,fact_competitor_pricing_analysis,Fact,100,17,0.01
8,bridge_listing_competitors,Bridge,2500,13,0.23
9,view_listing_summary,View,100,44,0.12



‚úì All tables successfully loaded and available as DataFrames


# Close Database Connection

In [52]:
# Close the connection once. A psycopg2 connection object stays truthy after
# close(), so check its `closed` flag to avoid reporting a second "close"
# when this cell is re-run.
if conn and not conn.closed:
    conn.close()
    print("‚úì Database connection closed")

# Drop the handle so the `if conn:` guards in the loading cells skip cleanly
# instead of querying a closed connection if they are re-run afterwards.
conn = None

‚úì Database connection closed
