In [1]:
import os
import pandas as pd


# Root folder of the dataset; each city below has its own sub-folder here.
BASE_PATH = "/Users/braumrussell/Desktop/ProjectData"

# Folder names of the cities to load, in the order they are processed.
cities = [
    # United States
    "new-york-city",
    "los-angeles",
    "san-francisco",
    "chicago",
    "boston",
    "washington-dc",
    "dallas",
    "new-orleans",
    "seattle",
    "austin",
    # International
    "london",
    "paris",
    "amsterdam",
    "barcelona",
    "berlin",
    "sydney",
    "rome",
    "madrid",
    "munich",
    "toronto",
]

# --------------------------------------------
# Function to load data for a single city
# --------------------------------------------
def load_city_data(city, base_path=None):
    """Load the listings and reviews tables for a single city.

    The files ``listings.csv.gz`` and ``reviews.csv.gz`` are read from
    ``<base_path>/<city>/``. Both are read directly from their gzip-compressed
    form, so they never need to be unpacked on disk.

    Args:
        city: Name of the city sub-folder; also written into the ``city``
            column of both returned frames.
        base_path: Directory containing the per-city sub-folders. When
            ``None`` (the default) the module-level ``BASE_PATH`` is used,
            so existing one-argument calls behave as before.

    Returns:
        A ``(df_listings, df_reviews)`` tuple of DataFrames, each with an
        added ``city`` column set to ``city``.

    Raises:
        FileNotFoundError: If either CSV file is missing (raised by
            ``pd.read_csv``).
    """
    root = BASE_PATH if base_path is None else base_path
    city_path = os.path.join(root, city)

    listings_path = os.path.join(city_path, "listings.csv.gz")
    reviews_path = os.path.join(city_path, "reviews.csv.gz")

    # Read compressed files directly — DO NOT UNZIP.
    # low_memory=False makes pandas parse each file in one pass rather than
    # in internal chunks, avoiding mixed-dtype columns.
    df_listings = pd.read_csv(listings_path, compression="gzip", low_memory=False)
    df_reviews = pd.read_csv(reviews_path, compression="gzip", low_memory=False)

    # Tag every row with its city so the per-city frames stay
    # distinguishable after being concatenated.
    df_listings["city"] = city
    df_reviews["city"] = city

    return df_listings, df_reviews

# --------------------------------------------
# Load ALL cities into combined DataFrames
# --------------------------------------------
# Per-city frames are collected here first and stacked in one go afterwards;
# a single concat is cheaper than growing a DataFrame inside the loop.
all_listings, all_reviews = [], []

for city in cities:
    print(f"Loading data for {city}...")
    city_listings, city_reviews = load_city_data(city)
    all_listings.append(city_listings)
    all_reviews.append(city_reviews)

# ignore_index=True gives the combined frames a fresh 0..n-1 index instead of
# repeating each city's own row numbers.
listings = pd.concat(all_listings, ignore_index=True)
reviews = pd.concat(all_reviews, ignore_index=True)

print("Listings shape:", listings.shape)
print("Reviews shape:", reviews.shape)

# Optional: write the combined tables to the current working directory.
listings.to_csv("combined_listings.csv", index=False)
reviews.to_csv("combined_reviews.csv", index=False)

print("Data loaded successfully.")


Loading data for new-york-city...
Loading data for los-angeles...
Loading data for san-francisco...
Loading data for chicago...
Loading data for boston...
Loading data for washington-dc...
Loading data for dallas...
Loading data for new-orleans...
Loading data for seattle...
Loading data for austin...
Loading data for london...
Loading data for paris...


KeyboardInterrupt: 