In [6]:
import geopandas as gpd
import pandas as pd

In [7]:
listings = pd.read_csv("listingsnew.csv")
listings.info

  listings = pd.read_csv("listingsnew.csv")


<bound method DataFrame.info of                         id                                               name  \
0       670339032744709144     Westwood lovely three bedrooms three bathrooms   
1                 37014494      Spanish style lower duplex near Beverly Hills   
2      1024835174766068422                        Charming Beverly Hills Home   
3       850744632375448560                   Tianpu's warm room with bathroom   
4       953950676345326970  Santa Monica apt, free parking, steps to the b...   
...                    ...                                                ...   
45528   892894292387453414                       Cozy 1 Bed, 2 Bath apartment   
45529   944430003646575559                   DTLA Loft | Industrial | Central   
45530   746345243817423719   Long Beach, Rustic Suite 3 Beds, 1 Bath, 2 Rooms   
45531             32943553       Charming Craftsman Bungalow w/ Outdoor Sauna   
45532             37853838                   Views, big backyard, central LA!

In [8]:
# Fill NaN values for numeric columns (with -1)
numeric_columns = listings.select_dtypes(include=['float64', 'int64']).columns
listings[numeric_columns] = listings[numeric_columns].fillna(-1)

# Fill NaN values for object columns (with empty string)
object_columns = listings.select_dtypes(include=['object']).columns
listings[object_columns] = listings[object_columns].fillna('')

In [9]:
neighborhoods = gpd.read_file("neighborhood_boundaries.geojson")


listings_gdf = gpd.GeoDataFrame(
    listings,
    geometry=gpd.points_from_xy(listings.longitude, listings.latitude),
    crs="EPSG:4326"  # WGS84 (common geographic coordinate system)
)
listings_gdf.head()


Unnamed: 0,id,name,host_id,host_name,host_since,host_response_time,host_response_rate,host_is_superhost,neighbourhood_cleansed,neighbourhood_group_cleansed,...,bedrooms,beds,price,minimum_nights,availability_365,number_of_reviews,review_scores_rating,license,instant_bookable,geometry
0,670339032744709144,Westwood lovely three bedrooms three bathrooms,4780152,Moon,20/01/13,within a few hours,0.96,f,West Los Angeles,City of Los Angeles,...,3.0,3.0,399.0,30,365,0,-1.0,,f,POINT (-118.43555 34.04966)
1,37014494,Spanish style lower duplex near Beverly Hills,278288178,Ida,22/07/19,,-1.0,f,Beverlywood,City of Los Angeles,...,2.0,-1.0,-1.0,30,0,0,-1.0,,f,POINT (-118.38751 34.04841)
2,1024835174766068422,Charming Beverly Hills Home,513813179,Tiana,08/05/23,within a day,0.6,f,Beverly Hills,Other Cities,...,3.0,3.0,434.0,30,267,0,-1.0,,f,POINT (-118.39074 34.07058)
3,850744632375448560,Tianpu's warm room with bathroom,432956623,Dan,22/11/21,a few days or more,0.2,f,Temple City,Other Cities,...,1.0,1.0,49.0,1,364,1,3.0,,f,POINT (-118.0731 34.10933)
4,953950676345326970,"Santa Monica apt, free parking, steps to the b...",528669205,Farkhat,29/07/23,within an hour,1.0,t,Santa Monica,Other Cities,...,0.0,1.0,231.0,5,193,44,4.93,Exempt,t,POINT (-118.49408 34.01559)


In [29]:
listings_with_neighbourhoods = gpd.sjoin(listings_gdf, neighborhoods, how="left", predicate="within")
listings_with_neighbourhoods.drop(columns=["geometry","neighbourhood_cleansed","neighbourhood_group_cleansed","name_right","id"], inplace=True)
listings_with_neighbourhoods.rename(columns={"OBJECTID": "neighbourhoodId"}, inplace=True)
listings_with_neighbourhoods.rename(columns={"name_left": "listing_name"}, inplace=True)

listings_with_neighbourhoods.dropna(subset=["neighbourhoodId"], inplace=True)

listings_with_neighbourhoods["neighbourhoodId"] = listings_with_neighbourhoods["neighbourhoodId"].astype(int)

In [None]:
##listings_with_neighbourhoods.to_csv('listings_updated.csv', index=False)

In [30]:
listings_with_neighbourhoods.head()

Unnamed: 0,listing_name,host_id,host_name,host_since,host_response_time,host_response_rate,host_is_superhost,latitude,longitude,property_type,...,beds,price,minimum_nights,availability_365,number_of_reviews,review_scores_rating,license,instant_bookable,index_right,neighbourhoodId
0,Westwood lovely three bedrooms three bathrooms,4780152,Moon,20/01/13,within a few hours,0.96,f,34.04966,-118.43555,Entire condo,...,3.0,399.0,30,365,0,-1.0,,f,107.0,108
1,Spanish style lower duplex near Beverly Hills,278288178,Ida,22/07/19,,-1.0,f,34.04841,-118.38751,Entire rental unit,...,-1.0,-1.0,30,0,0,-1.0,,f,8.0,9
5,Practical Private Room #1398 A,557999481,Christos,22/01/24,within a day,1.0,f,34.17399,-118.36184,Private room in rental unit,...,1.0,62.0,30,278,0,-1.0,,f,66.0,67
6,2 Bedroom Modern Oasis,4151836,Sara,14/11/12,within a few hours,1.0,f,33.99017,-118.42252,Entire home,...,5.0,110.0,30,117,368,4.61,HSR19-004352,f,22.0,23
7,Luxury Waterfront Retreat | Rooftop View,510853557,Tig,19/04/23,within an hour,1.0,t,33.982577,-118.464266,Entire home,...,4.0,1179.0,2,356,35,4.97,HSR23-000668,f,97.0,98


In [31]:
from sqlalchemy import create_engine, MetaData, Table, Column, Integer, String, Float, DateTime, ForeignKey

DATABASE_URI = "mssql+pyodbc://localhost/LosAngelesData?driver=ODBC+Driver+17+for+SQL+Server&Trusted_Connection=yes"
engine = create_engine(DATABASE_URI)

# Create metadata object
metadata = MetaData()

# Define the CrimeData table without the foreign key constraint
crime_data_table = Table(
    "AirbnbLinstingData",
    metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),  # Primary key column
     *[
        Column(col, 
               String if listings_with_neighbourhoods[col].dtype == 'object' else
               DateTime if pd.api.types.is_datetime64_any_dtype(listings_with_neighbourhoods[col]) else
               Float if listings_with_neighbourhoods[col].dtype == 'float64' else
               Integer)  # Assuming Integer for other numeric types
        for col in listings_with_neighbourhoods.columns
    ]
)

metadata.drop_all(engine, [crime_data_table])  # Clean start (optional)
metadata.create_all(engine, [crime_data_table])

inserted_row_count = 0
try:
    with engine.connect() as conn:
        result = conn.execute(crime_data_table.insert(), listings_with_neighbourhoods.to_dict(orient="records"))
        conn.commit()
        inserted_row_count += result.rowcount
except Exception as e:
    print(f"Error: {e}")

print(f"Crime data inserted {inserted_row_count} successfully!")


Crime data inserted 8 successfully!
