In [2]:
# Key for changing state names to their abbreviations
us_state_abbrev = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR', 'California': 'CA',
    'Colorado': 'CO', 'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA',
    'Hawaii': 'HI', 'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA', 'Kansas': 'KS',
    'Kentucky': 'KY', 'Louisiana': 'LA', 'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA',
    'Michigan': 'MI', 'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ', 'New Mexico': 'NM',
    'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Ohio': 'OH', 'Oklahoma': 'OK',
    'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC',
    'South Dakota': 'SD', 'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Vermont': 'VT',
    'Virginia': 'VA', 'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
    'Puerto Rico': 'PR', 'Virgin Islands': 'VI', 'District of Columbia':'DI', 'New Brunswick': 'NB',
    'Guam': 'GU'
}

# SECTION 1: CLASSIFYING TEMPERTATURES

In [None]:
import pandas as pd

# Read data
tdf = pd.read_csv("../data/citytemperatures.csv")

# Rename CITY to City
tdf = tdf.rename(columns={"CITY": "City"})

# Convert month columns (2 to 13) to numeric
tdf.iloc[:, 2:14] = tdf.iloc[:, 2:14].apply(pd.to_numeric, errors="coerce")

# Calculate the average temperature across months
tdf["Temperature"] = tdf.iloc[:, 2:14].mean(axis=1)

# Compute the average temperature across multiple instances of the same city if there are multiple inputs (multiple rows for each city)
tdf_avg_temp = tdf.groupby('City', as_index=False)['Temperature'].mean()

# Drop duplicates
tdf = tdf.drop_duplicates(subset=['City'], keep='first')

# Calculate quantiles for splitting data into 3 parts
quantiles = tdf["Temperature"].quantile([0.33, 0.66])

# Function to classify temperature based on quantiles
def classify_temp(temp):
    if temp >= quantiles[0.66]:
        return "Hot"
    elif temp >= quantiles[0.33]:
        return "Medium"
    else:
        return "Cold"

# Apply classification
tdf["Category"] = tdf["Temperature"].apply(classify_temp)

# Select only 'City' and 'Category' columns for final (tdf final = tdff)
tdff = tdf[["City", "Category"]]

# Rename Category to Temperature
tdff = tdff.rename(columns={"Category": "Temperature"})

tdff

Unnamed: 0,City,Temperature
0,"BIRMINGHAM,AL",Hot
1,"HUNTSVILLE,AL",Medium
2,"MOBILE,AL",Hot
3,"MONTGOMERY,AL",Hot
4,"ANCHORAGE,AK",Cold
...,...,...
259,"POHNPEI- CAROLINE IS.,PC",Hot
260,"CHUUK- E. CAROLINE IS.,PC",Hot
261,"YAP- W CAROLINE IS.,PC",Hot
262,"SAN JUAN,PR",Hot


# SECTION 2: COMBINING THIS WITH HOUSING DATA

In [None]:
# Read first houying data
hdf = pd.read_csv("../data/housingdata1.csv")

# Drop rows where 'City' or 'State' is NaN
hdf = hdf.dropna(subset=['City', 'State'])

#revised housing dataframe
hdf = hdf[["State", "City", "Bedroom", "Bathroom", "Area", "LotArea", "MarketEstimate", "RentEstimate", "Price"]]

# Read second housing dataset
hdf2 = pd.read_csv("../data/housingdata2.csv")

# Simplify second dataset
hdf2 = hdf2[["state", "city", "bed", "bath", "house_size", "acre_lot", "price"]]

# Rename columns in second dataset to match the first one
hdf2.columns = ['State', 'City', 'Bedroom', 'Bathroom', 'Area', 'LotArea', 'Price']

# Drop rows where 'City' or 'State' is NaN in hd2
hdf2 = hdf2.dropna(subset=['City', 'State'])

# Rename states to their abbreviations
hdf2['State'] = hdf2['State'].map(us_state_abbrev).fillna(hdf2['State'])

# Filter out all non-abbreviated places
hdf2 = hdf2[hdf2['State'].str.match(r'^[A-Z]{2}$', na=False)]

# Combining the two hdfs
hdf = pd.concat([hdf, hdf2], ignore_index=True)

#reformat housing data city to CITY,SI  (SI = STATE INITIAL)
hdf["City"] = hdf["City"] + "," + hdf["State"]

# Ensure key column (city) is of the same type and normalize it so they are all in the same format
hdf.loc[:, "City"] = hdf["City"].str.strip().str.lower()
tdff.loc[:, "City"] = tdff["City"].str.strip().str.lower()

# Merge datasets on 'city'
merged_df = hdf.merge(tdff[["City", "Temperature"]], on="City", how="left")

merged_df

Unnamed: 0,State,City,Bedroom,Bathroom,Area,LotArea,MarketEstimate,RentEstimate,Price,Temperature
0,AL,"saraland,al",4.0,2.0,1614.0,0.38050,240600.0,1599.0,239900.0,
1,AL,"southside,al",3.0,2.0,1474.0,0.67034,186700.0,1381.0,1.0,
2,AL,"robertsdale,al",3.0,2.0,1800.0,3.20000,,,259900.0,
3,AL,"gulf shores,al",2.0,2.0,1250.0,,,,342500.0,
4,AL,"chelsea,al",3.0,3.0,2224.0,0.26000,336200.0,1932.0,335000.0,
...,...,...,...,...,...,...,...,...,...,...
2249097,WA,"richland,wa",4.0,2.0,3600.0,0.33000,,,359900.0,
2249098,WA,"richland,wa",3.0,2.0,1616.0,0.10000,,,350000.0,
2249099,WA,"richland,wa",6.0,3.0,3200.0,0.50000,,,440000.0,
2249100,WA,"richland,wa",2.0,1.0,933.0,0.09000,,,179900.0,


# SECTION 3: IMPORTING IN OTHER STATEWISE TEMPERATURE DATA TO REPLACE NAN TEMP VALUES

In [None]:
# Read state temperature data
stdf = pd.read_csv("../data/averagestatetemperatures.csv")[["state", "average_temp"]]

# Rename state to State
stdf = stdf.rename(columns={"state": "State"})

# Convert full state names to abbreviations
stdf['State'] = stdf['State'].map(us_state_abbrev)

# Compute the average temperature for each state if there are multiple rows with the same state
stdf_avg_temp = stdf.groupby('State', as_index=False)['average_temp'].mean()

# Drop duplicates
stdf = stdf.drop_duplicates(subset=['State'], keep='first')

# Calculate quantiles for splitting data into 3 parts
quantiles = stdf["average_temp"].quantile([0.33, 0.66])

# Function to classify temperature based on quantiles
def classify_temp(temp):
    if temp >= quantiles[0.66]:
        return "Hot"
    elif temp >= quantiles[0.33]:
        return "Medium"
    else:
        return "Cold"

# Apply classification
stdf["Temperature"] = stdf["average_temp"].apply(classify_temp)

# Remove avg temp
stdf = stdf.drop(columns=['average_temp'])

# Manually input values for states which were not in the state dataset
stdf.loc[len(stdf)] = ['ak', 'Cold']
stdf.loc[len(stdf)] = ['pr', 'Hot']
stdf.loc[len(stdf)] = ['di', 'Medium']
stdf.loc[len(stdf)] = ['vi', 'Hot']
stdf.loc[len(stdf)] = ['nb', 'Cold']
stdf.loc[len(stdf)] = ['hi', 'Hot']
stdf.loc[len(stdf)] = ['gu', 'Hot']

# Ensure data in State column is all in the same format
stdf.loc[:, "State"] = stdf["State"].str.strip().str.lower()
merged_df.loc[:, "State"] = merged_df["State"].str.strip().str.lower()

# Merge state temps into merged df, keeping BOTH temperature columns rather than combining state temps into the merged_df temps. This is because we only want to update
# merged_df temps if there was a NaN entry there 
merged_df = merged_df.merge(stdf, on='State', how='left', suffixes=('_old', '_new'))

# Fill NaN values in the original Temperature column with values from the second dataset
merged_df['Temperature_old'] = merged_df['Temperature_old'].fillna(merged_df['Temperature_new'])

merged_df = merged_df.drop(columns=['Temperature_new'])

merged_df = merged_df.rename(columns={"Temperature_old": "Temperature"})

merged_df

Unnamed: 0,State,City,Bedroom,Bathroom,Area,LotArea,MarketEstimate,RentEstimate,Price,Temperature
0,al,"saraland,al",4.0,2.0,1614.0,0.38050,240600.0,1599.0,239900.0,Hot
1,al,"southside,al",3.0,2.0,1474.0,0.67034,186700.0,1381.0,1.0,Hot
2,al,"robertsdale,al",3.0,2.0,1800.0,3.20000,,,259900.0,Hot
3,al,"gulf shores,al",2.0,2.0,1250.0,,,,342500.0,Hot
4,al,"chelsea,al",3.0,3.0,2224.0,0.26000,336200.0,1932.0,335000.0,Hot
...,...,...,...,...,...,...,...,...,...,...
2249097,wa,"richland,wa",4.0,2.0,3600.0,0.33000,,,359900.0,Cold
2249098,wa,"richland,wa",3.0,2.0,1616.0,0.10000,,,350000.0,Cold
2249099,wa,"richland,wa",6.0,3.0,3200.0,0.50000,,,440000.0,Cold
2249100,wa,"richland,wa",2.0,1.0,933.0,0.09000,,,179900.0,Cold


# SECTION 4: MERGING IN CITY QOL RATINGS

In [None]:
# Read qol data
qoldf = pd.read_csv("../data/qolcitydata.csv", encoding="ISO-8859-1")

# Exract only the columns we want
qoldf = qoldf[["LCITY", "LSTATE", "2016 Crime Rate", "Unemployment", "AQI%Good", "WaterQualityVPV", "%CvgCityPark", "Cost of Living", "2022 Median Income", "AVG C2I", "Diversity Rank (Race)", "Diversity Rank (Gender)"]]

# Correct format of qoldf cities to match that of our merged_df
qoldf.loc[:, "LCITY"] = qoldf["LCITY"].str.strip().str.lower()
qoldf.loc[:, "LSTATE"] = qoldf["LSTATE"].str.strip().str.lower()
qoldf["LCITY"] = qoldf["LCITY"] + "," + qoldf["LSTATE"]

# Drop rows where 'City' or 'State' is NaN
qoldf = qoldf.dropna(subset=['LCITY', 'LSTATE'])

# Drop duplicate entries of a city
qoldf = qoldf.drop_duplicates(subset="LCITY", keep="first")

# Rename LCITY to City for merging
qoldf = qoldf.rename(columns={"LCITY": "City"})

merged_df = merged_df.merge(qoldf[["City", "2016 Crime Rate", "Unemployment", "AQI%Good", "WaterQualityVPV", "%CvgCityPark", "Cost of Living", "2022 Median Income", "AVG C2I", "Diversity Rank (Race)", "Diversity Rank (Gender)"]], on="City", how="left")

merged_df

  qoldf = pd.read_csv("qolcitydata.csv", encoding="ISO-8859-1")


Unnamed: 0,State,City,Bedroom,Bathroom,Area,LotArea,MarketEstimate,RentEstimate,Price,Temperature,2016 Crime Rate,Unemployment,AQI%Good,WaterQualityVPV,%CvgCityPark,Cost of Living,2022 Median Income,AVG C2I,Diversity Rank (Race),Diversity Rank (Gender)
0,al,"saraland,al",4.0,2.0,1614.0,0.38050,240600.0,1599.0,239900.0,Hot,47/1000,3.35%,80.94%,1.0,-1,"$71,947.38","$62,409.46",115.28%,26459.0,63210.0
1,al,"southside,al",3.0,2.0,1474.0,0.67034,186700.0,1381.0,1.0,Hot,43/1000,3.12%,80.94%,0.0,-1,"$67,812.73","$58,943.92",115.05%,69642.0,79134.0
2,al,"robertsdale,al",3.0,2.0,1800.0,3.20000,,,259900.0,Hot,18/1000,2.41%,80.94%,1.0,-1,"$79,155.41","$77,884.76",101.63%,29479.0,36363.0
3,al,"gulf shores,al",2.0,2.0,1250.0,,,,342500.0,Hot,18/1000,2.41%,80.94%,1.0,-1,"$79,155.41","$77,884.76",101.63%,56013.0,31948.0
4,al,"chelsea,al",3.0,3.0,2224.0,0.26000,336200.0,1932.0,335000.0,Hot,16/1000,1.87%,80.94%,-1.0,-1,"$85,691.03","$98,419.23",87.07%,44179.0,41526.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2249097,wa,"richland,wa",4.0,2.0,3600.0,0.33000,,,359900.0,Cold,23/1000,5.31%,92.89%,0.0,-1,"$72,571.13","$83,393.68",87.02%,35964.0,87680.0
2249098,wa,"richland,wa",3.0,2.0,1616.0,0.10000,,,350000.0,Cold,23/1000,5.31%,92.89%,0.0,-1,"$72,571.13","$83,393.68",87.02%,35964.0,87680.0
2249099,wa,"richland,wa",6.0,3.0,3200.0,0.50000,,,440000.0,Cold,23/1000,5.31%,92.89%,0.0,-1,"$72,571.13","$83,393.68",87.02%,35964.0,87680.0
2249100,wa,"richland,wa",2.0,1.0,933.0,0.09000,,,179900.0,Cold,23/1000,5.31%,92.89%,0.0,-1,"$72,571.13","$83,393.68",87.02%,35964.0,87680.0


# SECTION 5: ADDING POPULATIONS


In [None]:
pdf = pd.read_csv('../data/uscitypopulations.csv')[['CITY', 'STATE', '2022_POPULATION']]

# Rename columns
pdf.columns = ['City', 'State', 'Population']

# Convert full state names to abbreviations
pdf['State'] = pdf['State'].map(us_state_abbrev)

# Correct formatting of cities and towns
pdf['City'] = pdf['City'].str.replace(r'\b( city| town)\b', '', case=False, regex=True).str.strip()
pdf.loc[:, "City"] = pdf["City"].str.strip().str.lower()
pdf.loc[:, "State"] = pdf["State"].str.strip().str.lower()
pdf["City"] = pdf["City"] + "," + pdf["State"]

pdf

# Merge with merged_df
merged_df = merged_df.merge(pdf[["City", "Population"]], on="City", how="left")

merged_df


Unnamed: 0,State,City,Bedroom,Bathroom,Area,LotArea,MarketEstimate,RentEstimate,Price,Temperature,...,Unemployment,AQI%Good,WaterQualityVPV,%CvgCityPark,Cost of Living,2022 Median Income,AVG C2I,Diversity Rank (Race),Diversity Rank (Gender),Population
0,al,"saraland,al",4.0,2.0,1614.0,0.38050,240600.0,1599.0,239900.0,Hot,...,3.35%,80.94%,1.0,-1,"$71,947.38","$62,409.46",115.28%,26459.0,63210.0,16358.0
1,al,"southside,al",3.0,2.0,1474.0,0.67034,186700.0,1381.0,1.0,Hot,...,3.12%,80.94%,0.0,-1,"$67,812.73","$58,943.92",115.05%,69642.0,79134.0,9554.0
2,al,"robertsdale,al",3.0,2.0,1800.0,3.20000,,,259900.0,Hot,...,2.41%,80.94%,1.0,-1,"$79,155.41","$77,884.76",101.63%,29479.0,36363.0,7189.0
3,al,"gulf shores,al",2.0,2.0,1250.0,,,,342500.0,Hot,...,2.41%,80.94%,1.0,-1,"$79,155.41","$77,884.76",101.63%,56013.0,31948.0,16193.0
4,al,"chelsea,al",3.0,3.0,2224.0,0.26000,336200.0,1932.0,335000.0,Hot,...,1.87%,80.94%,-1.0,-1,"$85,691.03","$98,419.23",87.07%,44179.0,41526.0,16193.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2249310,wa,"richland,wa",4.0,2.0,3600.0,0.33000,,,359900.0,Cold,...,5.31%,92.89%,0.0,-1,"$72,571.13","$83,393.68",87.02%,35964.0,87680.0,62821.0
2249311,wa,"richland,wa",3.0,2.0,1616.0,0.10000,,,350000.0,Cold,...,5.31%,92.89%,0.0,-1,"$72,571.13","$83,393.68",87.02%,35964.0,87680.0,62821.0
2249312,wa,"richland,wa",6.0,3.0,3200.0,0.50000,,,440000.0,Cold,...,5.31%,92.89%,0.0,-1,"$72,571.13","$83,393.68",87.02%,35964.0,87680.0,62821.0
2249313,wa,"richland,wa",2.0,1.0,933.0,0.09000,,,179900.0,Cold,...,5.31%,92.89%,0.0,-1,"$72,571.13","$83,393.68",87.02%,35964.0,87680.0,62821.0


## Drop Nan Values as well as other unnecessary columns

In [8]:
# Simplify data by dropping columns we're not interested in
merged_df = merged_df.drop(columns=['MarketEstimate', 'RentEstimate', '%CvgCityPark', 'Diversity Rank (Race)', 'Diversity Rank (Gender)', 'AVG C2I'])

# Drop NaN Vals
merged_df = merged_df.dropna()

merged_df

Unnamed: 0,State,City,Bedroom,Bathroom,Area,LotArea,Price,Temperature,2016 Crime Rate,Unemployment,AQI%Good,WaterQualityVPV,Cost of Living,2022 Median Income,Population
0,al,"saraland,al",4.0,2.0,1614.0,0.38050,239900.0,Hot,47/1000,3.35%,80.94%,1.0,"$71,947.38","$62,409.46",16358.0
1,al,"southside,al",3.0,2.0,1474.0,0.67034,1.0,Hot,43/1000,3.12%,80.94%,0.0,"$67,812.73","$58,943.92",9554.0
2,al,"robertsdale,al",3.0,2.0,1800.0,3.20000,259900.0,Hot,18/1000,2.41%,80.94%,1.0,"$79,155.41","$77,884.76",7189.0
4,al,"chelsea,al",3.0,3.0,2224.0,0.26000,335000.0,Hot,16/1000,1.87%,80.94%,-1.0,"$85,691.03","$98,419.23",16193.0
6,al,"montgomery,al",3.0,2.0,1564.0,8712.00000,151000.0,Hot,47/1000,3.17%,80.94%,1.0,"$74,899.78","$64,886.16",196986.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2249310,wa,"richland,wa",4.0,2.0,3600.0,0.33000,359900.0,Cold,23/1000,5.31%,92.89%,0.0,"$72,571.13","$83,393.68",62821.0
2249311,wa,"richland,wa",3.0,2.0,1616.0,0.10000,350000.0,Cold,23/1000,5.31%,92.89%,0.0,"$72,571.13","$83,393.68",62821.0
2249312,wa,"richland,wa",6.0,3.0,3200.0,0.50000,440000.0,Cold,23/1000,5.31%,92.89%,0.0,"$72,571.13","$83,393.68",62821.0
2249313,wa,"richland,wa",2.0,1.0,933.0,0.09000,179900.0,Cold,23/1000,5.31%,92.89%,0.0,"$72,571.13","$83,393.68",62821.0


# SECTION 6: CONVERTING NON FLOAT COLUMNS TO FLOAT

In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np

# Select features for clustering
feature_cols = ['Bedroom', 'Bathroom', 'Area', 'LotArea', 'Price', '2022 Median Income', 'Temperature', 'Population']
quality_cols = ['AQI%Good', 'WaterQualityVPV', 'Unemployment', '2016 Crime Rate', 'Cost of Living']
# Note: Curious about whether or not we should have Cost of Living be an inputted feature variable or if it should just be something that's generally minimized

# Clean currency columns
for col in ['Price', '2022 Median Income', 'Cost of Living']:
    merged_df[col] = merged_df[col].replace('[\$,]', '', regex=True).astype(float)

# Clean crime rate
merged_df['2016 Crime Rate'] = (
    merged_df['2016 Crime Rate']
    .astype(str)
    .str.extract(r'(\d+)/(\d+)')
    .astype(float)
    .apply(lambda row: row[0] / row[1] if pd.notna(row[0]) and pd.notna(row[1]) else np.nan, axis=1)
)

# Clean quality columns
for col in quality_cols:
    merged_df[col] = (
        merged_df[col]
        .astype(str)
        .str.replace('%', '', regex=False)
        .str.replace(',', '', regex=False)
        .replace({'N/A': np.nan, 'unknown': np.nan, 'Missing': np.nan})
    )
    merged_df[col] = pd.to_numeric(merged_df[col], errors='coerce')

# Map temperature to numeric
temp_mapping = {'Cold': 0, 'Medium': 1, 'Hot': 2}
merged_df['Temperature'] = merged_df['Temperature'].map(temp_mapping)

merged_df

  merged_df[col] = merged_df[col].replace('[\$,]', '', regex=True).astype(float)


Unnamed: 0,State,City,Bedroom,Bathroom,Area,LotArea,Price,Temperature,2016 Crime Rate,Unemployment,AQI%Good,WaterQualityVPV,Cost of Living,2022 Median Income,Population
0,al,"saraland,al",4.0,2.0,1614.0,0.38050,239900.0,2,0.047,3.35,80.94,1.0,71947.38,62409.46,16358.0
1,al,"southside,al",3.0,2.0,1474.0,0.67034,1.0,2,0.043,3.12,80.94,0.0,67812.73,58943.92,9554.0
2,al,"robertsdale,al",3.0,2.0,1800.0,3.20000,259900.0,2,0.018,2.41,80.94,1.0,79155.41,77884.76,7189.0
4,al,"chelsea,al",3.0,3.0,2224.0,0.26000,335000.0,2,0.016,1.87,80.94,-1.0,85691.03,98419.23,16193.0
6,al,"montgomery,al",3.0,2.0,1564.0,8712.00000,151000.0,2,0.047,3.17,80.94,1.0,74899.78,64886.16,196986.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2249310,wa,"richland,wa",4.0,2.0,3600.0,0.33000,359900.0,0,0.023,5.31,92.89,0.0,72571.13,83393.68,62821.0
2249311,wa,"richland,wa",3.0,2.0,1616.0,0.10000,350000.0,0,0.023,5.31,92.89,0.0,72571.13,83393.68,62821.0
2249312,wa,"richland,wa",6.0,3.0,3200.0,0.50000,440000.0,0,0.023,5.31,92.89,0.0,72571.13,83393.68,62821.0
2249313,wa,"richland,wa",2.0,1.0,933.0,0.09000,179900.0,0,0.023,5.31,92.89,0.0,72571.13,83393.68,62821.0


# SECTION 7: UPLOADING TO SUPABASE

In [10]:
from sqlalchemy import create_engine
from dotenv import load_dotenv
from supabase import create_client, Client
import os

# Load environment variables
load_dotenv()

# Retrieve database credentials from .env file
DB_URL = os.getenv("DATABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")
SUPABASE_URL = os.getenv("SUPABASE_URL")
DB_PASS = os.getenv("DATABASE_PASS")

# Initialize Supabase client
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# Create SQLAlchemy engine
engine = create_engine(DB_URL, pool_size=5, max_overflow=10)

try:
    with engine.connect() as conn:
        print("Connected to the database successfully!")
except Exception as e:
    print(f"Error: {e}")


try:
    # merged_df2.to_sql('Housing Data 2', engine, if_exists='replace', index=False)
    print("Data uploaded successfully!")
except Exception as e:
    print(f"Error uploading data: {e}")

# Upload DataFrame to SQL table
# merged_df.to_sql("Housing Data", engine, if_exists="replace", index=False)

print("DataFrame successfully uploaded to the SQL database.")

Connected to the database successfully!
Data uploaded successfully!
DataFrame successfully uploaded to the SQL database.
