Install Packages

In [None]:
!pip install supabase python-dotenv pandas tensorflow scikit-learn joblib

Collecting supabase
  Downloading supabase-2.15.0-py3-none-any.whl.metadata (11 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting gotrue<3.0.0,>=2.11.0 (from supabase)
  Downloading gotrue-2.12.0-py3-none-any.whl.metadata (6.1 kB)
Collecting postgrest<1.1,>0.19 (from supabase)
  Downloading postgrest-1.0.1-py3-none-any.whl.metadata (3.5 kB)
Collecting realtime<2.5.0,>=2.4.0 (from supabase)
  Downloading realtime-2.4.2-py3-none-any.whl.metadata (6.6 kB)
Collecting storage3<0.12,>=0.10 (from supabase)
  Downloading storage3-0.11.3-py3-none-any.whl.metadata (1.8 kB)
Collecting supafunc<0.10,>=0.9 (from supabase)
  Downloading supafunc-0.9.4-py3-none-any.whl.metadata (1.2 kB)
Collecting pytest-mock<4.0.0,>=3.14.0 (from gotrue<3.0.0,>=2.11.0->supabase)
  Downloading pytest_mock-3.14.0-py3-none-any.whl.metadata (3.8 kB)
Collecting deprecation<3.0.0,>=2.1.0 (from postgrest<1.1,>0.19->supabase)
  Downloading deprecation-2.1.0-py2.py3-no

Bring in data

In [None]:
import os
from getpass import getpass

# The project URL identifies the Supabase instance; db.py reads it from the
# environment.
os.environ['SUPABASE_URL'] = 'https://lgcrogvgnqphznuwdopu.supabase.co'

# Do not store the API key in the notebook: anyone who can read the file (or
# its version history) can reuse it. Take the key from the environment when it
# is already set there, otherwise prompt for it without echoing.
# NOTE(review): a key was previously committed in this cell — consider
# rotating it in the Supabase dashboard.
if not os.environ.get('SUPABASE_KEY'):
    os.environ['SUPABASE_KEY'] = getpass('Supabase API key: ')

Import scikit-learn

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

Database Setup

In [None]:
%%writefile db.py
import os
from dotenv import load_dotenv
from supabase import create_client, Client
import pandas as pd

# Load environment variables
# Read a local .env file (if any) before looking up the settings below.
load_dotenv()

# Connection settings; each is None when the variable is not set.
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")

def get_supabase_client() -> Client:
    """Create a Supabase client from the module-level SUPABASE_URL and SUPABASE_KEY."""
    client = create_client(SUPABASE_URL, SUPABASE_KEY)
    return client

def fetch_housing_data(page_size: int = 1000, order_column: str = "id") -> pd.DataFrame:
    """Download every row of the "House" table into a DataFrame.

    A single ``select("*")`` is subject to the API's per-request row cap
    (typically 1,000 rows on Supabase) and returns rows in no guaranteed
    order, so one call can silently yield an arbitrary slice of the table.
    This function pages through the table in a stable order instead.

    Args:
        page_size: Maximum number of rows requested per round trip.
        order_column: Column used to order rows so that pages neither overlap
            nor skip rows. Assumes the table has an ``id`` column — confirm
            against the table schema if the default is kept.

    Returns:
        A DataFrame with one row per table record (empty if the table is empty).
    """
    client = get_supabase_client()
    rows = []
    start = 0
    while True:
        # Replace "House" with your actual table name if different
        response = (
            client.table("House")
            .select("*")
            .order(order_column)
            .range(start, start + page_size - 1)
            .execute()
        )
        page = response.data or []  # a list of dictionaries
        if not page:
            break
        rows.extend(page)
        # Advance by what was actually returned so a server-side cap smaller
        # than page_size cannot cause rows to be skipped.
        start += len(page)
    return pd.DataFrame(rows)



Writing db.py


Database link

In [None]:
%%writefile db.py
import os
from dotenv import load_dotenv
from supabase import create_client, Client
import pandas as pd

# Load environment variables from a .env file or your environment
# Read a local .env file (if any) before looking up the settings below.
load_dotenv()

# Connection settings; each is None when the variable is not set.
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")

def get_supabase_client() -> Client:
    """Create a Supabase client from the module-level SUPABASE_URL and SUPABASE_KEY."""
    client = create_client(SUPABASE_URL, SUPABASE_KEY)
    return client

def fetch_housing_data(page_size: int = 1000, order_column: str = "id") -> pd.DataFrame:
    """Download every row of the "House" table into a DataFrame.

    A single ``select("*")`` is subject to the API's per-request row cap
    (typically 1,000 rows on Supabase) and returns rows in no guaranteed
    order, so one call can silently yield an arbitrary slice of the table.
    This function pages through the table in a stable order instead.

    Args:
        page_size: Maximum number of rows requested per round trip.
        order_column: Column used to order rows so that pages neither overlap
            nor skip rows. Assumes the table has an ``id`` column — confirm
            against the table schema if the default is kept.

    Returns:
        A DataFrame with one row per table record (empty if the table is empty).
    """
    client = get_supabase_client()
    rows = []
    start = 0
    while True:
        # Replace "House" with your actual table name if needed.
        response = (
            client.table("House")
            .select("*")
            .order(order_column)
            .range(start, start + page_size - 1)
            .execute()
        )
        page = response.data or []  # a list of dictionaries
        if not page:
            break
        rows.extend(page)
        # Advance by what was actually returned so a server-side cap smaller
        # than page_size cannot cause rows to be skipped.
        start += len(page)
    return pd.DataFrame(rows)


Overwriting db.py


Model Script

In [None]:
%%writefile train_model.py
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from db import fetch_housing_data
import joblib

def train_model(epochs=50, test_size=0.2, random_state=42):
    """Train and save a logistic-regression classifier of "high quality of life".

    The label is 1 when ``QualityOfLifeTotalScore`` is above the dataset median
    and 0 otherwise. Inputs are six numeric columns, median-imputed and
    standardised. The fitted Keras model is written to ``trained_model.h5`` and
    the fitted scaler to ``scaler.pkl`` in the working directory.

    Args:
        epochs: Number of training epochs.
        test_size: Fraction of rows held out for the final evaluation.
        random_state: Seed for the train/test split and for weight
            initialisation and shuffling, so repeated runs are comparable.

    Returns:
        None. Progress, test loss and test accuracy are printed.
    """
    # Seed Python, NumPy and TensorFlow RNGs in one call.
    tf.keras.utils.set_random_seed(random_state)

    # 1. Fetch data from Supabase.
    df = fetch_housing_data()
    print("Data loaded from Supabase:")
    print(df.head())

    # 2. Create a binary target from "QualityOfLifeTotalScore".
    # Here, a score above the median is set to 1 (high quality) and otherwise 0.
    threshold = df['QualityOfLifeTotalScore'].median()
    df['target'] = (df['QualityOfLifeTotalScore'] > threshold).astype(int)

    # Input feature selection.
    input_features = ['ListedPrice', 'MeanIncome', 'Bedroom', 'Bathroom', 'Area', '2022 Population']
    target_feature = 'target'

    # Impute missing values.
    X = df[input_features].fillna(df[input_features].median())
    y = df[target_feature]

    # 3. Split first, so the held-out rows cannot influence the scaler.
    # (Same random_state and row count as before, hence the same partition.)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # 4. Fit the scaler on the training rows only, then apply it to both sets.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # 5. Build the logistic regression model: one sigmoid unit over the inputs.
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    # 6. Compile the model with binary_crossentropy and accuracy as metric.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # 7. Train the model. (Using 10% of training data for validation.)
    model.fit(X_train, y_train, epochs=epochs, validation_split=0.1)

    # 8. Evaluate the model on the held-out rows.
    loss, accuracy = model.evaluate(X_test, y_test)
    print("Test loss:", loss)
    print("Test accuracy:", accuracy)

    # 9. Save the model and scaler; prediction.py loads both by these names.
    model.save("trained_model.h5")
    joblib.dump(scaler, "scaler.pkl")

# Train only when executed as a script (e.g. `python train_model.py`),
# not when the module is imported.
if __name__ == "__main__":
    train_model()


Writing train_model.py


Add-on

In [None]:
from google.colab import drive
# NOTE(review): the saved output of this cell is a MessageError (credential
# propagation was unsuccessful), and no other cell visible in this notebook
# reads from /content/drive — confirm whether this mount is still needed.
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
!pip install matplotlib



Predictions on Model

In [None]:
%%writefile prediction.py
import numpy as np
import tensorflow as tf
import joblib
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from db import fetch_housing_data

# Feature lists must match training
# Model input columns. The order matters: vectors passed to the scaler and the
# model are built by iterating this list.
INPUT_FEATURES = ['ListedPrice', 'MeanIncome', 'Bedroom', 'Bathroom', 'Area', '2022 Population']
# Score column that the __main__ block thresholds at its median to form the
# binary ground-truth label.
TARGET_COLUMN = 'QualityOfLifeTotalScore'

def get_user_input():
    """Collects user input, allowing skipping any field.

    Prompts once per model feature. An empty (or whitespace-only) answer skips
    the field. An answer that is not a valid number is rejected with a message
    and the same prompt is shown again, instead of aborting the program.

    Returns:
        dict mapping feature name to the entered value, containing only the
        fields that were answered. "Bedroom" and "Bathroom" are ints; all
        other fields are floats.
    """
    import math

    prompts = {
        "ListedPrice":     "Enter your desired house price (or press Enter to skip): ",
        "MeanIncome":      "Enter your income (or press Enter to skip): ",
        "Bedroom":         "Enter number of beds (or press Enter to skip): ",
        "Bathroom":        "Enter number of baths (or press Enter to skip): ",
        "Area":            "Enter square footage (or press Enter to skip): ",
        "2022 Population": "Enter desired population (or press Enter to skip): "
    }
    integer_fields = ("Bedroom", "Bathroom")

    user_input = {}
    for key, prompt in prompts.items():
        while True:
            val = input(prompt).strip()
            if not val:
                break  # field skipped
            try:
                number = int(val) if key in integer_fields else float(val)
                # float() accepts "nan"/"inf"; those would poison the scaled
                # feature vector, so treat them as invalid input.
                if not math.isfinite(number):
                    raise ValueError(val)
            except ValueError:
                print("Invalid input. Please enter a number or press Enter to skip.")
                continue
            user_input[key] = number
            break
    return user_input

def predict_house_quality(user_input):
    """Classify the user's preferences; returns (class, probability).

    Features missing from ``user_input`` are filled with the column median of
    the freshly fetched housing data. The class is 1 when the model's output
    exceeds 0.5, otherwise 0.
    """
    housing = fetch_housing_data()
    fallback = housing[INPUT_FEATURES].median()

    # Build the feature row in INPUT_FEATURES order.
    row = []
    for name in INPUT_FEATURES:
        row.append(user_input.get(name, fallback[name]))

    features = scaler.transform([row])
    probability = model.predict(features)[0, 0]
    label = int(probability > 0.5)
    return label, probability

def find_top_cities(user_input, df, top_n=5):
    """Return DataFrame of top_n closest cities with distances.

    Each row of ``df`` is compared with the user's preferences by Euclidean
    distance in scaled feature space (missing values and skipped fields are
    filled with column medians). A city's distance is that of its closest
    row, so each city appears at most once in the result. Ranking the raw rows
    would let one city with several similar listings fill every slot.

    Args:
        user_input: dict of feature name -> value; absent features fall back
            to the column median of ``df``.
        df: housing data containing the INPUT_FEATURES columns and 'City'.
        top_n: maximum number of cities to return.

    Returns:
        DataFrame with columns ['City', 'distance'], nearest first. The index
        labels are those of the closest row of each city in ``df``.
    """
    features = df[INPUT_FEATURES]
    medians = features.median()
    X_scaled = scaler.transform(features.fillna(medians))

    user_vec = np.array([user_input.get(f, medians[f]) for f in INPUT_FEATURES]).reshape(1, -1)
    user_scaled = scaler.transform(user_vec)

    distances = np.linalg.norm(X_scaled - user_scaled, axis=1)

    ranked = df[['City']].copy()
    ranked['distance'] = distances
    # Sort nearest-first, then keep only the first (nearest) row of each city.
    nearest_per_city = (
        ranked.sort_values('distance', kind='stable')
        .drop_duplicates(subset='City', keep='first')
    )
    return nearest_per_city.head(top_n)[['City', 'distance']]

# Load model and scaler once
# These run at import time, so importing this module requires both files to be
# present in the working directory; train_model.py writes them.
# NOTE: joblib.load unpickles the file — only load a scaler.pkl you trust.
model = tf.keras.models.load_model("trained_model.h5")
scaler = joblib.load("scaler.pkl")

if __name__ == "__main__":
    # 1) get input & predict
    user_input = get_user_input()
    cls, prob = predict_house_quality(user_input)
    print(f"\nPredicted house quality class: {cls} (P={prob:.2f})")

    # 2) fetch the data once; it is reused for the accuracy check in step 4
    #    so both steps see the same snapshot and no extra download is needed.
    df = fetch_housing_data()
    top_cities = find_top_cities(user_input, df, top_n=5)
    print("\nTop 5 matching cities:")
    for city, dist in zip(top_cities['City'], top_cities['distance']):
        print(f"  {city}  (distance = {dist:.1f})")

    # 3) bar chart of distances
    plt.bar(top_cities['City'], top_cities['distance'])
    plt.xticks(rotation=45, ha='right')
    plt.ylabel('Euclidean distance')
    plt.title('Top 5 Closest City Matches')
    plt.tight_layout()
    plt.show()

    # 4) overall accuracy on full dataset
    #    The label is rebuilt the same way as in training: score above the
    #    median -> 1. This figure includes rows the model was trained on, so
    #    it is not a held-out estimate.
    X_full = df[INPUT_FEATURES].fillna(df[INPUT_FEATURES].median())
    y_true = (df[TARGET_COLUMN] > df[TARGET_COLUMN].median()).astype(int)
    X_scaled_full = scaler.transform(X_full)
    y_pred = (model.predict(X_scaled_full).ravel() > 0.5).astype(int)
    acc = accuracy_score(y_true, y_pred)
    print(f"\nOverall model accuracy (full data): {acc:.3f}")


Overwriting prediction.py


Find closest match

Model Training Protocol

In [None]:
# Runs the script written by the `%%writefile train_model.py` cell; it saves
# trained_model.h5 and scaler.pkl, which prediction.py loads.
!python train_model.py

2025-04-25 11:24:33.604127: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745580273.629450   14677 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745580273.636779   14677 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Data loaded from Supabase:
  State            City  ...  QualityOfLifeSafety     id
0    sd  sioux falls,sd  ...                   29  82672
1    sd  sioux falls,sd  ...                   29  82673
2    sd  sioux falls,sd  ...                   29  82674
3    sd  sioux falls,sd  ...                   29  82675
4    sd  sioux falls,sd  ...                   29  82676

[5 rows x 22 columns]
2025-04-25 11:24:38.699102: E external/local_

In [None]:
# Runs the script written by the `%%writefile prediction.py` cell; it needs
# trained_model.h5 and scaler.pkl to exist and prompts for input interactively.
!python prediction.py

2025-04-25 11:25:50.195387: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745580350.234621   16628 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745580350.246446   16628 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-25 11:25:56.377272: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
Enter your desired house price (or press Enter to skip): 3000000
Enter your income (or press Enter to skip): 200000
Enter number of beds (or press Enter to skip): 
Enter number of baths (or press Enter to skip): 
Enter square footage (or press Enter to sk

User Input and Output

In [None]:
import numpy as np
import pandas as pd
from db import fetch_housing_data
import matplotlib.pyplot as plt
# prediction.py (as written by the %%writefile cell in this notebook) does not
# define find_closest_match, so importing it raises ImportError on a fresh
# run. Import the names that do exist and define the helper locally below.
from prediction import INPUT_FEATURES, predict_house_quality, scaler


def find_closest_match(user_input, df, scaler):
    """Return the row of ``df`` nearest to the user's preferences.

    Distance is Euclidean in the scaler's feature space. Missing values in
    ``df`` and fields absent from ``user_input`` are filled with the column
    medians of ``df``.

    NOTE(review): re-created from how this cell uses the result (a record
    supporting ``.get(column, default)``); the original helper is not visible
    in this notebook — confirm the intended behaviour.

    Args:
        user_input: dict of feature name -> value.
        df: housing data containing the INPUT_FEATURES columns.
        scaler: fitted scaler exposing ``transform``.

    Returns:
        The closest row of ``df`` as a pandas Series.
    """
    features = df[INPUT_FEATURES]
    medians = features.median()
    scaled_rows = scaler.transform(features.fillna(medians))
    user_vec = np.array(
        [[user_input.get(f, medians[f]) for f in INPUT_FEATURES]], dtype=float
    )
    user_scaled = scaler.transform(user_vec)
    distances = np.linalg.norm(scaled_rows - user_scaled, axis=1)
    return df.iloc[int(np.argmin(distances))]


# Load the full dataset from Supabase.
df = fetch_housing_data()  # Ensure that df is defined.
print("Data loaded:")
print(df.head())




# 1. Collect user input.
def get_user_input():
    """Collects user input, allowing for skipping some.

    The returned dict is keyed by the model's feature names ('ListedPrice',
    'MeanIncome', 'Bedroom', 'Bathroom', 'Area', '2022 Population'). Those are
    the keys predict_house_quality looks up; with any other key the answer is
    silently ignored and the dataset median is used instead.

    Returns:
        dict mapping feature name to the entered value, containing only the
        fields that were answered. 'Bedroom' and 'Bathroom' are ints; all
        other fields are floats.
    """
    user_input = {}

    features = {
        "ListedPrice": "Enter your desired house price (or press Enter to skip): ",
        "MeanIncome": "Enter your income (or press Enter to skip): ",
        "Bedroom": "Enter number of beds (or press Enter to skip): ",
        "Bathroom": "Enter number of baths (or press Enter to skip): ",
        "Area": "Enter square footage (or press Enter to skip): ",
        "2022 Population": "Enter desired population (or press Enter to skip): "
    }
    integer_features = ("Bedroom", "Bathroom")

    for feature, prompt in features.items():
        while True:
            value = input(prompt)
            if value.strip() == "":  # Skip if empty input
                break
            try:
                user_input[feature] = int(value) if feature in integer_features else float(value)
                break
            except ValueError:
                print("Invalid input. Please enter a number or press Enter to skip.")

    return user_input


user_input = get_user_input()

# 2. Get the model prediction.
# predict_house_quality returns a (class, probability) pair; unpack it so the
# line below prints the class rather than the whole tuple.
prediction, probability = predict_house_quality(user_input)
print("Predicted house quality class (0 = low, 1 = high):", prediction)
print(f"Predicted probability of high quality: {probability:.2f}")

# 3. Reload the full dataset if needed.
df = fetch_housing_data()  # or load from a CSV if applicable.
print("Full dataset loaded (first 5 rows):")
print(df.head(5))

# 4. Find the closest matching record.
closest_match = find_closest_match(user_input, df, scaler)

# 5. Print out details from the best match.
print("\nClosest Matching House Record:")
print("State:", closest_match.get("State", "N/A"))
print("City:", closest_match.get("City", "N/A"))
print("Listed Price:", closest_match.get("ListedPrice", "N/A"))
print("Mean Income:", closest_match.get("MeanIncome", "N/A"))
print("Bedrooms:", closest_match.get("Bedroom", "N/A"))
print("Bathrooms:", closest_match.get("Bathroom", "N/A"))
print("Area:", closest_match.get("Area", "N/A"))
print("2022 Population:", closest_match.get("2022 Population", "N/A"))



Data loaded:
  State      City  Bedroom  Bathroom    Area  ListedPrice Temperature  \
0    ne  omaha,ne      2.0       1.0   968.0        79000      Medium   
1    ne  omaha,ne      4.0       3.0  2420.0       250000      Medium   
2    ne  omaha,ne      3.0       1.0  1388.0       165000      Medium   
3    ne  omaha,ne      NaN       NaN     NaN        15500      Medium   
4    ne  omaha,ne      3.0       2.0  1888.0       170000      Medium   

   2022 Population  2016 Crime Rate  Unemployment  ...  Cost of Living  \
0           586327            0.035          2.73  ...         81031.3   
1           586327            0.035          2.73  ...         81031.3   
2           586327            0.035          2.73  ...         81031.3   
3           586327            0.035          2.73  ...         81031.3   
4           586327            0.035          2.73  ...         81031.3   

   AVG C2I  MeanIncome  QualityOfLifeTotalScore  QualityOfLifeQualityOfLife  \
0    93.91       60544  



Predicted house quality class (0 = low, 1 = high): 1
Full dataset loaded (first 5 rows):
  State          City  Bedroom  Bathroom    Area  ListedPrice Temperature  \
0    wi  milwaukee,wi      4.0       2.0  2014.0        25000        Cold   
1    wi  milwaukee,wi      6.0       2.0  2474.0       289900        Cold   
2    wi  milwaukee,wi      3.0       2.0  1530.0       184900        Cold   
3    wi  milwaukee,wi      3.0       1.0  1300.0       123500        Cold   
4    wi  milwaukee,wi      3.0       2.0  1267.0       179000        Cold   

   2022 Population  2016 Crime Rate  Unemployment  ...  Cost of Living  \
0           918661             0.05          3.82  ...         72420.6   
1           918661             0.05          3.82  ...         72420.6   
2           918661             0.05          3.82  ...         72420.6   
3           918661             0.05          3.82  ...         72420.6   
4           918661             0.05          3.82  ...         72420.6   

   



Evaluate Model

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import os
import pandas as pd
from dotenv import load_dotenv
from supabase import create_client, Client

def fetch_housing_data(page_size: int = 1000, order_column: str = "id") -> pd.DataFrame:
    """Download every row of the "House" table into a DataFrame.

    A single ``select("*")`` is subject to the API's per-request row cap
    (typically 1,000 rows on Supabase) and returns rows in no guaranteed
    order, so one call can silently yield an arbitrary slice of the table.
    This function pages through the table in a stable order instead.

    Args:
        page_size: Maximum number of rows requested per round trip.
        order_column: Column used to order rows so that pages neither overlap
            nor skip rows. Assumes the table has an ``id`` column — confirm
            against the table schema if the default is kept.

    Returns:
        A DataFrame with one row per table record (empty if the table is empty).
    """
    load_dotenv()  # Make sure your environment variables are set
    SUPABASE_URL = os.getenv("SUPABASE_URL")
    SUPABASE_KEY = os.getenv("SUPABASE_KEY")
    client: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

    rows = []
    start = 0
    while True:
        response = (
            client.table("House")
            .select("*")
            .order(order_column)
            .range(start, start + page_size - 1)
            .execute()
        )
        page = response.data or []
        if not page:
            break
        rows.extend(page)
        # Advance by what was actually returned so a server-side cap smaller
        # than page_size cannot cause rows to be skipped.
        start += len(page)
    return pd.DataFrame(rows)


# Import additional metrics from scikit-learn
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# scripts/train_model.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf

def train_model(epochs=50, test_size=0.2, random_state=42,
                model_path="trained_model.h5", scaler_path="scaler.pkl",
                target_scaler_path="target_scaler.pkl"):
    """Train, evaluate and save a linear multi-output regression model.

    Predicts 'QualityOfLifeTotalScore', 'Cost of Living' and '2016 Crime Rate'
    from six numeric listing/city columns.

    The targets are standardised before training. With raw targets the loss in
    this notebook's recorded run stayed around 2.1e9 for every epoch. The
    saved model therefore outputs standardised values; map them back to
    original units with the target scaler's ``inverse_transform``.

    NOTE(review): the default ``model_path`` / ``scaler_path`` are the same
    file names that prediction.py loads as the binary classifier and its
    scaler. Running this after train_model.py replaces those artefacts —
    confirm whether distinct file names should be used.

    Args:
        epochs: Number of training epochs.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed for the train/test split.
        model_path: Where the Keras model is saved.
        scaler_path: Where the input-feature scaler is saved.
        target_scaler_path: Where the target scaler is saved.

    Returns:
        None. Test loss (in standardised units) and per-target MAE / R^2
        (in original units) are printed.
    """
    import joblib

    # 1. Fetch data
    df = fetch_housing_data()

    # 2. Define features/targets
    input_features = ['ListedPrice', 'MeanIncome', 'Bedroom', 'Bathroom', 'Area', '2022 Population']
    target_features = ['QualityOfLifeTotalScore', 'Cost of Living', '2016 Crime Rate']

    X = df[input_features].fillna(df[input_features].median())
    y = df[target_features].fillna(df[target_features].median())

    # 3. Split first, so held-out rows cannot influence either scaler.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # 4. Scale inputs and targets using statistics of the training rows only.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    target_scaler = StandardScaler()
    y_train_scaled = target_scaler.fit_transform(y_train)
    y_test_scaled = target_scaler.transform(y_test)

    # 5. Build model: a single linear layer with one output per target.
    # An explicit Input layer replaces the deprecated `input_shape=` argument.
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(X_train.shape[1],)),
        tf.keras.layers.Dense(len(target_features))
    ])

    # A single loss function covers all outputs of the one Dense layer.
    model.compile(optimizer='adam', loss='mean_squared_error')

    # 6. Train
    model.fit(X_train, y_train_scaled, epochs=epochs, validation_split=0.1)

    # 7. Evaluate
    loss = model.evaluate(X_test, y_test_scaled)
    print("Test loss:", loss)

    # Report errors in original units, one line per target.
    y_pred = target_scaler.inverse_transform(model.predict(X_test))
    for column, name in enumerate(target_features):
        mae = mean_absolute_error(y_test[name], y_pred[:, column])
        r2 = r2_score(y_test[name], y_pred[:, column])
        print(f"{name}: MAE={mae:.4g}, R^2={r2:.3f}")

    # 8. Save model & scalers
    model.save(model_path)
    joblib.dump(scaler, scaler_path)
    joblib.dump(target_scaler, target_scaler_path)

# NOTE(review): inside a notebook cell __name__ is "__main__", so this guard is
# true and training starts as soon as the cell runs (see the epoch log in this
# cell's saved output).
if __name__ == "__main__":
    train_model()

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - loss: 2144239104.0000 - val_loss: 2146055424.0000
Epoch 2/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 2140733696.0000 - val_loss: 2146054400.0000
Epoch 3/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2165804544.0000 - val_loss: 2146053120.0000
Epoch 4/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2165979648.0000 - val_loss: 2146052096.0000
Epoch 5/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2144832384.0000 - val_loss: 2146050688.0000
Epoch 6/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 2144219648.0000 - val_loss: 2146049664.0000
Epoch 7/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 2139578880.0000 - val_loss: 2146048384.0000
Epoch 8/50
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37



Test loss: 2133832704.0
