In [42]:
#Imports
import pandas as pd
import sqlite3
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np
import joblib
import json

In [43]:
# Open the SQLite database file used by this notebook.
DB_PATH = '../Data/Flask/new_database2.db'
conn = sqlite3.connect(DB_PATH)
# Cursor bound to the connection, available for running raw SQL statements.
cursor = conn.cursor()

In [44]:
# Read the feature columns and the target (price) from the Connecticut table
# into a DataFrame. `state` is fetched as well, although the feature matrix
# built later in this notebook does not include it.
query = 'SELECT bed, bath, acre_lot, house_size, sold_previously, city, state, zip_code, price FROM Connecticut'
housing_df = pd.read_sql_query(sql=query, con=conn)

In [45]:
# Encode the categorical columns as integer labels with LabelEncoder.
# (This is label encoding, not one-hot; the one-hot variant is kept below, disabled.)
# housing_df = pd.get_dummies(housing_df, columns=['city', 'zip_code'])
city_encoder = LabelEncoder()
zip_encoder = LabelEncoder()
city_df = city_encoder.fit_transform(housing_df['city'])
zip_df = zip_encoder.fit_transform(housing_df['zip_code'])
city_zip_df = pd.DataFrame({
    "city_encoded": city_df,
    "zip_encoded": zip_df,
})

# Feature matrix = numeric columns followed by the two encoded label columns;
# the target is the sale price.
numeric_cols = ['bed', 'bath', 'acre_lot', 'house_size', 'sold_previously']
X = pd.concat([housing_df[numeric_cols], city_zip_df], axis=1)
y = housing_df['price']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [46]:
# Quick sanity check: shape of the target Series (one entry per row of housing_df).
y.shape

(67445,)

In [47]:
# Build a linear regression model and fit it on the training split
# (fit() returns the fitted estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Predict on the held-out split and compute the error metrics.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Report both metrics; they are expressed in the units of the price column.
print(f"Mean Absolute Error: {mae}")
print(f"Root Mean Squared Error: {rmse}")

Mean Absolute Error: 193352.18247326306
Root Mean Squared Error: 399912.9511614398


In [48]:
# Example input for prediction. Each list holds one value per row to score,
# so further rows can be added by extending every list.
new_data = pd.DataFrame({
    'bed': [4],
    'bath': [3],
    'acre_lot': [2.11],
    'house_size': [2626],
    'sold_previously': [1],
    'city': ['Barkhamsted'],
    # Must match a label known to zip_encoder (the recorded run accepted '6063.0').
    'zip_code': ['6063.0'],
})

# Encode every row with the encoders fitted on the training data. The whole
# transformed array is assigned (not just its first element), so a multi-row
# input gets the correct code per row instead of the first row's code
# broadcast to all rows. transform() raises ValueError for labels that were
# not seen during fitting.
new_data['city_encoded'] = city_encoder.transform(new_data['city'])
new_data['zip_encoded'] = zip_encoder.transform(new_data['zip_code'])

# Drop the raw categorical columns so the remaining columns line up with the
# feature matrix the model was trained on.
new_data = new_data.drop(columns=['zip_code', 'city'])

# Predict the price for the new data and show the first (here: only) result.
predicted_price = model.predict(new_data)

print(f"Predicted Price: ${predicted_price[0]}")

Predicted Price: $544895.7850557694


In [49]:

# Persist the trained model together with the encoders it was trained with.
# The already-fitted city_encoder / zip_encoder are dumped directly instead of
# re-fitting fresh LabelEncoders on housing_df: this avoids duplicate work and
# guarantees the saved label mappings are exactly the ones used to build X,
# even if housing_df were modified after training.
joblib.dump(model, "../Data/Flask/PKL/connecticut.pkl")
joblib.dump(city_encoder, "../Data/Flask/PKL/connecticut_CE.pkl")
joblib.dump(zip_encoder, "../Data/Flask/PKL/connecticut_ZE.pkl")

['../Data/Flask/PKL/connecticut_ZE.pkl']

In [50]:
# Close the SQLite connection opened at the top of the notebook.
conn.close()

In [51]:
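# # (Disabled cell) Load the model from the saved file.
# # NOTE: this path differs from the location the model is saved to earlier in
# # this notebook ("../Data/Flask/PKL/connecticut.pkl"); update it before re-enabling.
# loaded_model = joblib.load("ML_Models/connecticut_model.pkl")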

# # Extract relevant model information
# model_info = {
#     "model_type": "LinearRegression",
#     "coefficients": loaded_model.coef_.tolist(),
#     "intercept": loaded_model.intercept_,
# }

# # Convert the model information to a JSON format
# model_json = json.dumps(model_info, indent=4)

In [52]:
# print(model_json)

{
    "model_type": "LinearRegression",
    "coefficients": [
        -70459.54775916362,
        176553.88672433703,
        18033.920627264746,
        155.27153155230917,
        38066.31715587318,
        -444.02036215977694,
        1369.7057473196944
    ],
    "intercept": -221994.5893453455
}


In [53]:
# # (Disabled cell) Model parameters: a hard-coded copy of the JSON output printed above.
# model_info = {
#     "model_type": "LinearRegression",
#     "coefficients": [        -70459.54775916362,
#         176553.88672433703,
#         18033.920627264746,
#         155.27153155230917,
#         38066.31715587318,
#         -444.02036215977694,
#         1369.7057473196944],
#     "intercept": -221994.5893453455
# }

# # Define a file path for your JSON file
# json_file_path = "../Data/Flask/JSON/connecticut.json"

# # Write the model_info to the JSON file
# with open(json_file_path, "w") as json_file:
#     json.dump(model_info, json_file, indent=4)

# # Confirm that the JSON file has been created
# print(f"JSON file saved as {json_file_path}")