<a href="https://colab.research.google.com/github/apekshamehta/machine-learning-examples/blob/main/linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split

In [None]:
# 1. Load the data
# Read the sample listings CSV into a DataFrame. The path is relative to the
# notebook's working directory; change it to wherever the file was downloaded.
data = pd.read_csv(filepath_or_buffer="nyc_real_estate_dummy.csv")

In [None]:
# Quick look
# Leave the frame as the cell's last expression so the notebook renders it with
# its rich table display instead of the plain-text repr that print() produces.
data.head()

    latitude  longitude  size_sqft    price
0  40.699816 -74.066690       1593  1214831
1  40.930286 -73.906145       1252  1019454
2  40.842798 -73.757174        512   489401
3  40.789463 -73.820499       2324  1751381
4  40.612407 -73.787047        619   429090


In [None]:
# 2. Select features (X) and target (y)
# feature_cols is reused later (coefficient table), so it stays a named list.
feature_cols = ["latitude", "longitude", "size_sqft"]

# X: the three predictor columns; y: the "price" column to be predicted.
X, y = data[feature_cols], data["price"]

In [None]:
# 3. Train-test split
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# 4. Define and train the Linear Regression model
# Default LinearRegression settings, fitted on the training split only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# 5. Evaluate the model
# Predict prices for the held-out rows; the next cells score these predictions
# against y_test.
y_pred = model.predict(X=X_test)

In [None]:
# Score the held-out predictions: R² and mean absolute error (MAE is in the
# same units as the target).
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Report the hold-out metrics, one per line: R² to three decimals and MAE as a
# dollar amount with thousands separators.
print(
    f"R² score: {r2:.3f}",
    f"Mean Absolute Error: ${mae:,.2f}",
    sep="\n",
)

R² score: 0.992
Mean Absolute Error: $42,082.22


In [None]:
# 6. Inspect learned coefficients
# Pair each feature name with its fitted weight. This relies on model.coef_
# being ordered like the columns the model was fitted on, i.e. feature_cols.
coef_df = pd.DataFrame({
    "feature": feature_cols,
    "coefficient": model.coef_
})
print("\nLearned coefficients:")
print(coef_df)
print(f"\nIntercept: {model.intercept_:.2f}")

# 7. Example: Predict price for a new property
# Build a one-row frame, then select feature_cols explicitly so the columns are
# always in the order used for training, regardless of the dict's key order.
# A missing feature raises a KeyError here instead of reaching the model.
new_property = pd.DataFrame([{
    "latitude": 40.75,    # midtown-ish
    "longitude": -73.98,  # midtown-ish
    "size_sqft": 1200
}])[feature_cols]

# predict() returns one value per input row; take the single prediction.
predicted_price = model.predict(new_property)[0]
print(f"\nPredicted price for new property: ${predicted_price:,.2f}")


Learned coefficients:
     feature    coefficient
0   latitude  167797.822889
1  longitude -209213.265244
2  size_sqft     801.018174

Intercept: -22320912.23

Predicted price for new property: $955,668.23
