In [5]:
import pandas as pd
import numpy as np


df = pd.read_csv('house-prices.csv')
df.head()

Unnamed: 0,Home,Price,SqFt,Bedrooms,Bathrooms,Offers,Brick,Neighborhood
0,1,114300,1790,2,2,2,No,East
1,2,114200,2030,4,2,3,No,East
2,3,114800,1740,3,2,1,No,East
3,4,94700,1980,3,2,3,No,East
4,5,119800,2130,3,3,3,No,East


In [6]:
df = df.drop(columns=['Bedrooms','Bathrooms','Brick','Neighborhood'],axis=1)

In [14]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Extract features and target
X = df[['SqFt', 'Offers']]
y = df['Price']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and fit the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict output for test set
predicted_output = model.predict(X_test)

# Create a DataFrame with predicted and actual outputs
output_df = pd.DataFrame({
    'Predicted_Output': predicted_output,
    'Actual_Output': y_test.values
})

print(output_df)


    Predicted_Output  Actual_Output
0      111108.876044         125700
1      110072.045470         106600
2      130773.362458         167200
3      131756.586779         112300
4      159233.261505         145500
5      142625.660560         140900
6      131810.193032         165600
7      100400.621023          82300
8      152404.297513         184300
9      123944.398467         137000
10     138692.763277         160600
11     109142.427403          99300
12     124874.016534         130300
13     136726.314636         119800
14     167206.268577         133300
15     123944.398467         115700
16     126840.465175         117500
17     127930.902002         143400
18     145521.727269         113500
19      76749.631073          90500
20     123837.185960         111400
21     126894.071429         132500
22     119028.276863         150200
23     125857.240855         123000
24     161360.528906         146900
25     105263.136373         103200


In [15]:
# Create a function to compute the confusion matrix
def compute_confusion_matrix(predicted, actual):
    true_positives = 0
    true_negatives = 0
    false_positives = 0
    false_negatives = 0

    for pred, act in zip(predicted, actual):
        if pred == 1 and act == 1:
            true_positives += 1
        elif pred == 0 and act == 0:
            true_negatives += 1
        elif pred == 1 and act == 0:
            false_positives += 1
        elif pred == 0 and act == 1:
            false_negatives += 1

    return true_positives, true_negatives, false_positives, false_negatives

# Assuming 1 indicates positive and 0 indicates negative in the outputs
predicted_output_binary = (output_df['Predicted_Output'] > 0).astype(int)
actual_output_binary = (output_df['Actual_Output'] > 0).astype(int)

# Compute the confusion matrix
tp, tn, fp, fn = compute_confusion_matrix(predicted_output_binary, actual_output_binary)

# Display the confusion matrix
confusion_matrix = pd.DataFrame({
    'Predicted Positive': [tp, fp],
    'Predicted Negative': [fn, tn]
}, index=['Actual Positive', 'Actual Negative'])

print(confusion_matrix)


                 Predicted Positive  Predicted Negative
Actual Positive                  26                   0
Actual Negative                   0                   0


In [16]:
# Calculate Accuracy
accuracy = (tp + tn) / (tp + fp + fn + tn)

# Calculate Precision
precision = tp / (tp + fp) if tp + fp > 0 else 0  # Handle division by zero

# Calculate Recall
recall = tp / (tp + fn) if tp + fn > 0 else 0  # Handle division by zero

# Calculate F1 Score
f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0  # Handle division by zero

# Display the metrics
metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Value': [accuracy, precision, recall, f1_score]
})

print(metrics)


      Metric  Value
0   Accuracy    1.0
1  Precision    1.0
2     Recall    1.0
3   F1 Score    1.0
