In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# 1. Load the dataset
# Replace 'groceries.csv' with the path to your grocery dataset file
data = pd.read_csv('housing_prices.csv')

In [3]:
# Display the first few rows of the dataset
print("First few rows of the dataset:")
print(data.head())

First few rows of the dataset:
   Sale_Price  No of Bedrooms  No of Bathrooms  Flat Area (in Sqft)  \
0    221900.0               3             1.00               1180.0   
1    538000.0               3             2.25               2570.0   
2    180000.0               2             1.00                770.0   
3    604000.0               4             3.00               1960.0   
4    510000.0               3             2.00               1680.0   

   Lot Area (in Sqft)  No of Floors  No of Times Visited  Overall Grade  \
0              5650.0           1.0                    0              7   
1              7242.0           2.0                    0              7   
2             10000.0           1.0                    0              6   
3              5000.0           1.0                    0              7   
4              8080.0           1.0                    0              8   

   Area of the House from Basement (in Sqft)  Basement Area (in Sqft)  ...  \
0            

In [4]:
# 2. Preprocess the Data
# Create the target variable (1 for above average, 0 for below average)
average_price = data['Sale_Price'].mean()
data['price_category'] = np.where(data['Sale_Price'] > average_price, 1, 0)  # 1 for above average, 0 for below average

In [5]:
# Features and target variable
X = data.drop(['Sale_Price', 'price_category'], axis=1)  # Drop the Sale_Price and target column
y = data['price_category']

In [6]:
# Convert categorical variables to dummy/indicator variables if necessary
X = pd.get_dummies(X)

In [7]:
# 3. Split the Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# 4. Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# 5. Create the ANN
input_dim = X_train.shape[1]  # Number of features
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=(input_dim,)),  # First layer with ReLU activation
    layers.Dense(32, activation='relu'),  # Second layer with ReLU activation
    layers.Dense(1, activation='sigmoid')  # Output layer with Sigmoid activation
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# 6. Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [11]:
# 7. Train the model
model.fit(X_train, y_train, epochs=40, batch_size=32, verbose=1)

Epoch 1/40
[1m541/541[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.8457 - loss: 0.3457
Epoch 2/40
[1m541/541[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9055 - loss: 0.2207
Epoch 3/40
[1m541/541[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9052 - loss: 0.2239
Epoch 4/40
[1m541/541[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9100 - loss: 0.2073
Epoch 5/40
[1m541/541[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9108 - loss: 0.2071
Epoch 6/40
[1m541/541[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9123 - loss: 0.2043
Epoch 7/40
[1m541/541[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9165 - loss: 0.1953
Epoch 8/40
[1m541/541[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9146 - loss: 0.1990
Epoch 9/40
[1m541/541[0m [32m━━━━━━━━

<keras.src.callbacks.history.History at 0x1b14311e050>

In [12]:
# 8. Evaluate the model
y_pred = (model.predict(X_test) > 0.5).astype("int32")  # Predictions
print("\nAccuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

Accuracy: 0.9162424803331791
              precision    recall  f1-score   support

           0       0.92      0.93      0.93      2544
           1       0.91      0.89      0.90      1778

    accuracy                           0.92      4322
   macro avg       0.91      0.91      0.91      4322
weighted avg       0.92      0.92      0.92      4322



In [13]:
#q2

In [14]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [15]:
# Load the dataset (Replace with the path to your dataset)
url = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
data = pd.read_csv(url)

In [16]:
# Display the first few rows of the dataset
print("First few rows of the dataset:")
print(data.head())

First few rows of the dataset:
      crim    zn  indus  chas    nox     rm   age     dis  rad  tax  ptratio  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296     15.3   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242     17.8   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242     17.8   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222     18.7   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222     18.7   

        b  lstat  medv  
0  396.90   4.98  24.0  
1  396.90   9.14  21.6  
2  392.83   4.03  34.7  
3  394.63   2.94  33.4  
4  396.90   5.33  36.2  


In [17]:
# Step 1: Preprocessing - Check for null values
print("\nChecking for null values in each column:")
print(data.isnull().sum())


Checking for null values in each column:
crim       0
zn         0
indus      0
chas       0
nox        0
rm         0
age        0
dis        0
rad        0
tax        0
ptratio    0
b          0
lstat      0
medv       0
dtype: int64


In [18]:
# Step 2: Features and Target Variables
# Features (predictors)
X = data.drop('medv', axis=1)  # 'medv' is the target (house price)
# Target (House Price)
y = data['medv']

In [19]:
# Step 3: Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
# Step 4: Standardize the features (important for some models but not strictly necessary for linear regression)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [21]:
# Step 5: Train the Multiple Linear Regression model
model = LinearRegression()
model.fit(X_train_scaled, y_train)

In [22]:
# Step 6: Make predictions on the test data
y_pred = model.predict(X_test_scaled)

In [23]:
# Step 7: Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("\nModel Performance:")
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")


Model Performance:
Mean Squared Error: 24.291119474973495
R^2 Score: 0.6687594935356322


In [24]:
# Step 8: Display the predicted vs actual values
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("\nPredicted vs Actual values:")
print(results.head())


Predicted vs Actual values:
     Actual  Predicted
173    23.6  28.996724
274    32.4  36.025565
491    13.6  14.816944
72     22.8  25.031979
452    16.1  18.769880
