In [1]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Load your dataset
data = pd.read_csv("allcities.csv")

# Features and target
X = data.drop(columns=['price'])
y = data['price']

# Preprocessing pipeline
numeric_features = ['bedroom', 'bathroom', 'area']
numeric_transformer = StandardScaler()

categorical_features = ['seller_type', 'layout_type', 'property_type', 'locality', 'furnish_type', 'city']
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Preprocess the dataset
X = preprocessor.fit_transform(X)

# Function to create a neural network
def create_neural_network(input_dim):
    model = Sequential()
    model.add(Input(shape=(input_dim,)))  # Input layer
    model.add(Dense(128, activation='relu'))  # Hidden layer
    model.add(Dense(64, activation='relu'))  # Hidden layer
    model.add(Dense(1))  # Output layer
    model.compile(optimizer='adam', loss='mse')  # Compile the model
    return model

# Cross-validation setup
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

rmse_scores = []
mae_scores = []
r2_scores = []

# Perform KFold cross-validation
for train_index, test_index in kfold.split(X):
    # Split data into training and test sets for the current fold
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    # Build the model
    model = create_neural_network(X_train.shape[1])
    
    # Train the model
    model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)
    
    # Predict on test set
    y_pred = model.predict(X_test).flatten()
    
    # Calculate evaluation metrics
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    # Store the metrics for this fold
    rmse_scores.append(rmse)
    mae_scores.append(mae)
    r2_scores.append(r2)

# Calculate average metrics across all folds
avg_rmse = np.mean(rmse_scores)
avg_mae = np.mean(mae_scores)
avg_r2 = np.mean(r2_scores)

# Print the average metrics
print(f"Average RMSE: {avg_rmse}")
print(f"Average MAE: {avg_mae}")
print(f"Average R^2: {avg_r2}")


[1m1207/1207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1207/1207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1207/1207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m1207/1207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 969us/step
[1m1207/1207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Average RMSE: 34225.27107347763
Average MAE: 9765.856799846024
Average R^2: 0.8613215212473211
