<a href="https://colab.research.google.com/github/anneboysen/Tensorflow_ANN_homeprice_predictions/blob/main/Tensorslow_Home_value_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import gspread
from google.colab import auth
from google.auth import default
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from google.colab import drive

# Mount Google Drive at /content/drive. The predictions CSV produced at the
# end of this script is written underneath this mount point.
drive.mount('/content/drive')

# Authenticate the Colab user, then build the gspread client used to read the
# input spreadsheet. default() returns a pair; only the credentials object
# (first element) is needed, so the second element is discarded.
auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)

# Load data from Google Sheets
# Both values below are placeholders: set them to the key of the spreadsheet
# and the name of the tab that holds the dataset.
spreadsheet_key = 'Your_dataset'
worksheet_name = 'worksheet'
worksheet = gc.open_by_key(spreadsheet_key).worksheet(worksheet_name)

# The first row returned by the sheet is used as the header; every remaining
# row is a record.
data = worksheet.get_all_values()
headers = data[0]
del data[0]

# Build the frame one step at a time:
#   1. cast every column to a numeric dtype (unparseable cells become NaN),
#   2. turn those NaNs into 0,
#   3. discard any row that contains +/- infinity.
# NOTE(review): step 2 also zero-fills the target column when a cell cannot be
# parsed - confirm that this is the intended treatment for missing prices.
original_df = pd.DataFrame(data, columns=headers)
original_df = original_df.apply(pd.to_numeric, errors='coerce')
original_df = original_df.fillna(0)
original_df.replace([np.inf, -np.inf], np.nan, inplace=True)
original_df.dropna(inplace=True)

# Show the available columns so the target name configured further down can
# be checked against the sheet.
print("Column names in the DataFrame:", original_df.columns, sep="\n")

# Column the network learns to predict. It must match a header in the sheet
# exactly (see the column names printed above).
target_column = '$SqFt'

# Columns that are kept out of the feature matrix. The target always belongs
# here; extend the list with any other column the model should not see,
# e.g. ['Column1', 'Column2', target_column].
columns_to_exclude = [target_column]

# Feature matrix: every remaining column. Names listed in columns_to_exclude
# that do not exist in the frame are skipped silently (errors='ignore').
features_df = original_df.drop(columns_to_exclude, axis=1, errors='ignore')
feature_matrix = features_df.to_numpy()

# Standardise each feature using the mean and standard deviation computed
# over all rows of the dataset.
scaler = StandardScaler()
features_scaled = scaler.fit(feature_matrix).transform(feature_matrix)

# Target vector as a plain NumPy array.
target = original_df[target_column].to_numpy()

# Define K-Fold Cross Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validation scores
cv_mse_scores = []      # held-out MSE of each fold
cv_r2_scores = []       # held-out R^2 of each fold
mse_per_epoch = []      # per-fold list of validation MSE, one value per epoch

# Cross-validation works on the *unscaled* features. features_scaled was
# standardised with statistics of every row, including the rows that act as
# the held-out fold, so using it here would leak test information into
# training and make the scores optimistic. A fresh scaler is therefore fit on
# the training rows of each fold only.
raw_features = features_df.values

for train_index, test_index in kf.split(raw_features, target):
    fold_scaler = StandardScaler().fit(raw_features[train_index])
    X_train = fold_scaler.transform(raw_features[train_index])
    X_test = fold_scaler.transform(raw_features[test_index])
    y_train, y_test = target[train_index], target[test_index]

    # Model definition
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer='l2'),
        Dropout(0.3),
        Dense(32, activation='relu', kernel_regularizer='l2'),
        Dropout(0.3),
        Dense(1, activation='linear')
    ])

    # Compile model with custom Adam optimizer.
    # The training loss is MSE *plus* the L2 kernel penalties, so the loss
    # value itself is not a clean MSE. Tracking 'mean_squared_error' as a
    # metric gives the penalty-free number for the per-epoch curve.
    custom_adam = Adam(learning_rate=0.001)
    model.compile(
        optimizer=custom_adam,
        loss='mean_squared_error',
        metrics=['mean_absolute_error', 'mean_squared_error'],
    )

    # Train the model and store the validation MSE for each epoch
    history = model.fit(X_train, y_train, epochs=200, batch_size=10, verbose=1, validation_split=0.2)
    mse_per_epoch.append(history.history['val_mean_squared_error'])

    # Score the held-out fold from the predictions themselves, so that the
    # reported MSE does not include the regularisation penalty that
    # model.evaluate() folds into its loss value.
    predictions = model.predict(X_test).flatten()
    mse_score = mean_squared_error(y_test, predictions)
    cv_mse_scores.append(mse_score)

    r2_score_val = r2_score(y_test, predictions)
    cv_r2_scores.append(r2_score_val)

# Print Cross-validation scores
print(f"Cross-validation MSE scores: {cv_mse_scores}")
print(f"Average Cross-validation MSE: {np.mean(cv_mse_scores)}")
print(f"Cross-validation R^2 scores: {cv_r2_scores}")
print(f"Average Cross-validation R^2: {np.mean(cv_r2_scores)}")

# Plotting MSE per epoch for each fold
# One line is drawn per entry of mse_per_epoch; folds are numbered from 1 in
# the legend.
fig, ax = plt.subplots(figsize=(12, 8))
for fold_number, fold_curve in enumerate(mse_per_epoch, start=1):
    ax.plot(fold_curve, label=f'Fold {fold_number}')
ax.set_xlabel('Epoch')
ax.set_ylabel('Mean Squared Error')
ax.set_title('Validation MSE per Epoch for Each Fold')
ax.legend()
ax.grid(True)
plt.show()

# Retrain final model on the full dataset
# The network has the same layer stack as the one used during
# cross-validation; it is assembled layer by layer here.
n_inputs = features_scaled.shape[1]
final_model = Sequential()
final_model.add(Dense(64, activation='relu', input_shape=(n_inputs,), kernel_regularizer='l2'))
final_model.add(Dropout(0.3))
final_model.add(Dense(32, activation='relu', kernel_regularizer='l2'))
final_model.add(Dropout(0.3))
final_model.add(Dense(1, activation='linear'))

final_optimizer = Adam(learning_rate=0.001)
final_model.compile(optimizer=final_optimizer, loss='mean_squared_error')
final_model.fit(features_scaled, target, epochs=175, batch_size=5, verbose=1)

# Append predictions to the DataFrame.
# These are predictions for the very rows the model was just trained on, so
# they are fitted (in-sample) values.
fitted_values = final_model.predict(features_scaled)
original_df['Predicted_Values'] = fitted_values.flatten()

# Save the DataFrame with predictions to Google Drive (placeholder file name;
# the row index is not written).
file_path = '/content/drive/My Drive/Your_output_set.csv'
original_df.to_csv(file_path, index=False)
print(f"Predictions saved to: {file_path}")