# Import libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Import constants, functions and application
import constants
import functions
from application import app

# Import data

In [None]:
# Load the dataset from the CSV file located at constants.PATH
real_estate_data = pd.read_csv(constants.PATH)

# Preview the first rows of the dataset (head() returns 5 rows by default)
real_estate_data.head()

In [None]:
# Convert the column named by constants.date_col to pandas datetime values,
# overwriting the original column in the DataFrame
real_estate_data[constants.date_col] = pd.to_datetime(real_estate_data[constants.date_col])

# Dataset Info

In [None]:
# Print a concise summary of the DataFrame (columns, non-null counts, dtypes)
real_estate_data.info()

In [None]:
# Descriptive statistics of the dataset, computed by DataFrame.describe()
# with its default arguments
real_estate_data.describe()

# Missing values

In [None]:
# Optionally discard rows whose price (column constants.price_col) is exactly 0.
# The behaviour is switched on/off by the constants.drop_zero_vals flag.
# NOTE(review): presumably a price of 0 marks a missing value - confirm.
if constants.drop_zero_vals:
    # .loc with a boolean mask makes the row filter explicit; .copy() detaches
    # the result from the original frame so that later column assignments on
    # it do not trigger pandas' SettingWithCopyWarning.
    real_estate_data = real_estate_data.loc[
        real_estate_data[constants.price_col] != 0
    ].copy()

# Spot-check rows at positions 15-19 (positional slice, independent of the
# index labels, which have gaps once rows have been dropped)
real_estate_data.iloc[15:20]

# Exploratory analysis

In [None]:
# Plot histograms for the dataset's variables.
# Delegates to the project helper functions.histograms_eda; which columns are
# plotted is decided inside that helper.
functions.histograms_eda(real_estate_data)

In [None]:
# Plot scatter plots for the dataset's variables.
# Delegates to the project helper functions.scatterplots_eda; the variable
# pairs shown are chosen inside that helper.
functions.scatterplots_eda(real_estate_data)

In [None]:
# Plot the observations as points on a map.
# Delegates to the project helper functions.scattermap_eda
# (presumably uses latitude/longitude columns - confirm in the helper).
functions.scattermap_eda(real_estate_data)

In [None]:
# Plot a 3D scatter plot for the dataset's variables.
# Delegates to the project helper functions.scatter3d_eda; the three axes are
# chosen inside that helper.
functions.scatter3d_eda(real_estate_data)

In [None]:
# Plot the correlation matrix of the dataset.
# Delegates to the project helper functions.correlation_matrix_eda.
functions.correlation_matrix_eda(real_estate_data)

# Models

In [None]:
# Split the dataset into training and testing sets.
# The helper returns four objects: features and target for train and test.
# Feature/target selection and the split ratio are defined inside
# functions.data_sep_train_test.
X_train, X_test, y_train, y_test = functions.data_sep_train_test(real_estate_data)

In [None]:
# Normalize the train and test features via functions.data_normalization.
# NOTE(review): presumably the scaler is fitted on X_train only and then
# applied to X_test - confirm in the helper to rule out data leakage.
X_train_scaled, X_test_scaled = functions.data_normalization(X_train, X_test)

## Linear Regression

In [None]:
# Train a linear regression model on the scaled training features
# (training logic lives in functions.linear_regression_train)
model = functions.linear_regression_train(X_train_scaled, y_train)

# Predict the target for the scaled test features with the trained model
y_pred_lr =  model.predict(X_test_scaled)

# Plot the actual test targets against the model's predictions
functions.plot_output_pred(y_test, y_pred_lr)

In [None]:
# R2 score on the training set (predictions are recomputed on the scaled
# training features)
r2_score(y_train, model.predict(X_train_scaled))

In [None]:
# R2 score on the test set, using the predictions computed above
r2_score(y_test, y_pred_lr)

# Application



In [None]:
# Run the application imported from the project's `application` module,
# with debug mode switched on.
# NOTE(review): if `app` is a Dash application, `run_server` is deprecated in
# recent Dash releases in favor of `app.run` - confirm against the installed
# version. Debug mode is presumably meant for local development only.
app.run_server(debug=True)