# BOSTON HOUSE PRICING

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')  # libraries are imported above; this call silences all warnings

ModuleNotFoundError: No module named 'pandas'

## Let's load the Boston House Pricing Dataset

In [None]:
from sklearn.datasets import fetch_openml

# The dataset could not be imported directly, so it is fetched from OpenML instead.
# The version is pinned explicitly: OpenML can host several active versions under
# the same name, and an unpinned fetch is not guaranteed to be reproducible.
boston = fetch_openml(name='boston', version=1)
data = boston['data']      # feature data
target = boston['target']  # target values



In [None]:
boston.keys()  # list the fields available on the fetched dataset object

In [None]:
## Let's check the description of the dataset
# `boston` was already fetched in the loading cell, so it is reused here
# rather than being downloaded and rebound a second time.
print(boston['DESCR'])  # show the dataset description

In [None]:
print(boston.data)  # display the feature data

In [None]:
print(boston.data) # display the feature data (NOTE(review): this cell repeats the previous one)

In [None]:
print(boston.target)  # display the target values

In [None]:
print(boston.feature_names)  # display the feature names

## Preparing The Dataset

In [None]:
# Build a DataFrame from the feature data, labelling the columns with the dataset's feature names.
dataset = pd.DataFrame(boston.data,columns=boston.feature_names)

In [None]:
dataset.head()  # preview the first rows

In [None]:
# Append the target values as a new 'Price' column (it becomes the last column).
dataset['Price']=boston.target

In [None]:
dataset.head()  # preview again, now including 'Price'

In [None]:
dataset.info()  # summary of the columns of `dataset`

In [None]:
## Summarizing the stats of the data
dataset.describe()

In [None]:
## Check the missing values
dataset.isnull().sum()  # number of null entries per column

In [None]:
### Exploratory Data Analysis
## Correlation
dataset.corr()  # pairwise correlation between the columns

In [None]:
import seaborn as sns
sns.pairplot(dataset)  # pairwise plots across the columns of `dataset`

## Analyzing The Correlated Features

In [None]:
dataset.corr()  # correlation matrix, shown again as a reference for the feature-by-feature plots

In [None]:
# Price against crime rate (CRIM): the plot suggests that house prices tend
# to fall as the crime rate in the area rises.
fig, ax = plt.subplots()
ax.scatter(dataset['CRIM'], dataset['Price'])
ax.set_xlabel("Crime Rate")
ax.set_ylabel("Price")

In [None]:
# Price against RM (average number of rooms per dwelling): as the average
# number of rooms goes up, the house price tends to go up as well.
fig, ax = plt.subplots()
ax.scatter(dataset['RM'], dataset['Price'])
ax.set_xlabel("RM")
ax.set_ylabel("Price")

In [None]:
import seaborn as sns
sns.regplot(x="RM",y="Price",data=dataset)  # Price vs RM with a fitted regression line

In [None]:
sns.regplot(x="LSTAT",y="Price",data=dataset) # % lower status of the population; as that percentage rises, the house price appears to fall

In [None]:
sns.barplot(x=dataset['RAD'], y=dataset['Price'])
plt.xlabel("RAD")
plt.ylabel("Price") # This suggests that accessibility to radial highways may be an
                      # important factor in determining property value in Boston.

In [None]:
sns.regplot(x="PTRATIO",y="Price",data=dataset) #pupil-teacher ratio by town

In [None]:
## Independent and Dependent features

# 'Price' is the last column of `dataset`: every column before it is an
# independent feature, and the last column alone is the dependent target.
X, y = dataset.iloc[:, :-1], dataset.iloc[:, -1]

In [None]:
X.head() # preview the independent features

In [None]:
y # show the dependent variable (Price)

In [None]:
## Train Test Split
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the remaining 70% are used for training.
# random_state=42 fixes the shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
X_train.head(3) # show the first 3 training rows

In [None]:
X_test.head(3)  # show the first 3 test rows

In [None]:
## Standardize the dataset
from sklearn.preprocessing import StandardScaler   # rescales the features so they share a common scale
scaler=StandardScaler()

In [None]:
X_train_scalado =scaler.fit_transform(X_train) # fit the scaler on the training features and standardize them (zero mean, unit standard deviation) to help model performance

In [None]:
X_test_scalado =scaler.transform(X_test) # standardize the test features with the already-fitted scaler

In [None]:
import pickle

# Persist the fitted scaler so the same transformation can be re-applied later.
# The context manager closes the file as soon as the dump finishes.
with open('scaling.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
X_train  # training features as returned by train_test_split (not the scaled copy)

In [None]:
X_test  # test features as returned by train_test_split (not the scaled copy)

## Model Training

In [None]:
from sklearn.linear_model import LinearRegression # imported to build a linear model

In [None]:
regression = LinearRegression()  # unfitted estimator with default parameters

In [None]:
regression.fit(X_train,y_train) #en este caso podemos usarlo para hacer predcciones 

In [None]:
## print the coefficients and the intercept
print(regression.coef_) #en este caso vemos las variables independientes si es positivo o negativo indicando asi su cambio

In [None]:
print(regression.intercept_) #el valor de la casa cuando todo los otros valores son 0

In [None]:
## on which parameters the model has been trained
regression.get_params()

In [None]:
print(X_test.shape)

In [None]:
print(X_train.shape)

In [None]:
### Prediction With Test Data
reg_pred = regression.predict(X_test)

In [None]:
reg_pred  # predictions produced for the test set

## Assumptions

In [None]:
## plot a scatter plot for the prediction
plt.scatter(y_test,reg_pred)  # actual values on x, predictions on y

In [None]:
## Residuals
residuals=y_test-reg_pred  # actual minus predicted

In [None]:
residuals

In [None]:
## Plot the residuals

sns.displot(residuals,kind="kde")  # kernel density estimate of the residuals

In [None]:
## Scatter plot with respect to prediction and residuals
## uniform distribution
plt.scatter(reg_pred,residuals)  # predictions on x, residuals on y

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Report the test-set errors in order: MAE, MSE, then RMSE (square root of the MSE).
print(mean_absolute_error(y_test, reg_pred))
print(mean_squared_error(y_test, reg_pred))
print(np.sqrt(mean_squared_error(y_test, reg_pred)))

## R square and adjusted R square


Formula

**R^2 = 1 - SSR/SST**


- R^2 = coefficient of determination
- SSR = sum of squares of residuals
- SST = total sum of squares


In [None]:
from sklearn.metrics import r2_score
score=r2_score(y_test,reg_pred)  # R^2 of the predictions against the actual test values
print(score)

**Adjusted R2 = 1 – [(1-R2)*(n-1)/(n-k-1)]**

where:

- R2: The R2 of the model
- n: The number of observations
- k: The number of predictor variables

In [None]:
# Display adjusted R-squared.
# Here n = len(y_test) (observations) and k = X_test.shape[1] (predictor variables).
1 - (1-score)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)

## New Data Prediction

In [None]:
boston.data[0].reshape(1,-1)

In [None]:
##transformation of new data
scaler.transform(boston.data[0].reshape(1,-1))

In [None]:
regression.predict(scaler.transform(boston.data[0].reshape(1,-1)))

In [None]:
dataset.head(1).to_json(orient = 'columns')  # serialize the first row of `dataset` to a JSON string


## Pickling The Model file For Deployment

In [None]:
import pickle

In [None]:
# Save the fitted model; the context manager closes the file once the dump is done.
with open('regmodel.pkl', 'wb') as model_file:
    pickle.dump(regression, model_file)

In [None]:
# Load the model back from disk; the context manager closes the file after reading.
# NOTE: only unpickle files you trust, since unpickling can execute arbitrary code.
with open('regmodel.pkl', 'rb') as model_file:
    pickled_model = pickle.load(model_file)

In [None]:
## Prediction
# Use the first observation as a one-row DataFrame (`X.iloc[[0]]`); indexing the
# fetched feature table with `[0]` would look up a column, not a row.
pickled_model.predict(scaler.transform(X.iloc[[0]]))

In [None]:
import os

os.listdir('.')  # list the entries of the current working directory

In [None]:
# Download the pickled model through the browser. `google.colab` only exists
# inside Google Colab, so the import is guarded to keep the notebook runnable
# elsewhere instead of ending on an ImportError.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; 'regmodel.pkl' was not downloaded.")
else:
    files.download('regmodel.pkl')