In [1]:
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
#import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression, Lasso, LassoCV, Ridge, RidgeCV, ElasticNet, ElasticNetCV
from sklearn.model_selection import train_test_split, KFold, cross_val_score


# Render every float with two decimals whenever pandas displays a frame/series.
pd.options.display.float_format = lambda value: '%.2f' % value

# Dataset used by every cell below (path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='df2.csv')

In [2]:
# Polynomial terms of `rooms` (powers 2 to 4); these columns are selected later by splitTest.
df['rooms2'] = df['rooms'].pow(2)
df['rooms3'] = df['rooms'].pow(3)
df['rooms4'] = df['rooms'].pow(4)


In [3]:
def findAlphaLasso(X,y,randomState=53,tries=2,alphaFrom=0.000001,alphaTo=10,steps=10000):
    """Find a Lasso regularisation strength with a repeated, narrowing grid search.

    Every try fits a ``LassoCV`` over ``steps`` evenly spaced alphas in
    ``[alphaFrom, alphaTo]`` using a shuffled 5-fold split, then shrinks the
    interval to two grid steps on either side of the selected alpha (the lower
    bound is clamped to 1e-15 so it never reaches zero or goes negative).

    Parameters
    ----------
    X, y : training features and target, passed unchanged to ``LassoCV.fit``.
    randomState : seed used to shuffle the ``KFold`` splitter.
    tries : number of search rounds; must be at least 1.
    alphaFrom, alphaTo : bounds of the first alpha grid.
    steps : number of alphas evaluated per round; must be at least 1.

    Returns
    -------
    The ``LassoCV`` estimator fitted in the last round (``alpha_`` holds the
    selected value).

    Raises
    ------
    ValueError
        If ``tries`` or ``steps`` is smaller than 1.
    """
    # Fail early with a clear message: with tries < 1 the loop never runs and
    # no model exists to return; with steps < 1 the step size is undefined.
    if tries < 1:
        raise ValueError("tries must be at least 1, got {}".format(tries))
    if steps < 1:
        raise ValueError("steps must be at least 1, got {}".format(steps))

    kf = KFold(n_splits=5, shuffle=True, random_state=randomState)
    step_value=(alphaTo-alphaFrom)/steps
    print("Lasso, try from {} to {}:".format(alphaFrom,alphaTo))
    for i in range(tries):
        al_lasso = np.linspace(alphaFrom, alphaTo, steps)
        # `normalize` was removed from LassoCV in scikit-learn 1.2; False was
        # its default, so omitting it keeps the same behaviour on old versions.
        lm_lasso_cv = LassoCV(alphas=al_lasso, cv=kf)
        lm_lasso_cv.fit(X, y)
        best_alpha = lm_lasso_cv.alpha_
        # Zoom in: next grid spans two current grid steps around the winner.
        alphaFrom = max(best_alpha - step_value*2, 1e-15)
        alphaTo = best_alpha + step_value*2
        step_value=(alphaTo-alphaFrom)/steps
        print("intento {} Alpha Value {}. Next try from {} to {} ".format(i+1,lm_lasso_cv.alpha_,alphaFrom,alphaTo))
    print("Alpha Founded {}".format(lm_lasso_cv.alpha_))
    return lm_lasso_cv

def findAlphaRidge(X,y,randomState=53,tries=2,alphaFrom=0.000001,alphaTo=10,steps=1000):
    """Find a Ridge regularisation strength with a repeated, narrowing grid search.

    Every try fits a ``RidgeCV`` over ``steps`` evenly spaced alphas in
    ``[alphaFrom, alphaTo]`` using a shuffled 5-fold split, then shrinks the
    interval to two grid steps on either side of the selected alpha (the lower
    bound is clamped to 1e-15 so it never reaches zero or goes negative).

    Parameters
    ----------
    X, y : training features and target, passed unchanged to ``RidgeCV.fit``.
    randomState : seed used to shuffle the ``KFold`` splitter.
    tries : number of search rounds; must be at least 1.
    alphaFrom, alphaTo : bounds of the first alpha grid.
    steps : number of alphas evaluated per round; must be at least 1.

    Returns
    -------
    The ``RidgeCV`` estimator fitted in the last round (``alpha_`` holds the
    selected value).

    Raises
    ------
    ValueError
        If ``tries`` or ``steps`` is smaller than 1.
    """
    # Fail early with a clear message: with tries < 1 the loop never runs and
    # no model exists to return; with steps < 1 the step size is undefined.
    if tries < 1:
        raise ValueError("tries must be at least 1, got {}".format(tries))
    if steps < 1:
        raise ValueError("steps must be at least 1, got {}".format(steps))

    kf = KFold(n_splits=5, shuffle=True, random_state=randomState)
    step_value=(alphaTo-alphaFrom)/steps
    print("Ridge, try from {} to {}:".format(alphaFrom,alphaTo))
    for i in range(tries):
        al_ridge = np.linspace(alphaFrom, alphaTo, steps)
        # `normalize` was removed from RidgeCV in scikit-learn 1.2; False was
        # its default, so omitting it keeps the same behaviour on old versions.
        lm_ridge_cv = RidgeCV(alphas=al_ridge, cv=kf)
        lm_ridge_cv.fit(X, y)
        best_alpha = lm_ridge_cv.alpha_
        # Zoom in: next grid spans two current grid steps around the winner.
        alphaFrom = max(best_alpha - step_value*2, 1e-15)
        alphaTo = best_alpha + step_value*2
        step_value=(alphaTo-alphaFrom)/steps
        print("intento {} Alpha Value {}. Next try from {} to {} ".format(i+1,lm_ridge_cv.alpha_,alphaFrom,alphaTo))
    print("Alpha Founded {}".format(lm_ridge_cv.alpha_))
    return lm_ridge_cv


def findAlphaElasticNet(X,y,randomState=53,tries=2,alphaFrom=0.000001,alphaTo=10,steps=1000):
    """Find an ElasticNet regularisation strength with a repeated, narrowing grid search.

    Every try fits an ``ElasticNetCV`` (library-default ``l1_ratio``) over
    ``steps`` evenly spaced alphas in ``[alphaFrom, alphaTo]`` using a shuffled
    5-fold split, then shrinks the interval to two grid steps on either side of
    the selected alpha (the lower bound is clamped to 1e-15 so it never reaches
    zero or goes negative).

    Parameters
    ----------
    X, y : training features and target, passed unchanged to ``ElasticNetCV.fit``.
    randomState : seed used to shuffle the ``KFold`` splitter.
    tries : number of search rounds; must be at least 1.
    alphaFrom, alphaTo : bounds of the first alpha grid.
    steps : number of alphas evaluated per round; must be at least 1.

    Returns
    -------
    The ``ElasticNetCV`` estimator fitted in the last round (``alpha_`` holds
    the selected value).

    Raises
    ------
    ValueError
        If ``tries`` or ``steps`` is smaller than 1.
    """
    # Fail early with a clear message: with tries < 1 the loop never runs and
    # no model exists to return; with steps < 1 the step size is undefined.
    if tries < 1:
        raise ValueError("tries must be at least 1, got {}".format(tries))
    if steps < 1:
        raise ValueError("steps must be at least 1, got {}".format(steps))

    kf = KFold(n_splits=5, shuffle=True, random_state=randomState)
    step_value=(alphaTo-alphaFrom)/steps
    print("ElasticNet, try from {} to {}:".format(alphaFrom,alphaTo))
    for i in range(tries):
        al_elastic = np.linspace(alphaFrom, alphaTo, steps)
        # `normalize` was removed from ElasticNetCV in scikit-learn 1.2; False
        # was its default, so omitting it keeps the same behaviour on old versions.
        modelCV = ElasticNetCV(alphas=al_elastic, cv=kf)
        modelCV.fit(X, y)
        best_alpha = modelCV.alpha_
        # Zoom in: next grid spans two current grid steps around the winner.
        alphaFrom = max(best_alpha - step_value*2, 1e-15)
        alphaTo = best_alpha + step_value*2
        step_value=(alphaTo-alphaFrom)/steps
        print("intento {} Alpha Value {}. Next try from {} to {} ".format(i+1,modelCV.alpha_,alphaFrom,alphaTo))
    print("Alpha Founded {}".format(modelCV.alpha_))
    return modelCV

def splitTest(df,provinciaFilter="",randomStates=53):
    """Build the design matrix / target and return a 70/30 train-test split.

    Selects the target and feature columns from ``df`` (optionally only the
    rows whose ``provincia`` equals ``provinciaFilter``), drops rows with any
    missing value, replaces ``property_type`` and ``localidad`` by dummy
    columns (first level dropped) and splits the result.

    Parameters
    ----------
    df : DataFrame holding the columns listed below (and ``provincia`` when a
        filter is given).
    provinciaFilter : province to keep; the empty string keeps every row.
    randomStates : seed forwarded to ``train_test_split``.

    Returns
    -------
    ``X_train, X_test, y_train, y_test`` as produced by ``train_test_split``.
    """
    target = 'log_price_usd_per_m2'
    selected = [target,'rooms','rooms2','rooms3','rooms4','surface_total_in_m2','property_type','localidad',
                'Seguridad','Amenities','Cochera','Estrenar','Gimnasio','Lavadero','Parrilla','Pileta','SUM']

    # `provincia` is only touched when a filter was actually requested.
    rows = slice(None) if provinciaFilter == "" else df.provincia == provinciaFilter
    data = df.loc[rows, selected].dropna()

    # One-hot encode the categoricals in the same order as before
    # (property_type first, then localidad), prepending the dummy columns.
    for categorical in ('property_type', 'localidad'):
        dummies = pd.get_dummies(data[categorical], drop_first=1)
        data = dummies.join(data).drop(categorical, axis=1)

    y = data[target]
    X = data.drop(target, axis=1)
    return train_test_split(X, y, test_size=0.30, random_state=randomStates)

def scoreByModel(model,X_train,X_test,y_train,y_test):
    """Print the fitted model's score on the training set and on the test set.

    ``model`` only needs a ``score(X, y)`` method; its result is printed with
    four decimals (for scikit-learn regressors that value is R^2). Nothing is
    returned.
    """
    train_score = model.score(X_train, y_train)
    print("Score Train : %.4f " % train_score)
    test_score = model.score(X_test, y_test)
    print("Score Test : %.4f " % test_score)

In [4]:
# Analysis restricted to CABA (rows whose provincia is "Capital Federal")
(X_caba_train,
 X_caba_test,
 y_caba_train,
 y_caba_test) = splitTest(df, provinciaFilter="Capital Federal")

print("CABA CON LASSO:")
# NOTE(review): alpha is hard-coded; presumably it is the value found by the
# (slow) search below - re-run it to confirm if the data or features change.
#caba_lasso=findAlphaLasso(X_caba_train, y_caba_train)
caba_lasso = Lasso(alpha=5.5633477896767367e-05).fit(X_caba_train, y_caba_train)
scoreByModel(caba_lasso, X_caba_train, X_caba_test, y_caba_train, y_caba_test)

CABA CON LASSO:
Score Train : 0.5304 
Score Test : 0.5328 


In [5]:
print("CABA CON RIDGE:")
# NOTE(review): alpha is hard-coded; presumably it is the value found by the
# (slow) search below - re-run it to confirm if the data or features change.
#caba_ridge=findAlphaRidge(X_caba_train, y_caba_train)
caba_ridge = Ridge(alpha=2.4119281585820747).fit(X_caba_train, y_caba_train)
scoreByModel(caba_ridge, X_caba_train, X_caba_test, y_caba_train, y_caba_test)

CABA CON RIDGE:
Score Train : 0.5305 
Score Test : 0.5326 


In [6]:
print("CABA CON ELASTIC NET:")
# NOTE(review): alpha is hard-coded; presumably it is the value found by the
# (slow) search below - re-run it to confirm if the data or features change.
#caba_elastic=findAlphaElasticNet(X_caba_train, y_caba_train)
caba_elastic = ElasticNet(alpha=0.0001001050950960901).fit(X_caba_train, y_caba_train)
scoreByModel(caba_elastic, X_caba_train, X_caba_test, y_caba_train, y_caba_test)

CABA CON ELASTIC NET:
Score Train : 0.5304 
Score Test : 0.5328 


In [7]:
# Analysis over the whole of Argentina (no province filter)
(X_arg_train,
 X_arg_test,
 y_arg_train,
 y_arg_test) = splitTest(df, provinciaFilter="")

print("ARG CON LASSO:")
# NOTE(review): alpha is hard-coded; presumably it is the value found by the
# (slow) search below - re-run it to confirm if the data or features change.
#arg_lasso=findAlphaLasso(X_arg_train, y_arg_train)
arg_lasso = Lasso(alpha=4.9429758144795265e-05).fit(X_arg_train, y_arg_train)
scoreByModel(arg_lasso, X_arg_train, X_arg_test, y_arg_train, y_arg_test)

ARG CON LASSO:
Score Train : 0.6277 
Score Test : 0.5086 


In [12]:
print("ARG CON RIDGE:")
# NOTE(review): alpha is hard-coded; presumably it is the value found by the
# (slow) search below - re-run it to confirm if the data or features change.
#arg_ridge=findAlphaRidge(X_arg_train, y_arg_train)
arg_ridge = Ridge(alpha=0.09811910830630631).fit(X_arg_train, y_arg_train)
scoreByModel(arg_ridge, X_arg_train, X_arg_test, y_arg_train, y_arg_test)

ARG CON RIDGE:
Score Train : 0.6326 
Score Test : 0.5247 


In [9]:
print("ARG CON ELASTIC NET:")
# NOTE(review): alpha is hard-coded; presumably it is the value found by the
# (slow) search below - re-run it to confirm if the data or features change.
#arg_elastic=findAlphaElasticNet(X_arg_train, y_arg_train)
arg_elastic = ElasticNet(alpha=2.002101902001802e-05).fit(X_arg_train, y_arg_train)
scoreByModel(arg_elastic, X_arg_train, X_arg_test, y_arg_train, y_arg_test)

ARG CON ELASTIC NET:
Score Train : 0.6308 
Score Test : 0.5536 


#### -----------------------------