In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from sklearn.linear_model import ElasticNet, LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
train_path = '/home/bilalcelebi/Workspace/notebooks/data/regression/train.csv'
test_path = '/home/bilalcelebi/Workspace/notebooks/data/regression/test.csv'

In [4]:
train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)

In [5]:
train = train_df.copy()

In [6]:
train.drop('Id', axis = 1, inplace = True)

In [7]:
train_na_cols = train.isna().sum()
train_na_cols = train_na_cols[train_na_cols.values > 0].index.to_list()

In [8]:
train.drop(train_na_cols, axis = 1, inplace = True)

In [9]:
encoder = LabelEncoder()
cat_columns = [column for column in train.columns if train[column].dtype == 'object']

for col in cat_columns:
    train[col] = encoder.fit_transform(train[col])

In [10]:
X = train.drop('SalePrice', axis = 1)
y = train['SalePrice']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=125, test_size=0.3, shuffle=True)

In [12]:
models_data = dict()
models = {
    'LinearRegression':LinearRegression(),
    'LogisticRegression':LogisticRegression(),
    'ElasticNet':ElasticNet(),
    'Ridge':Ridge(),
    'Lasso':Lasso(),
    'DecisionTreeRegressor':DecisionTreeRegressor(),
    'ExtraTreeRegressor':ExtraTreeRegressor()
}

In [13]:
def get_model(x_train,y_train,x_test,y_test, model_name):
    
    model = models[model_name]
    model.fit(x_train,y_train)
    
    preds = model.predict(x_test)
    
    mse = mean_squared_error(preds, y_test)
    mse = np.sqrt(mse)
    r2 = r2_score(preds, y_test)
    
    scores = {'MSE':mse, 'R2':r2}
    
    return scores

In [14]:
for model_name in models.keys():
    
    models_data[model_name] = get_model(X_train,y_train,X_test,y_test,model_name)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  model = cd_fast.enet_coordinate_descent(


In [15]:
models_df = pd.DataFrame(models_data)
models_df

Unnamed: 0,LinearRegression,LogisticRegression,ElasticNet,Ridge,Lasso,DecisionTreeRegressor,ExtraTreeRegressor
MSE,32764.296168,64102.517632,32899.600504,32783.934758,32763.497764,40095.613071,39727.632981
R2,0.786861,-0.420717,0.759948,0.78647,0.78685,0.78357,0.770993


In [16]:
test_df.drop(train_na_cols, axis = 1, inplace = True)

In [17]:
test_df.drop('Id', axis = 1, inplace = True)

In [18]:
test_df.columns = [column for column in test_df.columns if column in X_train.columns]
test_cat_cols = [column for column in test_df.columns if test_df[column].dtype == 'object']

for column in test_cat_cols:   
    test_df[column] = encoder.fit_transform(test_df[column])

In [19]:
selected = Lasso()
selected.fit(X_train, y_train)

In [20]:
test_df.dropna(inplace = True)

In [21]:
test_preds = selected.predict(test_df)

In [22]:
test_df['SalePrice'] = test_preds.round(1)

In [23]:
test_df

Unnamed: 0,MSSubClass,MSZoning,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,...,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,20,2,11622,1,3,3,0,4,0,12,...,0,0,120,0,0,6,2010,8,4,112104.3
1,20,3,14267,1,0,3,0,0,0,12,...,0,0,0,0,12500,6,2010,8,4,158369.7
2,60,3,13830,1,0,3,0,4,0,8,...,0,0,0,0,0,3,2010,8,4,162236.2
3,60,3,9978,1,0,3,0,4,0,8,...,0,0,0,0,0,6,2010,8,4,196231.2
4,120,3,5005,1,0,1,0,4,0,22,...,0,0,144,0,0,1,2010,8,4,193930.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,160,4,1936,1,3,3,0,4,0,10,...,0,0,0,0,0,6,2006,8,4,60239.6
1455,160,4,1894,1,3,3,0,4,0,10,...,0,0,0,0,0,4,2006,8,0,53429.3
1456,20,3,20000,1,3,3,0,4,0,11,...,0,0,0,0,0,9,2006,8,0,157794.7
1457,85,3,10441,1,3,3,0,4,0,11,...,0,0,0,0,700,7,2006,8,4,106011.2
