In [1]:
import pandas as pd
from numpy import *
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
# Load the training CSV into a DataFrame.
train = pd.read_csv('../input/house-prices-advanced-regression-techniques/train.csv')

In [3]:
# Preview the first rows of the raw training frame.
train.head()

In [4]:
# Column dtypes and non-null counts of the raw training frame.
train.info()

# **Removing unwanted columns**

In [5]:
# Discard the columns that are not used as model features.
# The labels are passed via `columns=`: handing `axis` to DataFrame.drop as a
# second positional argument (the old `drop([...], 1)` form) is rejected with
# a TypeError by pandas 2.x.
train = train.drop(columns=[
    'LotFrontage','Alley','LotShape','LandContour','LotConfig','LandSlope','BldgType','HouseStyle',
    'YearBuilt','YearRemodAdd','RoofStyle','RoofMatl','Exterior2nd','MasVnrType','MasVnrArea',
    'ExterQual','ExterCond','BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1','BsmtFinSF1','BsmtFinType2',
    'BsmtFinSF2','BsmtUnfSF','TotalBsmtSF','Heating','HeatingQC','1stFlrSF','2ndFlrSF','LowQualFinSF',
    'GrLivArea','BsmtFullBath','BsmtHalfBath','FullBath','HalfBath','KitchenQual','TotRmsAbvGrd','Fireplaces',
    'FireplaceQu','GarageType','GarageYrBlt','GarageFinish','GarageCars','GarageQual','GarageCond','WoodDeckSF',
    'EnclosedPorch','3SsnPorch','ScreenPorch','MiscFeature','MiscVal','MoSold','YrSold','SaleType','SaleCondition','PoolQC','Fence','PoolArea','Electrical','MSZoning',
    'Utilities','Exterior1st','Functional','GarageArea','Condition2',
])

In [6]:
# Preview the training frame after the column drop.
train.head()

In [7]:
# Dtypes and non-null counts of the remaining training columns.
train.info()

In [8]:
# Summary statistics of the remaining training columns.
train.describe()

In [9]:
# Per column: True if the column contains at least one missing value.
train.isnull().any()

In [10]:
# Preview of the training frame (unchanged since the column drop).
train.head()

In [11]:
# (rows, columns) of the training frame, target column still included.
train.shape

In [12]:
# Preview of the training frame before the target is split off.
train.head()

In [13]:
# Split the regression target off the feature frame.
target = train['SalePrice']
# `columns=` instead of a positional axis: pandas 2.x only accepts the labels
# positionally in DataFrame.drop.
train = train.drop(columns=['SalePrice'])

In [14]:
# (rows, columns) of the training frame after removing the target column.
train.shape

# **Sorting and Cleaning Test data**

In [15]:
# Load the test CSV into a DataFrame.
test = pd.read_csv('../input/house-prices-advanced-regression-techniques/test.csv')

In [16]:
# Preview the first rows of the raw test frame.
test.head()

In [17]:
# Column dtypes and non-null counts of the raw test frame.
test.info()

In [18]:
# Drop the same columns from the test frame that were removed from the
# training frame, so both frames keep the same feature set.
# The labels are passed via `columns=`: handing `axis` to DataFrame.drop as a
# second positional argument (the old `drop([...], 1)` form) is rejected with
# a TypeError by pandas 2.x.
test = test.drop(columns=[
    'LotFrontage','Alley','LotShape','LandContour','LotConfig','LandSlope','BldgType','HouseStyle',
    'YearBuilt','YearRemodAdd','RoofStyle','RoofMatl','Exterior2nd','MasVnrType','MasVnrArea',
    'ExterQual','ExterCond','BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1','BsmtFinSF1','BsmtFinType2',
    'BsmtFinSF2','BsmtUnfSF','TotalBsmtSF','Heating','HeatingQC','1stFlrSF','2ndFlrSF','LowQualFinSF',
    'GrLivArea','BsmtFullBath','BsmtHalfBath','FullBath','HalfBath','KitchenQual','TotRmsAbvGrd','Fireplaces',
    'FireplaceQu','GarageType','GarageYrBlt','GarageFinish','GarageCars','GarageQual','GarageCond','WoodDeckSF',
    'EnclosedPorch','3SsnPorch','ScreenPorch','MiscFeature','MiscVal','MoSold','YrSold','SaleType','SaleCondition','PoolQC','Fence','PoolArea','Electrical','MSZoning',
    'Utilities','Exterior1st','Functional','GarageArea','Condition2',
])

In [19]:
# Preview the test frame after the column drop.
test.head()

In [20]:
# Number of missing values per remaining test column.
test.isnull().sum()

In [21]:
# (rows, columns) of the reduced test frame.
test.shape

In [22]:
# Missing-value count per test column (same check as the cell two above).
test.isnull().sum()

# **Plotting Data**

In [23]:
# Scatter plot: sale price as a function of lot area.
plt.figure(figsize=(10, 10))
plt.scatter(x=train['LotArea'], y=target)
plt.xlabel("Lot Area")
plt.ylabel("Price")

In [24]:
# Mean sale price per street type.
# Passing the raw rows to plt.bar draws one bar per house, all stacked on the
# few distinct x positions, so only the tallest bar of each category is
# visible. Aggregating first shows one meaningful bar per category.
street_mean_price = target.groupby(train['Street']).mean()
plt.figure(figsize=(6, 6))
plt.bar(street_mean_price.index, street_mean_price.values)
plt.xlabel("Street")
plt.ylabel("Mean price")

In [25]:
# Mean sale price per neighborhood.
# Passing the raw rows to plt.bar draws one bar per house, all stacked on the
# distinct x positions, so only the tallest bar of each category is visible.
# Aggregating first shows one meaningful bar per category.
neighborhood_mean_price = target.groupby(train['Neighborhood']).mean()
plt.figure(figsize=(30, 6))
plt.bar(neighborhood_mean_price.index, neighborhood_mean_price.values)
plt.xlabel("Neighborhood")
plt.ylabel("Mean price")

In [26]:
# Mean sale price per driveway paving category.
# Passing the raw rows to plt.bar draws one bar per house, all stacked on the
# few distinct x positions, so only the tallest bar of each category is
# visible. Aggregating first shows one meaningful bar per category.
paved_drive_mean_price = target.groupby(train['PavedDrive']).mean()
plt.figure(figsize=(10, 7))
plt.bar(paved_drive_mean_price.index, paved_drive_mean_price.values)
plt.xlabel("PavedDrive")
plt.ylabel("Mean price")

# ***One-Hot Encoding and Normalization***

In [27]:
# Boolean mask per column: True where the column dtype is int64.
# `int64` is in scope only through the `from numpy import *` star import.
train.dtypes == int64

In [28]:
# Preview of the feature frame before encoding.
train.head()

In [29]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scaler instance; used further down on the encoded training
# features and again on the encoded test features.
scaler = MinMaxScaler()

In [30]:
# Names of the numeric feature columns kept in the frame.
numerical = ['MSSubClass','LotArea','OverallQual','OverallCond','BedroomAbvGr','KitchenAbvGr','OpenPorchSF']
# Names of the categorical feature columns kept in the frame.
# 'Condition2' is not listed: it is removed by the column drop earlier in the
# notebook, so selecting it from the frame would raise a KeyError.
categorical = ['Street','Neighborhood','Condition1','Foundation','PavedDrive']

In [31]:
# One-hot encode the non-numeric columns; the first level of every encoded
# column is dropped (drop_first=True).
x = pd.get_dummies(data=train, drop_first=True)
x.head()

In [32]:
# The row identifier carries no predictive information; remove it.
# `columns=` replaces the positional axis argument, which pandas 2.x rejects.
x = x.drop(columns=['Id'])

In [33]:
# Fit the min-max scaler on the encoded training features and replace `x`
# with the scaled values, keeping the original column names.
# NOTE(review): the scaler is fitted on all rows before the train/hold-out
# split performed further down, so the hold-out rows influence the scaling.
x = pd.DataFrame(scaler.fit_transform(x),columns=x.columns)
x.head()

In [34]:
# (rows, columns) of the scaled feature matrix.
x.shape

In [35]:
# Display the raw (untransformed) sale prices.
target

In [36]:
# Target as a one-column frame, transformed with log(1 + price).
y = log1p(target.to_frame())
y

In [37]:
import seaborn as sn
# Distribution of the log-transformed target.
# `distplot` is deprecated in seaborn; `histplot` with a KDE overlay is its
# documented replacement. `squeeze()` turns the one-column frame into a Series.
sn.histplot(y.squeeze(), kde=True)

In [38]:
# Names of the encoded feature columns fed to the models.
x.columns

# Splitting Data into Train, Test

In [39]:
# Hold out 30% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=30,
)

In [40]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import plot_confusion_matrix
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.metrics import classification_report
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor

In [41]:
# Candidate regressors to compare (the name `classifiers` is historical).
# RandomForestRegressor appears twice; both entries are fitted and scored.
classifiers = [
    RandomForestRegressor(),
    XGBRegressor(),
    DecisionTreeRegressor(),
    SVR(),
    Ridge(),
    Lasso(),
    ElasticNet(),
    RandomForestRegressor(),
    ExtraTreesRegressor(n_estimators=99, max_features="log2"),
]

In [42]:
# Fit every candidate and print its score() on the training and hold-out
# splits.
# The targets are one-column frames; several scikit-learn estimators expect a
# 1-D target and emit a DataConversionWarning otherwise, so flatten them once.
y_train_1d = y_train.to_numpy().ravel()
y_test_1d = y_test.to_numpy().ravel()
for model in classifiers:
    print("---------------------------------------------")
    model.fit(X_train, y_train_1d)
    print(model)
    print("Train Model Score", model.score(X_train, y_train_1d))
    print('_____________________________________________')
    print("Test Model Score", model.score(X_test, y_test_1d))
    print(" ")
    print(" ")

# ***ExtraTreesRegressor has the best score***

In [43]:
# Flatten the one-column target frames: scikit-learn estimators expect a 1-D
# target and several emit a DataConversionWarning for a column vector.
y_train_1d = y_train.to_numpy().ravel()
y_test_1d = y_test.to_numpy().ravel()

# ExtraTreesRegressor with default settings, scored on both splits.
model = ExtraTreesRegressor()
model.fit(X_train, y_train_1d)
print("Train Score -", model.score(X_train, y_train_1d))
print("Test Score - ", model.score(X_test, y_test_1d))

# Training-set score of every candidate, in the order of `classifiers`.
# A separate loop variable is used so `model` keeps pointing at the
# ExtraTreesRegressor fitted above instead of being silently rebound.
scores = []
for candidate in classifiers:
    candidate.fit(X_train, y_train_1d)
    scores.append(candidate.score(X_train, y_train_1d))

In [44]:
# Bar labels for the score chart, one per entry of `classifiers` and in the
# same order. Labels must be unique: matplotlib places bars with identical
# string labels on the same x position, so the second random-forest run would
# be drawn on top of the first one.
classifier = ['RandomForestRegressor','XGBRegressor','DecisionTreeRegressor','SVR',
               'Ridge','Lasso','ElasticNet','RandomForestRegressor (2)','ExtraTreesRegressor']

In [45]:
# Bar chart of the training-set scores, one bar per label in `classifier`.
score_fig = plt.figure(figsize=(18, 5), facecolor='gainsboro')
score_ax = score_fig.gca()
score_ax.bar(classifier, scores, color='lightgrey', edgecolor='cyan')
score_ax.set_title("Training Scores")


# Final Model 

In [46]:
# Model used for the submission. ExtraTreesRegressor is randomized; fixing the
# seed (same value as the train/test split) makes the predictions repeatable
# across kernel restarts.
final_model = ExtraTreesRegressor(random_state=30)
test.head()

In [47]:
# Keep the row identifiers; they are joined back onto the predictions below.
id_pred = test['Id']

In [48]:
# Remove the identifier so the test frame holds features only.
# `columns=` replaces the positional axis argument, which pandas 2.x rejects.
test = test.drop(columns=['Id'])

In [49]:
# Fit the submission model on the training split. The target is a one-column
# frame; flatten it to the 1-D shape scikit-learn expects to avoid the
# DataConversionWarning.
final_model.fit(X_train, y_train.to_numpy().ravel())

In [50]:
# Missing-value count per test feature before encoding.
test.isnull().sum()

In [51]:
# One-hot encode the test features and align them with the training columns.
# Encoding the test frame independently with drop_first=True can drop a
# different level than the training encoding did, or yield a different
# column set/order. Encoding every level and then reindexing to the training
# columns (`x.columns`) keeps exactly the features the model was fitted on;
# levels absent from the test data are filled with 0.
test = pd.get_dummies(test).reindex(columns=x.columns, fill_value=0)

In [52]:
# Preview the encoded test features.
test.head()

In [53]:
# Scale the test features with the scaler that was fitted on the training
# features. Re-fitting on the test data (fit_transform) would map the test
# columns with different min/max values than the ones the model was trained
# on. The frame is first aligned to the training columns so the scaler sees
# the same features in the same order.
test_aligned = test.reindex(columns=x.columns, fill_value=0)
test_enc = pd.DataFrame(scaler.transform(test_aligned), columns=x.columns)
test_enc.head()

In [54]:
# (rows, columns) of the scaled test features.
test_enc.shape

In [55]:
# Column names of the scaled test features.
test_enc.columns

In [56]:
# Missing-value count per scaled test column.
test_enc.isnull().sum()

In [57]:
# Predictions for the test rows; the model was fitted on the log1p-transformed
# target, so these values are on that scale.
y_final_pred = final_model.predict(test_enc)

In [58]:
# Display the raw predictions.
y_final_pred

In [59]:
# Invert the log1p transform applied to the target before training.
# `expm1` is in scope only through the `from numpy import *` star import.
ypred = expm1(y_final_pred)

In [60]:
# Display the back-transformed predictions.
ypred

In [61]:
# Wrap the predictions in a frame with a single 'SalePrice' column.
y_pred = pd.DataFrame(ypred,columns=['SalePrice'])

In [62]:
# Display the prediction frame.
y_pred

In [63]:
# Place the identifiers and the predicted prices side by side (joined on the
# row index).
submission_parts = [id_pred, y_pred]
result = pd.concat(submission_parts, axis=1)
result

In [64]:
# Write the submission file to the working directory, without the index column.
result.to_csv("submission.csv",index=False)

In [65]:
# Confirmation message (spelling of the original message corrected).
print("Submission was successfully saved!!!")