IMPORT LIBRARIES

In [1]:
import numpy
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD, Adam
#from keras.wrappers.scikit_learn import KerasRegressor
#from sklearn.model_selection import cross_val_score
#from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
#from sklearn.pipeline import Pipeline
from sklearn.cross_validation import train_test_split


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# IMPORT DATA

In [2]:
data = pd.read_excel("SaqmeBotSM.xlsx")

SET VARIABLES

In [3]:
columns = ['District','Area','Rooms','Floor','Number_of_floors','Condition','Status','Project','Heat','Gas','Storage','Cellar','Balcony','Garage','gancxadeba','Price']

In [4]:
train_columns = ['District','Area','Rooms','Floor','Number_of_floors','Condition','Status','Project','Heat','Gas','Storage','Cellar','Balcony','Garage','gancxadeba']

In [5]:
amenities = ['Heat','Gas','Storage','Cellar','Balcony','Garage']

In [6]:
cat =  ['District','Condition','Status','Project'] 

In [7]:
num = ['Area','Rooms','Floor','Number_of_floors']

TRANSLATE COLUMN NAMES

In [8]:
data.columns = columns

## Preprocessing

Change Nan to 0 in Amenities

In [9]:
data[amenities] = data[amenities].fillna(0)

Drop other Nan-s

In [10]:
data.dropna(inplace=True)

Drop 'gancxadebebi'. We don't need that feature

In [11]:
data.drop('gancxadeba',axis=1,inplace=True)

Remove Fake data

In [12]:
data = data[(data['Area']>10) & (data['Area']<300)]  # Keep 'Area' from 10 to 300
data = data[(data['Floor']>1) & (data['Floor']<30)] # Keep 'Floor' from 1 to 30
data = data[(data['Number_of_floors']>1) & (data['Number_of_floors']<30)] # Keep 'Number_of_floors' from 1 to 30
data = data[(data['Price']>5000) & (data['Price']<2000000)] # Keep 'Price' from 5,000 to 2,000,000

Drop Duplicate Samples

In [13]:
data.drop_duplicates(inplace=True)

Create Dummies for Categorical Variables and drop first column for every feature

In [14]:
data = pd.get_dummies(data,columns=cat, drop_first=True)

MinMaxScale on Num Data

In [15]:
scaler = MinMaxScaler(feature_range=(0,1))
num = ['Area','Rooms','Floor','Number_of_floors']

In [16]:
scaled_features = scaler.fit_transform((data[num])) #Scaled features in 'num' columns

In [17]:
new = pd.DataFrame(scaled_features,columns=num) #Create 'new' DataFrame from scaled_features

In [18]:
data.reset_index(drop=True,inplace=True) #We should reset indexing in Data to merge it with 'new' Dataframe because their indexing are off (because of dropping rows)

In [19]:
data[num] = new[num] #Change 'num' columns in data with 'new' Dataframe

In [20]:
print(new.shape)
print(type(new))
print(data.shape)
print(type(data))
print(data[num].shape)

(37963, 4)
<class 'pandas.core.frame.DataFrame'>
(37963, 84)
<class 'pandas.core.frame.DataFrame'>
(37963, 4)


Create Label

In [21]:
y = data.pop('Price') #Price is our label

Split train test

In [22]:
X_train, X_test, y_train, y_test = train_test_split(data,y,test_size = 0.3,random_state=42)

In [23]:
print(X_train.shape)
print(type(X_train))

print(X_test.shape)
print(type(X_test))

print(y_train.shape)
print(type(y_train))

print(y_test.shape)
print(type(y_test))

(26574, 83)
<class 'pandas.core.frame.DataFrame'>
(11389, 83)
<class 'pandas.core.frame.DataFrame'>
(26574,)
<class 'pandas.core.series.Series'>
(11389,)
<class 'pandas.core.series.Series'>


## Neural Network

In [24]:
model = Sequential()
model.add(Dense(1000, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(1, kernel_initializer='normal'))

In [25]:
model.compile(loss='mean_squared_error',optimizer='adam',metrics=['accuracy'])

In [None]:
model.fit(X_train,y_train,epochs=5000,verbose=2,batch_size=500)

Epoch 1/5000
 - 7s - loss: 2822414000.9092 - acc: 0.0000e+00
Epoch 2/5000
 - 7s - loss: 2775308109.7807 - acc: 0.0000e+00
Epoch 3/5000
 - 6s - loss: 2756924179.6041 - acc: 0.0000e+00
Epoch 4/5000
 - 6s - loss: 2745970733.9613 - acc: 7.5262e-05
Epoch 5/5000
 - 7s - loss: 2737893347.5620 - acc: 3.7631e-05
Epoch 6/5000


## Evaluate

In [None]:
results = model.evaluate(X_test,y_test)

In [None]:
results[1]

## Save Model

In [None]:
model.save('trained.h5')
model.save_weights('trained_weights.h5')