# AI LAB 11
### Name: Rasikh Ali
### Roll number: BSEM-F19-060

<div class="alert alert-block alert-info">
    Using <b>Python </b> v3.8.18
    <br>
    with <b>Jupyter </b> v7.4.9
</div>

# Libraries

In [57]:
import pandas as pd
import numpy as np
import pickle

from sklearn.svm import SVC, SVR
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score, root_mean_squared_error, mean_absolute_error
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Data reading / loading

In [None]:
# Load the raw dataset from data.csv (path relative to the working directory)
# into the notebook-wide DataFrame `df` that every later cell reads or mutates.
df = pd.read_csv("data.csv")
# Bare expression: rendered as the cell output.
df

In [None]:
# Plain-text dump of the frame (as opposed to the rich rendering of a bare `df`).
print(df)

In [None]:
# Remove pandas' row and column display limits so frames render in full.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
# Re-display the frame under the new settings.
df

# Data Exploration

In [None]:
# Data View
# Only the last bare expression of a cell is rendered, so the default preview
# is printed explicitly; as a bare expression its result was discarded.
print(df.head())
# First two rows, rendered as the cell output.
df.head(2)

In [None]:
# Only the last bare expression of a cell is rendered, so the default preview
# of the final rows is printed explicitly instead of being discarded.
print(df.tail())
# Last two rows, rendered as the cell output.
df.tail(2)

In [None]:
# Report the frame's dimensions twice: as a sentence, then as the raw
# (rows, columns) tuple.
print(f"Number of rows {df.shape[0]}, Number of columns {df.shape[1]}")
print(df.shape)

In [None]:
print("-- Attributes in Data --")
# Emit one column label per line.
for column_name in df.columns:
    print(column_name)

In [None]:
# df.count() reports the number of non-null entries per column, so a column
# with missing values shows fewer entries than the frame has rows.
print("-- Number of instances in Data --")
print(df.count())

In [None]:
# Distinct values found in the 'city' column (before any cleaning).
df['city'].unique()

In [None]:
# Number of distinct values in each column.
print("-- Number of Unique Values in Data --")
print(df.nunique())

In [None]:
# Missing-value count for every column.
print("-- Number of Null Values in Data --")
print(df.isnull().sum())

In [None]:
# Summary statistics; the describe() result is rendered as the cell output.
print("-- Details of Data --")
df.describe()

In [None]:
# Column dtypes and non-null counts; info() prints its own report.
print("-- Insights of Data --")
df.info()

# Data Pre-Processing

### Manage Null-Values

In [None]:
# Baseline of missing values per column before any imputation is applied.
print("-- Number of Null Values in Data --")
print(df.isnull().sum())

In [None]:
# Only the 'street' column is inspected here, so the label names that column
# instead of claiming to cover the whole dataset.
print("-- Number of Null Values in 'street' --")
print(df['street'].isnull().sum())

In [None]:
# For Object/String Types
# Replace the missing entries of each text column with that column's most
# frequent value.
for text_col in ('street', 'city', 'statezip', 'country'):
    df[text_col] = df[text_col].fillna(df[text_col].mode()[0])

In [None]:
def fillNaObjMode(col):
    """Fill missing values in the given columns of the global ``df``.

    Each column's missing entries are replaced by the first value returned
    by that column's ``mode()``. ``df`` is modified in place; nothing is
    returned.

    Args:
        col: iterable of column names present in ``df``.
    """
    for name in col:
        series = df[name]
        most_common = series.mode()[0]
        df[name] = series.fillna(most_common)

# Text columns to impute with their most frequent value.
# NOTE(review): the preceding cell already mode-filled these same four
# columns, so this call should find nothing left to fill.
columns = ['street', 'city', 'statezip', 'country']
fillNaObjMode(columns)

In [None]:
def fillNaMean(col):
    """Fill missing values in the given columns of the global ``df``.

    Each column's missing entries are replaced by that column's mean.
    ``df`` is modified in place; nothing is returned.

    Args:
        col: iterable of numeric column names present in ``df``.
    """
    for name in col:
        series = df[name]
        column_mean = series.mean()
        df[name] = series.fillna(column_mean)

# Columns whose missing values are replaced with the column mean.
# NOTE(review): 'price' is later used as the prediction target; confirm that
# imputing the target with its mean is intended.
columns = ['price','sqft_living','sqft_lot','sqft_above','sqft_basement']
fillNaMean(columns)

In [None]:
def fillNaMode(col):
    """Fill missing values in the given columns of the global ``df``.

    Each column's missing entries are replaced by the first value returned
    by that column's ``mode()``. ``df`` is modified in place; nothing is
    returned.

    Args:
        col: iterable of column names present in ``df``.
    """
    for column_name in col:
        most_frequent = df[column_name].mode()[0]
        filled = df[column_name].fillna(most_frequent)
        df[column_name] = filled

# Numeric columns imputed with their most frequent value instead of the mean
# (presumably because they hold counts, flags or years - verify against the data).
columns = ['bedrooms','bathrooms','floors','waterfront','view','yr_built']
fillNaMode(columns)

In [None]:
# Remove the 'date' and 'street' columns from df in place with a single call.
df.drop(columns=['date', 'street'], inplace=True)

In [None]:
# Quick look at the first two rows after the columns were dropped.
df.head(2)

In [None]:
# Re-check missing values per column after the imputation cells above.
print(df.isnull().sum())

In [None]:
def changetoint64(col):
    """Cast the given columns of the global ``df`` to ``int64`` in place.

    Args:
        col: iterable of column names present in ``df``.

    NOTE(review): casting floats with ``astype('int64')`` discards any
    fractional part; confirm that columns such as 'bathrooms' and 'floors'
    hold whole numbers only, otherwise information is lost here.
    """
    for name in col:
        df[name] = df[name].astype('int64')


# 'price' is part of this list, so the separate up-front cast of df['price']
# was redundant and has been removed.
columns = [
    'price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
    'floors', 'waterfront', 'view', 'sqft_above', 'sqft_basement',
    'yr_built',
]
changetoint64(columns)

In [None]:
# Confirm the dtypes after the int64 conversion.
print("-- Insights of Data --")
df.info()

In [None]:
def fillNaObjMode(col):
    """Fill missing values in the given columns of the global ``df``.

    Re-declares the helper of the same name from an earlier cell with the
    same behaviour: each column's missing entries are replaced by the first
    value returned by that column's ``mode()``. ``df`` is modified in place.

    Args:
        col: iterable of column names present in ``df``.
    """
    for label in col:
        fill_value = df[label].mode()[0]
        df[label] = df[label].fillna(fill_value)

# 'street' has been dropped by this point, so only the remaining text columns
# are listed.
# NOTE(review): these columns were already mode-filled in earlier cells; this
# pass looks redundant.
columns = ['city', 'statezip', 'country']
fillNaObjMode(columns)

In [None]:
# Distinct 'city' values that the label encoder below will have to map.
df['city'].unique()

In [None]:
# Distinct 'statezip' values that the label encoder below will have to map.
df['statezip'].unique()

In [None]:
# Distinct 'country' values that the label encoder below will have to map.
df['country'].unique()

In [None]:
# Work on a copy so df keeps the original text values while df_encoded
# receives the numeric codes.
df_encoded = df.copy()

In [None]:
def encodeCols(cols):
    """Label-encode columns of the global ``df`` into ``df_encoded``.

    For every listed column a fresh ``LabelEncoder`` is fitted on the values
    in ``df`` and the resulting integer codes are written to the same column
    of ``df_encoded``. ``df`` itself is left untouched.

    Args:
        cols: iterable of column names present in both ``df`` and
            ``df_encoded``.

    Returns:
        dict mapping each column name to its fitted ``LabelEncoder``, so the
        integer codes can later be mapped back with ``inverse_transform``
        (previously the encoders were discarded).
    """
    fitted = {}
    for name in cols:
        encoder = LabelEncoder()
        # Fitting on the full column learns the same sorted set of classes as
        # fitting on its unique values, so the codes are unchanged.
        df_encoded[name] = encoder.fit_transform(df[name])
        fitted[name] = encoder
    return fitted


columns = ['city', 'statezip', 'country']
# Keep the fitted encoders so the integer codes can be decoded later.
encoders = encodeCols(columns)

In [None]:
# Labels
# One-column frame holding the distinct 'country' values of the unencoded df.
country = pd.DataFrame({'country':df['country'].unique()})

# Initializing Label Encoders
country_label_encoder = LabelEncoder()

# Training Label Encoder
# np.ravel flattens the one-column frame into the 1-D array that fit() expects.
# NOTE(review): 'country' was already encoded by encodeCols(columns) in the
# preceding cell; this standalone encoder repeats that step.
country_label_encoder.fit(np.ravel(country))

In [None]:
# Overwrite the 'country' column of df_encoded with codes computed from the
# original text values in df.
df_encoded['country'] = country_label_encoder.transform(df['country']) 

In [None]:
# All the attributes are expected to be numerical at this point; info() lists
# the dtypes so that can be checked.
df_encoded.info()

In [None]:
# Persist the encoded frame to encoded-data.csv with a header row and without
# the index column.
df_encoded.to_csv(r'encoded-data.csv', index = False, header = True)

# BREAK 3:10

# Train-Test Splitting

In [None]:
# 80/20 split. shuffle=False keeps the file's row order, so the first 80% of
# rows become the training set and the last 20% the test set.
# NOTE(review): if the rows are ordered in any meaningful way, the two sets may
# not be comparable - confirm that an unshuffled split is intended.
traindata, testdata = train_test_split(df_encoded, test_size=0.2, shuffle=False)

In [None]:
# First two rows of the training split.
traindata.head(2)

In [None]:
# First two rows of the test split.
testdata.head(2)

In [None]:
# Feature matrix and target vector for the whole encoded dataset.
# NOTE(review): X and y are not referenced by any later cell shown here; the
# model is fitted on train_x / train_y, which are derived from traindata.
X = df_encoded.drop('price', axis=1)
y = df_encoded['price']

In [None]:
# Training features: every column except the target. Selecting by name
# instead of by position keeps this correct even if 'price' is not the
# first column of the frame.
train_x = traindata.drop(columns='price')
train_x.head()

In [None]:
# Training target: the 'price' column, selected by name rather than by
# assuming it sits at position 0.
train_y = traindata['price']
train_y.head()

In [None]:
# Test features: every column except the target. Selecting by name instead
# of by position keeps this correct even if 'price' is not the first column
# of the frame.
test_x = testdata.drop(columns='price')
test_x.head()

In [None]:
# Test target: the 'price' column, selected by name rather than by assuming
# it sits at position 0.
test_y = testdata['price']
test_y.head()

# Training Model Using Support Vector Regressor

In [None]:
# The estimator fitted below is an SVR (regression on 'price'), so the status
# line names it correctly; it previously said SVC.
print("-- Training using SVR on Training Data --")
print("-- Parameters & Values: ", end='')

# gamma='auto' sets the kernel coefficient to 1 / n_features.
# NOTE(review): the features are passed in unscaled; SVR is sensitive to
# feature scale, which may explain large errors - consider standardising.
model_svr = SVR(gamma='auto')
# np.ravel guarantees the 1-D target array that fit() expects.
model_svr.fit(train_x, np.ravel(train_y))

print(model_svr)

In [None]:
# Saving Trained Model
# The context manager flushes and closes the file even if pickling fails;
# the bare open() call left the handle open.
with open('model_svc.pkl', 'wb') as model_file:
    pickle.dump(model_svr, model_file)

In [None]:
# Load saved Model
# NOTE: unpickling can execute arbitrary code - only load pickle files that
# come from a trusted source (here: the file written by this notebook).
# The context manager closes the file handle that the bare open() call leaked.
with open('model_svc.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
# Silence pandas' chained-assignment warning (this is a process-wide setting).
pd.options.mode.chained_assignment = None

# Independent copy of the test split, so adding a column leaves testdata as is.
testdata_predict = testdata.copy(deep=True)

# Predict on the held-out features and store the result next to the true price.
model_predictions = model.predict(test_x)
testdata_predict['Prediction'] = model_predictions

In [None]:
# Printing Testing Data
print("-- Testing Data with Prediction --")
# pd.set_option("display.max_rows", None, "display.max_columns", None)
# Bare expression: renders the test rows together with the 'Prediction' column.
testdata_predict

In [58]:
# Mean squared error between the true and the predicted prices of the test split.
mse = mean_squared_error(
    y_true=testdata_predict['price'],
    y_pred=testdata_predict['Prediction'],
)

# Label and value on one line, with nothing inserted between them.
print("-- mean squared error: ", mse, sep='')

-- mean squared error: 1063101319147.0159


In [59]:
# Root mean squared error between the true and the predicted prices.
rmse = root_mean_squared_error(
    y_true=testdata_predict['price'],
    y_pred=testdata_predict['Prediction'],
)
# Label and value on one line, with nothing inserted between them.
print("-- root mean squared error: ", rmse, sep='')

-- root mean squared error: 1031068.0477771658


In [60]:
# Mean absolute error between the true and the predicted prices.
mae = mean_absolute_error(
    y_true=testdata_predict['price'],
    y_pred=testdata_predict['Prediction'],
)
# Label and value on one line, with nothing inserted between them.
print("-- mean absolute error: ", mae, sep='')

-- mean absolute error: 291937.8900661286
