In [121]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score

In [122]:
train_df = pd.read_csv('../project_2-master/datasets/train.csv')
test_df = pd.read_csv('../project_2-master/datasets/test.csv')

In [123]:
# Training ORDINAL values
ordinal_train = train_df[['Id', 'Lot Shape', 'Utilities', 'Land Slope', 'Overall Qual', 'Overall Cond', 'Exter Qual', 
          'Exter Cond', 'Bsmt Qual', 'Bsmt Cond', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2',
          'Heating QC', 'Electrical', 'Kitchen Qual', 'Functional', 'Fireplace Qu', 'Garage Finish', 'Garage Qual',
          'Garage Cond', 'Paved Drive', 'Pool QC', 'Fence', 
         ]]
ordinal_train.head(1)

# You might want to set index to 'Id' will revist later

Unnamed: 0,Id,Lot Shape,Utilities,Land Slope,Overall Qual,Overall Cond,Exter Qual,Exter Cond,Bsmt Qual,Bsmt Cond,...,Electrical,Kitchen Qual,Functional,Fireplace Qu,Garage Finish,Garage Qual,Garage Cond,Paved Drive,Pool QC,Fence
0,109,IR1,AllPub,Gtl,6,8,Gd,TA,TA,TA,...,SBrkr,Gd,Typ,,RFn,TA,TA,Y,,


In [124]:
# Testing ORDINAL values
ordinal_test = test_df[['Id', 'Lot Shape', 'Utilities', 'Land Slope', 'Overall Qual', 'Overall Cond', 'Exter Qual', 
          'Exter Cond', 'Bsmt Qual', 'Bsmt Cond', 'Bsmt Exposure', 'BsmtFin Type 1', 'BsmtFin Type 2',
          'Heating QC', 'Electrical', 'Kitchen Qual', 'Functional', 'Fireplace Qu', 'Garage Finish', 'Garage Qual',
          'Garage Cond', 'Paved Drive', 'Pool QC', 'Fence', 
         ]]
ordinal_test.head(1)

# You might want to set index to 'Id' will revist later

Unnamed: 0,Id,Lot Shape,Utilities,Land Slope,Overall Qual,Overall Cond,Exter Qual,Exter Cond,Bsmt Qual,Bsmt Cond,...,Electrical,Kitchen Qual,Functional,Fireplace Qu,Garage Finish,Garage Qual,Garage Cond,Paved Drive,Pool QC,Fence
0,2658,Reg,AllPub,Gtl,6,8,TA,Fa,Fa,TA,...,FuseP,Fa,Typ,,Unf,Po,Po,Y,,


In [125]:
ordinal_columns = {'Lot Shape' : {'IR3' : 1, 'IR2' : 2, 'IR1' : 3, 'Reg' : 4}, 
                   'Utilities': {'ELO' : 0, 'NoSeWa' : 1, 'NoSewr' : 2, 'AllPub' : 3}, 
                   'Land Slope': {'Sev' : 1, 'Mod' : 2, 'Gtl' : 3}, 
                   'Exter Qual': {'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5},
                   'Exter Cond': {'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5},
                   'Bsmt Qual' : {np.nan : 0, 'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5},   # not sure about this one 
                   'Bsmt Cond' : {np.nan : 0, 'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5},    # or this one
                   'Bsmt Exposure' : {np.nan : 0, 'No' : 1, 'Mn' : 2, 'Av' : 3, 'Gd' : 4}, 
                   'BsmtFin Type 1' : {np.nan : 0, 'Unf' : 1, 'LwQ' : 2, 'Rec' : 3, 'BLQ' : 4, 'ALQ' : 5, 'GLQ' : 6},
                   'BsmtFin Type 2' : {np.nan : 0, 'Unf' : 1, 'LwQ' : 2, 'Rec' : 3, 'BLQ' : 4, 'ALQ' : 5, 'GLQ' : 6},
                   'Heating QC' : {'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5}, 
                   'Electrical' : {np.nan : 0, 'Mix' : 1, 'FuseP' : 2, 'FuseF' : 3, 'FuseA' : 4, 'SBrkr' : 5},
                   'Kitchen Qual' : {'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5},
                   'Functional' : {'Sal' : 1, 'Sev' : 2, 'Maj2' : 3, 'Maj1' : 4, 'Mod': 5, 'Min2' : 6, 'Min1' : 7, 'Typ' : 8}, 
                   'Fireplace Qu' : {np.nan : 0, 'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5},
                   'Garage Finish' : {np.nan : 0, 'Unf' : 1, 'RFn' : 2, 'Fin' : 3},
                   'Garage Qual' : {np.nan : 0, 'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5},
                   'Garage Cond' : {np.nan : 0, 'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5},
                   'Paved Drive' : {'N' : 1, 'P' : 2, 'Y' : 3},
                   'Pool QC' : {np.nan : 0, 'Fa' : 1, 'TA' : 2, 'Gd' : 3, 'Ex' : 4},
                   'Fence' : {np.nan : 0, 'MnWw' : 1, 'GdWo' : 2, 'MnPrv' : 3, 'GdPrv' : 4} }

In [126]:
ordinal_train.replace(ordinal_columns, inplace = True)
ordinal_test.replace(ordinal_columns, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


In [127]:
ordinal_train.to_csv('ordinal_train.csv', index=False)

In [128]:
ordinal_test.to_csv('ordinal_test.csv', index=False)