# Project 2: Ames Housing Data

**Part 2 EDA and Data Cleaning (Test)**

**Import libraries**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
#create a func to modify test data
def change_test(data):
    """Clean and feature-engineer the housing test set for modelling.

    Steps, in order: normalise column names to ``snake_case``, drop ``pid``,
    impute missing values, encode ordinal quality ratings as integers,
    binarise ``central_air`` / ``street``, derive ``total_sf``,
    ``housing_age``, ``age_after_remod`` and ``seasons``, and finally
    one-hot encode the nominal columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw test data. Column names may contain spaces and capitals; they
        are normalised before any other step.

    Returns
    -------
    pandas.DataFrame
        A new frame produced by ``pd.get_dummies`` with ``drop_first=True``.

    Notes
    -----
    The cleaning steps before ``pd.get_dummies`` are applied to ``data``
    itself, so the caller's frame is modified as well; always use the
    returned frame.

    Ordinal ratings that are missing or not present in the rating scale are
    encoded as ``0`` so that these columns stay purely numeric.
    """
    # Normalise headers: spaces -> underscores, lower case.
    data.columns = [name.replace(' ', '_').lower() for name in data.columns]

    data.drop(columns=['pid'], inplace=True)

    # Missing values in these columns become the literal label 'None'; every
    # one of them is one-hot encoded below, so 'None' ends up as its own
    # category.
    none_label_cols = [
        'misc_feature', 'alley', 'fence',
        'garage_finish', 'garage_yr_blt', 'garage_type',
        'bsmtfin_type_2', 'bsmtfin_type_1',
        'mas_vnr_type',
    ]
    for col in none_label_cols:
        data[col] = data[col].fillna('None')

    # Assign the result back instead of calling fillna(inplace=True) on the
    # selected column: the chained in-place form is deprecated and does not
    # update the frame under pandas Copy-on-Write.
    data['lot_frontage'] = data['lot_frontage'].fillna(
        data['lot_frontage'].mean()
    )
    data['mas_vnr_area'] = data['mas_vnr_area'].fillna(0)

    # Ordinal rating scales. Anything not listed in a scale (including
    # missing values) is encoded as 0 after mapping, which keeps each column
    # an integer column rather than a mix of numbers and 'None' strings.
    five_point = {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1}
    ordinal_scales = {
        col: five_point
        for col in [
            'exter_qual', 'exter_cond', 'kitchen_qual', 'heating_qc',
            'bsmt_qual', 'bsmt_cond', 'fireplace_qu',
            'garage_qual', 'garage_cond',
        ]
    }
    ordinal_scales['bsmt_exposure'] = {'Gd': 3, 'Av': 2, 'Mn': 1, 'No': 0}
    ordinal_scales['pool_qc'] = {'Ex': 4, 'Gd': 3, 'TA': 2, 'Fa': 1}
    for col, scale in ordinal_scales.items():
        data[col] = data[col].map(scale).fillna(0).astype(int)

    # Two-level flags, kept as strings because they are one-hot encoded below.
    data['central_air'] = data['central_air'].map(
        lambda value: '1' if value == 'Y' else '0'
    )
    data['street'] = data['street'].map(
        lambda value: '1' if value == 'Pave' else '2'
    )

    # Sum of the basement, floor, deck and porch areas.
    data['total_sf'] = (
        data['total_bsmt_sf']
        + data['1st_flr_sf']
        + data['2nd_flr_sf']
        + data['wood_deck_sf']
        + data['open_porch_sf']
    )

    # Ages relative to the year of sale. These must be computed before
    # get_dummies, which replaces year_built and year_remod/add.
    data['housing_age'] = data['yr_sold'] - data['year_built']
    data['age_after_remod'] = data['yr_sold'] - data['year_remod/add']

    def _season(month):
        """Group a month number into a season label '1'-'4'."""
        # Months 12, 1 and 2 share label '4'.
        if month <= 2:
            return '4'
        if month < 6:
            return '1'
        if month < 9:
            return '2'
        if month < 12:
            return '3'
        return '4'

    data['seasons'] = data['mo_sold'].map(_season)

    # One-hot encode the nominal columns, dropping the first level of each.
    nominal_cols = [
        'ms_subclass', 'ms_zoning', 'street', 'alley',
        'lot_shape', 'land_contour', 'utilities', 'lot_config',
        'land_slope', 'condition_1', 'condition_2', 'bldg_type',
        'house_style', 'roof_style', 'roof_matl', 'exterior_1st',
        'exterior_2nd', 'mas_vnr_type', 'foundation', 'bsmtfin_type_1',
        'bsmtfin_type_2', 'heating', 'central_air', 'electrical',
        'functional', 'garage_type', 'garage_finish', 'paved_drive',
        'fence', 'misc_feature', 'sale_type', 'neighborhood',
        'year_built', 'year_remod/add', 'garage_yr_blt',
    ]
    data = pd.get_dummies(data, columns=nominal_cols, drop_first=True)

    return data

In [3]:
# Load the raw test set and show its (rows, columns) shape.
test_csv_path = "D:/GA/DSI3lab/dsi3test/project_2/datasets/test.csv"
test_data = pd.read_csv(test_csv_path)
test_data.shape

(878, 80)

In [4]:
# Run the cleaning / feature-engineering function on the loaded test set
# and show the resulting (rows, columns) shape.
test_data = test_data.pipe(change_test)
test_data.shape

(878, 484)

**Export test data**

In [5]:
# Write the processed test set to disk. The DataFrame index is written too,
# since to_csv is called with its default settings.
test_final_path = 'D:/GA/DSI3lab/dsi3test/project_2/datasets/test_final.csv'
test_data.to_csv(test_final_path)