# Exploratory Data Analysis

In [1]:
#import required libraries
import matplotlib.pyplot as plt
import seaborn as sns
import random 
import warnings
import operator

from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.plotly as py
from plotly import tools
from datetime import date
import pandas as pd
import numpy as np 
import plotly.figure_factory as ff

In [2]:
# Import Dataset
# Paths are relative to the notebook's working directory; both CSVs are read
# into notebook-global DataFrames that every later cell uses.
train = pd.read_csv("../input/train.csv")
test = pd.read_csv("../input/test.csv")

#Print Shape of Dataset
# .shape is a (rows, columns) tuple.
print ("Train Dataset: Rows, Columns: ", train.shape)
print ("Test Dataset: Rows, Columns: ", test.shape)

In [3]:
# Preview the first rows of the training data; the bare last expression is
# rendered by the notebook as a table.
print ("Dataset Head: ")
train.head()

In [4]:
# Print the (rows, columns) shape of the training data.
# NOTE(review): this repeats the shape already printed right after loading.
print("Shape of Train Data:")
print(train.shape)

In [5]:
# Column dtypes and non-null counts; info() prints its report itself.
print("Information of Train Data:")
train.info()

In [6]:
# Summary statistics of the training data's columns.
print ("Summary of Train Dataset: ")
train.describe()

In [7]:
# Count missing values per feature and list the incomplete features,
# most-missing first.
missingFeatures = train.isnull().sum()
# sort_values only accepts keyword arguments on recent pandas versions.
missingFeatures = missingFeatures.sort_values(ascending=False)
df = (
    missingFeatures[missingFeatures > 0]
    .rename_axis('Feature')
    .reset_index(name='Number of NaN')
)

# Descriptions are looked up by feature name instead of being assigned by
# position, so the table stays correct if the sort order of tied counts changes
# or a different set of features has missing values (unknown features get NaN).
# Feature names follow the competition's data dictionary.
featureDescriptions = {
    'rez_esc': 'Years behind in school',
    'v18q1': 'number of tablets household owns',
    'v2a1': 'Monthly rent payment',
    'meaneduc': 'average years of education for adults (18+)',
    'SQBmeaned': 'square of the mean years of education of adults (>=18) in the household',
}
df['Description'] = df['Feature'].map(featureDescriptions)
df

## Distribution of Population by Income Level

In [8]:
#Count for each Poverty Levels
# Target codes used throughout this notebook: 1 = extreme poverty, 2 = moderate
# poverty, 3 = vulnerable, 4 = non vulnerable.
povertyLabels = {1: "Extreme Poverty", 2: "Moderate Poverty", 3: "Vulnerable", 4: "NonVulnerable"}
povertyTarget = train['Target'].value_counts()
# Derive each bar's label from the code it counts rather than from a
# hand-ordered list, so labels cannot drift from the count ordering.
povertyLevels = [povertyLabels[level] for level in povertyTarget.index]
# Use the Series values directly: the column name produced by
# value_counts().to_frame() differs between pandas versions.
figure = go.Bar(y=povertyTarget.values, x=povertyLevels, marker=dict(color='dodgerblue'))
layout = dict(title="Poverty Levels", margin=dict(l=200), width=800, height=400)
figure = [figure]
fig = go.Figure(data=figure, layout=layout)
iplot(fig)

## Graphs showing ownership of items for every Income Level

In [9]:
#Plot to compare counts for every poverty level for given feature
def compare_plot(col, title):
    """Build the two bar traces comparing a 0/1 feature across poverty levels.

    Reads the notebook-global ``train`` frame.

    Parameters
    ----------
    col : str
        Name of a column in ``train`` whose values are 0 (absent) or 1 (present).
    title : str
        Unused; kept so existing calls keep working.

    Returns
    -------
    tuple
        ``(bar1, bar2)``: the "Not Present" trace and the "Present" trace, each
        with one bar per poverty level (Target 1 to 4).
    """
    targets = ['Extreme', 'Moderate', 'Vulnerable', 'NonVulnerable']
    notPresent, present = [], []
    for level in (1, 2, 3, 4):
        counts = train[train['Target'] == level][col].value_counts().to_dict()
        # A level may lack one of the two values entirely; count it as 0
        # instead of raising KeyError.
        notPresent.append(counts.get(0, 0))
        present.append(counts.get(1, 0))

    bar1 = go.Bar(y=notPresent, name="Not Present", x=targets, marker=dict(color="firebrick"))
    bar2 = go.Bar(y=present, name="Present", x=targets, marker=dict(color="seagreen"))
    return bar1, bar2

#Plotting Compare Plots for Phone, TV, Computer, Refrigerator, Tablet
# (column in train, subplot title) for every owned item, in subplot order.
ownershipFeatures = [("mobilephone", "Mobile Phone"), ("television", "Television"), ("computer", "Computer"),
                     ("refrig", "Refrigerator"), ("v18q", "Tablet")]
titles = [label for column, label in ownershipFeatures]

fig = tools.make_subplots(rows=3, cols=2, print_grid=False, subplot_titles=titles)
for position, (column, label) in enumerate(ownershipFeatures):
    # Fill the 3x2 grid row by row (plotly rows/cols are 1-based).
    gridRow, gridCol = divmod(position, 2)
    for trace in compare_plot(column, label):
        # Every subplot has the same two traces; group them by name and list
        # them in the legend only once instead of five times.
        trace['legendgroup'] = trace['name']
        trace['showlegend'] = (position == 0)
        fig.append_trace(trace, gridRow + 1, gridCol + 1)

fig['layout'].update(height=1000, title="Ownership of Items by Income Level", barmode="stack", showlegend=True)
iplot(fig)

## Household Materials and Other Key Characteristics (Percentage of Population from each income level)

In [10]:
#Find Material
def findMaterial(record, materials):
    """Return the first name in ``materials`` whose value in ``record`` equals 1.

    ``record`` is indexed by name (a DataFrame row or a dict). Returns ``None``
    when no listed name is flagged.
    """
    flagged = (name for name in materials if record[name] == 1)
    return next(flagged, None)

#For each Poverty Level find Material and plot count 
def plotEach(prefix, colname, replace):
    """Collapse one-hot columns into a label column and chart it per poverty level.

    Reads and mutates the notebook-global ``train`` frame: ``train[colname]`` is
    (re)written with the readable label of the first flagged column, or left
    missing when no column is flagged.

    Note that a later cell redefines ``plotEach`` with a different signature.

    Parameters
    ----------
    prefix : str
        Every column of ``train`` whose name starts with this prefix is treated
        as one of the mutually exclusive indicator columns.
    colname : str
        Name of the label column to create in ``train``.
    replace : dict
        Maps indicator column names to the labels shown on the chart. A flagged
        column missing from this mapping raises ``KeyError``.

    Returns
    -------
    list
        Four ``go.Bar`` traces (Extreme, Moderate, Vulnerable, NonVulnerable)
        whose heights are the percentage of rows of that poverty level having
        each label. Rows without a label count in the denominator only.
    """
    materials = [feature for feature in train.columns if feature.startswith(prefix)]
    # Apply row-wise over the indicator columns only; findMaterial never reads
    # any other column, and building full-width rows is much slower.
    train[colname] = train[materials].apply(lambda record: findMaterial(record, materials), axis=1)
    train[colname] = train[colname].apply(lambda code: replace[code] if pd.notna(code) else code)

    levelStyles = [(1, "Extreme", 'red'), (2, "Moderate", 'orange'),
                   (3, "Vulnerable", 'rgb(200,200,0)'), (4, "NonVulnerable", 'green')]
    bars = []
    for level, name, color in levelStyles:
        members = train[train['Target'] == level]
        # Work on the value_counts Series itself: the column name it gets from
        # to_frame() is not the same on every pandas version.
        share = (members[colname].value_counts() / len(members)) * 100
        bars.append(go.Bar(y=share.values, x=share.index, name=name, marker=dict(color=color)))
    return bars

#Plotting for the Outside Wall Material, Floor Material, Roof Material, Sanitary Conditions, Cooking Energy Sources and Disposal Methods
titlesOfGraphs = ["Outside Wall Material", "Floor Material", "Roof Material", "Sanitary Conditions", "Cooking Energy Sources", "Disposal Methods"]
figure = tools.make_subplots(rows=3, cols=2, print_grid=False, subplot_titles=titlesOfGraphs)

# One entry per subplot, in the order the titles are listed:
# (subplot row, subplot col, column prefix, derived column name, column -> label)
householdPanels = [
    ### outside material
    (1, 1, "pared", "outside_material",
     {'paredblolad' : "Block / Brick", "paredpreb" : "Prefabricated / Cement", "paredmad" : "Wood",
      "paredzocalo" : "Socket", "pareddes" : "Waste Material", "paredfibras" : "Natural Fibres",
      "paredother" : "Other", "paredzinc": "Zinc"}),
    ### floor material
    (1, 2, "piso", "floor_material",
     {'pisomoscer' : "Mosaic / Ceramic", "pisocemento" : "Cement", "pisonatur" : "Natural Material",
      "pisonotiene" : "No Floor", "pisomadera" : "Wood", "pisoother" : "Other"}),
    ### Roof Material
    (2, 1, "tech", "roof_material",
     {'techozinc' : "Zinc", "techoentrepiso" : "Fibre / Cement", "techocane" : "Natural Fibre", "techootro" : "Other"}),
    ### Sanitary Conditions
    (2, 2, "sanit", "sanitary",
     {'sanitario1' : "No Toilet", "sanitario2" : "Sewer / Cesspool", "sanitario3" : "Septic Tank",
      "sanitario5" : "Black Hole", "sanitario6" : "Other System"}),
    ### Energy Source
    (3, 1, "energ", "energy_source",
     {'energcocinar1' : "No Kitchen", "energcocinar2" : "Electricity", "energcocinar3" : "Cooking Gas",
      "energcocinar4" : "Wood Charcoal"}),
    ### Disposal Methods
    (3, 2, "elim", "waste_method",
     {"elimbasu1":"Tanker truck",
      "elimbasu2": "Buried",
      "elimbasu3": "Burning",
      "elimbasu4": "Unoccupied space",
      "elimbasu5": "River",
      "elimbasu6": "Other"}),
]

# plotEach also writes the derived column into train; each panel gets the four
# per-poverty-level traces it returns.
for panelRow, panelCol, panelPrefix, panelColumn, conversion in householdPanels:
    results = plotEach(panelPrefix, panelColumn, conversion)
    for x in results:
        figure.append_trace(x, panelRow, panelCol)

figure['layout'].update(height=900, title="Characteristics of Households", barmode="stack", showlegend=False)
iplot(figure)

## State of House

In [11]:
#Find the condition of each record for given feature
def findState(record, mats):
    """Translate the first flagged condition column into "Bad", "Regular" or "Good".

    ``mats`` is scanned in order; a column counts as flagged when its value in
    ``record`` equals 1. The label is chosen from the column name's final
    character ("1", "2" or "3"). Flagged columns with any other ending are
    skipped, and ``None`` is returned when nothing matches.
    """
    labelBySuffix = (("1", "Bad"), ("2", "Regular"), ("3", "Good"))
    for column in mats:
        if record[column] != 1:
            continue
        for suffix, label in labelBySuffix:
            if column.endswith(suffix):
                return label
    return None

# Poverty levels in Target order (1..4) with the bar opacity used for each.
stateLevelNames = ["Extreme", "Moderate", "Vulnerable", "NonVulnerable"]
stateLevelOpacity = [0.99, 0.69, 0.49, 0.29]
# Fixed category order so the four subplots of a figure share the same x axis;
# value_counts alone orders categories by frequency, which differs per level.
stateOrder = ["Bad", "Regular", "Good"]
# (house part, prefix of its condition columns, bar color)
houseParts = [("Wall", "epar", "green"), ("Roof", "etec", "purple"), ("Floor", "eviv", "red")]

for housePart, statePrefix, stateColor in houseParts:
    stateColumns = [row for row in train.columns if row.startswith(statePrefix)]
    stateColumn = "StateOf" + housePart
    # Adds StateOfWall / StateOfRoof / StateOfFloor to train.
    train[stateColumn] = train[stateColumns].apply(lambda record : findState(record, stateColumns), axis=1)

    #Plot Bar: one subplot per poverty level, counting each state
    fig = tools.make_subplots(rows=1, cols=4, print_grid=False, subplot_titles=stateLevelNames)
    for level, (levelName, levelOpacity) in enumerate(zip(stateLevelNames, stateLevelOpacity), start=1):
        stateCounts = train[train['Target'] == level][stateColumn].value_counts()
        stateCounts = stateCounts.reindex(stateOrder, fill_value=0)
        bar = go.Bar(x=stateCounts.index, y=stateCounts.values,
                     marker=dict(color=stateColor, opacity=levelOpacity), name=levelName)
        fig.append_trace(bar, 1, level)
    fig['layout'].update(height=250, showlegend=False, title="State of " + housePart + " of Households")
    iplot(fig)

## Level of Education, Relationship Status, Family Members and Region (Percentage of Population from each income level)

In [12]:
#Plot count for each poverty level for given feature
def plotEach(prefix, colname, title, replace, plotme = True):
    """Collapse one-hot columns into a label column and plot it per poverty level.

    Reads and mutates the notebook-global ``train`` frame: ``train[colname]`` is
    (re)written with the readable label of the first flagged column, or left
    missing when no column is flagged.

    Parameters
    ----------
    prefix : str
        Every column of ``train`` whose name starts with this prefix is treated
        as one of the mutually exclusive indicator columns.
    colname : str
        Name of the label column to create in ``train``.
    title : str
        Title of the figure.
    replace : dict
        Maps indicator column names to the labels shown on the chart. A flagged
        column missing from this mapping raises ``KeyError``.
    plotme : bool, optional
        When true (the default) the stacked bar figure is displayed with
        ``iplot``. Nothing is returned either way.
    """
    materials = [feature for feature in train.columns if feature.startswith(prefix)]
    # Apply row-wise over the indicator columns only; findMaterial never reads
    # any other column, and building full-width rows is much slower.
    train[colname] = train[materials].apply(lambda record : findMaterial(record, materials), axis=1)
    train[colname] = train[colname].apply(lambda code : replace[code] if pd.notna(code) else code)

    levelStyles = [(1, "Extreme", 'red'), (2, "Moderate", 'orange'),
                   (3, "Vulnerable", 'rgb(200,200,0)'), (4, "NonVulnerable", 'green')]
    data = []
    for level, name, color in levelStyles:
        members = train[train['Target'] == level]
        # Percentage of this level's rows per label. The Series is used
        # directly because the column name value_counts().to_frame() produces
        # is not the same on every pandas version.
        share = (members[colname].value_counts() / len(members)) * 100
        data.append(go.Bar(y=share.values, x=share.index, name=name, marker=dict(color=color)))

    layout = dict(title=title, legend=dict(y=1.1, orientation="h"), barmode="stack", margin=dict(l=50), height=400)
    figure = go.Figure(data=data, layout=layout)
    if plotme:
        iplot(figure)

# Each call below adds a readable label column to train (reused by later
# cells) and displays one stacked percentage chart.

# Plotting for Level of Education by Poverty Level
conversion = {"instlevel1": "No Education", "instlevel2": "Incomplete Primary", "instlevel3": "Complete Primary", 
       "instlevel4": "Incomplete Sc.", "instlevel5": "Complete Sc.", "instlevel6": "Incomplete Tech Sc.",
       "instlevel7": "Complete Tech Sc.", "instlevel8": "Undergraduation", "instlevel9": "Postgraduation"}
plotEach("instl", "education_details", "Level of Education", conversion)  

# Plotting for Relationship Status by Poverty Level
conversion = {"estadocivil1": "< 10 years", "estadocivil2": "Free / Coupled union", "estadocivil3": "Married", 
       "estadocivil4": "Divorced", "estadocivil5": "Separated", "estadocivil6": "Widow",
       "estadocivil7": "Single"}
plotEach("estado", "status_members", "Relationship Status", conversion)  

# Plotting for Family Member Roles by Poverty Level
conversion = {"parentesco1": "Household Head", "parentesco2": "Spouse/Partner", "parentesco3": "Son/Daughter", 
       "parentesco4": "Stepson/Daughter", "parentesco5" : "Son/Daughter in Law" , "parentesco6": "Grandson/Daughter", 
       "parentesco7": "Mother/Father", "parentesco8": "Mother/Father in Law", "parentesco9" : "Brother/Sister" , 
       "parentesco10" : "Brother/Sister in law", "parentesco11" : "Other Family Member", "parentesco12" : "Other Non Family Member"}
plotEach("parentesc", "family_members", "Family Members in the Households", conversion)  

# Plotting for Region of Households by Poverty Level
# The accented region names were stored double-encoded (mojibake) and showed up
# garbled on the chart axis; they are restored to proper Unicode here.
conversion = {"lugar1": "Central", "lugar2": "Chorotega", "lugar3": "Pacífico central", 
       "lugar4": "Brunca", "lugar5": "Huetar Atlántica", "lugar6": "Huetar Norte"}
plotEach("lugar", "region", "Region of the Households", conversion)  

## Percentage of People from each Poverty Level based on Gender and Age

In [20]:
def plotForPovertyLevel(feature):
    """Build one bar trace per poverty level for the distribution of ``feature``.

    Reads the notebook-global ``train`` frame. For each Target value 1 to 4 the
    value counts of ``train[feature]`` are converted to a percentage of that
    level's row count.

    Returns the four ``go.Bar`` traces in the order Extreme, Moderate,
    Vulnerable, NonVulnerable.
    """
    levelStyles = (
        (1, "Extreme", "red"),
        (2, "Moderate", "orange"),
        (3, "Vulnerable", "rgb(200,200,0)"),
        (4, "NonVulnerable", "green"),
    )

    bars = []
    for level, levelName, levelColor in levelStyles:
        members = train[train['Target'] == level]
        share = (members[feature].value_counts() / len(members)) * 100
        bars.append(go.Bar(x=share.index, y=share.values, marker=dict(color=levelColor), name=levelName))
    return bars


# Pair every subplot title with the column it shows so the two cannot drift
# apart. Per the competition's data dictionary the r4?1 columns count people
# younger than 12, r4?2 people aged 12 and over, and r4?3 the total (t = all
# persons, h = males, m = females). Previously the "Total ..." titles sat over
# the "younger than 12" columns and every title in a row was shifted by one.
genderAgePanels = [
    ("Total Persons", 'r4t3'), ("< 12 Yrs", 'r4t1'), (">= 12 Yrs", 'r4t2'),
    ("Total Males", 'r4h3'), ("Males < 12 Yrs", 'r4h1'), ("Males >= 12 Yrs", 'r4h2'),
    ("Total Females", 'r4m3'), ("Females < 12 Yrs", 'r4m1'), ("Females >= 12 Yrs", 'r4m2'),
]
titles = [panelTitle for panelTitle, panelFeature in genderAgePanels]
figures = tools.make_subplots(rows=3, cols=3, print_grid=False, subplot_titles=titles)

for position, (panelTitle, panelFeature) in enumerate(genderAgePanels):
    # Fill the 3x3 grid row by row (plotly rows/cols are 1-based).
    gridRow, gridCol = divmod(position, 3)
    plots = plotForPovertyLevel(panelFeature)
    for x in plots:
        figures.append_trace(x, gridRow + 1, gridCol + 1)

figures['layout'].update(height=750, showlegend=False, title="Distribution of Poverty Levels based on Gender and Age")
iplot(figures)

## Age Groups among the households (Percentage of Population from each income level)

In [14]:
# Percentage distribution, per poverty level, of the number of children,
# adults and 65+ year olds in a household
titles = ["Children", "Adults", "65+ Old"]
figure = tools.make_subplots(rows=1, cols=3, print_grid=False, subplot_titles=titles)

# Columns in the same left-to-right order as the titles above.
ageCategoryColumns = ("hogar_nin", "hogar_adul", "hogar_mayor")
for subplotCol, ageColumn in enumerate(ageCategoryColumns, start=1):
    res = plotForPovertyLevel(ageColumn)
    for x in res:
        figure.append_trace(x, 1, subplotCol)

figure['layout'].update(height=350, title="Distribution Of People by Age Categories", barmode="stack", showlegend=False)
iplot(figure)

## Household Size

In [15]:
# Size of Household for every Poverty Level
# plotForPovertyLevel returns one percentage bar trace per poverty level for
# the 'tamhog' column; they are drawn together in a single figure.
householdSize = plotForPovertyLevel('tamhog')
setup = dict(title="Size of Household", margin=dict(l=100), height=400, legend=dict(orientation="h", y=1))
figure = go.Figure(data=householdSize, layout=setup)
iplot(figure)

## Distributions of various features for every poverty level

In [26]:
def boxPlots(features, title):
    """Build one box trace per poverty level for a column of ``train``.

    ``features`` is the column to summarise; ``title`` is accepted but not
    used. Reads the notebook-global ``train`` frame and returns four ``go.Box``
    traces in the order Extreme, Moderate, Vulnerable, NonVulnerable.
    """
    levelStyles = (
        (1, "Extreme", "red"),
        (2, "Moderate", "orange"),
        (3, "Vulnerable", "rgb(200,200,0)"),
        (4, "NonVulnerable", "green"),
    )
    return [
        go.Box(y=train[train['Target'] == level][features], name=levelName, marker=dict(color=levelColor))
        for level, levelName, levelColor in levelStyles
    ]

# (subplot title, column in train), in subplot order. Per the competition's
# data dictionary 'age' is a person's age in years, not the age of the
# building, so that panel is titled accordingly.
boxPlotPanels = [
    ("Number of Rooms", "rooms"), ("Number of Bedrooms", "bedrooms"),
    ("Mobile Phones Owned", "qmobilephone"), ("Tablets Owned", "v18q1"),
    ("Age of Household Members", "age"), ("Overcrowding Per Persons", "overcrowding"),
]
titles = [panelTitle for panelTitle, panelColumn in boxPlotPanels]
figure = tools.make_subplots(rows=3, cols=2, print_grid=False, subplot_titles=titles)

for position, (panelTitle, panelColumn) in enumerate(boxPlotPanels):
    # Fill the 3x2 grid row by row (plotly rows/cols are 1-based).
    gridRow, gridCol = divmod(position, 2)
    res = boxPlots(panelColumn, panelTitle)
    for x in res:
        figure.append_trace(x, gridRow + 1, gridCol + 1)

# The figure previously had an empty title; barmode is omitted because it only
# affects bar traces.
figure['layout'].update(height=900, title="Distributions of Features by Poverty Level", showlegend=False)
iplot(figure)

## Number of Rooms vs Outside Material for every income level

In [45]:
# Readable poverty-level names, stored in train['target'] and used as the hue
# of the seaborn plots. 'outside_material' is the label column added to train
# by the household-characteristics cell.
convertNumToPovertyLevel = {1:'Extreme Poverty', 2:'Moderate Poverty', 3: 'Vulnerable Households', 4:'Non Vulnerable Households'}
train['target'] = train['Target'].map(lambda level : convertNumToPovertyLevel[level])

sns.set(rc={'figure.figsize':(15, 6)})
ax = sns.boxplot(data=train, x="outside_material", y="rooms", hue="target", palette="gist_stern")
ax.set_title("Effect of Number of Rooms and Outside Material on Income Level", fontsize=14)
ax.set_xlabel('Outside Material', fontsize=14)
ax.set_ylabel('Number of Rooms', fontsize=14)
# Category names are long, so draw the x tick labels vertically.
plt.setp(ax.get_xticklabels(), rotation='vertical')
plt.show()

## Number of Rooms vs. Floor Materials for every income level

In [46]:
# Rooms per floor material, split by poverty level. Needs the 'floor_material'
# and 'target' columns that earlier cells add to train.
sns.set(rc={'figure.figsize':(15, 6)})
ax = sns.boxplot(data=train, x="floor_material", y="rooms", hue="target", palette="rocket")
ax.set_title("Effect of Number of Rooms and Floor Material on Income Level", fontsize=14)
ax.set_xlabel('Floor Material', fontsize=14)
ax.set_ylabel('Number of Rooms', fontsize=14)
# Category names are long, so draw the x tick labels vertically.
plt.setp(ax.get_xticklabels(), rotation='vertical')
plt.show()

## Household Size vs Educational Levels for every income level

In [47]:
# Household size per education level, split by poverty level. Needs the
# 'education_details' and 'target' columns that earlier cells add to train.
sns.set(rc={'figure.figsize':(15, 6)})
ax = sns.boxplot(data=train, x="education_details", y="hhsize", hue="target", palette="cool")
ax.set_title("Effect of Education Details and Household Size on Income Level", fontsize=14)
ax.set_xlabel('Education Details', fontsize=14)
ax.set_ylabel('Household Size', fontsize=14)
# Category names are long, so draw the x tick labels vertically.
plt.setp(ax.get_xticklabels(), rotation='vertical')
plt.show()

## Household Size vs. Family Member Status on Income Level

In [49]:
# Household size per relationship status, split by poverty level. Needs the
# 'status_members' and 'target' columns that earlier cells add to train.
sns.set(rc={'figure.figsize':(15, 6)})
sns.boxplot(x="status_members", y="hhsize", hue="target",  palette="Spectral", data=train)
# Title fixed: it used to read "... Status and and Household Size ...".
plt.title("Effect of Family Members Status and Household Size on Income Levels", fontsize=14)
plt.xticks(rotation='vertical')
plt.xlabel('Family Members Status',fontsize=14)
plt.ylabel('Household Size',fontsize=14)
plt.show()

## Household Size vs Family Member Status on Income Levels

In [50]:
# NOTE(review): this cell plots exactly the same columns as the cell before it
# ('status_members' against 'hhsize'). It may have been meant for a different
# column, e.g. the 'family_members' labels created earlier but never plotted
# with seaborn; confirm the intent or remove the duplicate.
sns.set(rc={'figure.figsize':(15, 6)})
sns.boxplot(x="status_members", y="hhsize", hue="target",  palette="Spectral", data=train)
# Title fixed: it used to read "... Status and and Household Size ...".
plt.title("Effect of Family Members Status and Household Size on Income Levels", fontsize=14)
plt.xticks(rotation='vertical')
plt.xlabel('Family Members Status',fontsize=14)
plt.ylabel('Household Size',fontsize=14)
plt.show()