# Homelessness with Population Datasets 

In [1]:
import plotly.plotly as py
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import matplotlib.pyplot as plt
%matplotlib inline
from plotly.offline import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode( connected = True )


In [54]:
# Load the raw homelessness counts and the per-state population estimates
homelessness = pd.read_csv('2007-2016-Homelessnewss-USA.csv')
population =  pd.read_csv('Population-by-state.csv')
# Preview the first 3 rows
homelessness.head(3)


Unnamed: 0,Year,State,CoC Number,CoC Name,Measures,Count
0,1/1/2007,AK,AK-500,Anchorage CoC,Chronically Homeless Individuals,224
1,1/1/2007,AK,AK-500,Anchorage CoC,Homeless Individuals,696
2,1/1/2007,AK,AK-500,Anchorage CoC,Homeless People in Families,278


In [55]:
# Preview the raw population table; row 0 holds the descriptive column titles
population.head(10)

Unnamed: 0,GEO.id,GEO.id2,GEO.display-label,rescen42010,resbase42010,respop72010,respop72011,respop72012,respop72013,respop72014,respop72015,respop72016
0,Id,Id2,Geography,"April 1, 2010 - Census","April 1, 2010 - Estimates Base",Population Estimate (as of July 1) - 2010,Population Estimate (as of July 1) - 2011,Population Estimate (as of July 1) - 2012,Population Estimate (as of July 1) - 2013,Population Estimate (as of July 1) - 2014,Population Estimate (as of July 1) - 2015,Population Estimate (as of July 1) - 2016
1,0400000US01,1,Alabama,4779736,4780131,4785492,4799918,4815960,4829479,4843214,4853875,4863300
2,0400000US02,2,Alaska,710231,710249,714031,722713,731089,736879,736705,737709,741894
3,0400000US04,4,Arizona,6392017,6392301,6408312,6467163,6549634,6624617,6719993,6817565,6931071
4,0400000US05,5,Arkansas,2915918,2916025,2921995,2939493,2950685,2958663,2966912,2977853,2988248
5,0400000US06,6,California,37253956,37254522,37332685,37676861,38011074,38335203,38680810,38993940,39250017
6,0400000US08,8,Colorado,5029196,5029324,5048644,5118360,5189867,5267603,5349648,5448819,5540545
7,0400000US09,9,Connecticut,3574097,3574114,3579899,3589893,3593795,3596003,3591873,3584730,3576452
8,0400000US10,10,Delaware,897934,897936,899816,907924,916993,925395,934948,944076,952065
9,0400000US11,11,District of Columbia,601723,601766,605183,620477,635327,649165,659005,670377,681170


In [56]:
# Clean the population dataset.
# 1. Row 0 carries the descriptive titles, so promote it to the header.
descriptive_titles = population.iloc[0]
population.columns = descriptive_titles
# 2. Row 0 is now redundant as data; remove it.
population.drop(0, axis=0, inplace=True)
# 3. Remove the unneeded identifier and April-2010 census/base columns.
unneeded_columns = ['Id', 'Id2', 'April 1, 2010 - Census',
                    'April 1, 2010 - Estimates Base']
population.drop(unneeded_columns, axis=1, inplace=True)
# 4. Replace the long titles with short ones: State, pop2010 ... pop2016.
population.columns = ['State'] + ['pop{}'.format(year) for year in range(2010, 2017)]
population.head(3)



Unnamed: 0,State,pop2010,pop2011,pop2012,pop2013,pop2014,pop2015,pop2016
1,Alabama,4785492,4799918,4815960,4829479,4843214,4853875,4863300
2,Alaska,714031,722713,731089,736879,736705,737709,741894
3,Arizona,6408312,6467163,6549634,6624617,6719993,6817565,6931071


In [57]:
# Re-check the cleaned population table on a larger sample of rows
population.head(10)

Unnamed: 0,State,pop2010,pop2011,pop2012,pop2013,pop2014,pop2015,pop2016
1,Alabama,4785492,4799918,4815960,4829479,4843214,4853875,4863300
2,Alaska,714031,722713,731089,736879,736705,737709,741894
3,Arizona,6408312,6467163,6549634,6624617,6719993,6817565,6931071
4,Arkansas,2921995,2939493,2950685,2958663,2966912,2977853,2988248
5,California,37332685,37676861,38011074,38335203,38680810,38993940,39250017
6,Colorado,5048644,5118360,5189867,5267603,5349648,5448819,5540545
7,Connecticut,3579899,3589893,3593795,3596003,3591873,3584730,3576452
8,Delaware,899816,907924,916993,925395,934948,944076,952065
9,District of Columbia,605183,620477,635327,649165,659005,670377,681170
10,Florida,18849098,19096952,19344156,19582022,19888741,20244914,20612439


In [58]:
# Before cleaning, inspect the column dtypes to see which columns need converting
homelessness.dtypes

Year          object
State         object
CoC Number    object
CoC Name      object
Measures      object
Count         object
dtype: object

In [59]:
# Look at a few raw rows: the same CoC and year appear once per measure
homelessness.head(5)

Unnamed: 0,Year,State,CoC Number,CoC Name,Measures,Count
0,1/1/2007,AK,AK-500,Anchorage CoC,Chronically Homeless Individuals,224
1,1/1/2007,AK,AK-500,Anchorage CoC,Homeless Individuals,696
2,1/1/2007,AK,AK-500,Anchorage CoC,Homeless People in Families,278
3,1/1/2007,AK,AK-500,Anchorage CoC,Sheltered Chronically Homeless Individuals,187
4,1/1/2007,AK,AK-500,Anchorage CoC,Sheltered Homeless,842


In [60]:
# Clean the homelessness dataset. Every column was read as object (string),
# so Count and Year have to be converted to integers.
# 1. Count: strip the thousands separator (','), then parse as a number.
homelessness['Count'] = pd.to_numeric(homelessness['Count'].str.replace(',', ''))
# 2. Drop the columns the map does not need: the Continuum of Care (CoC)
#    identifiers and Measures -- only the state, year and count are plotted.
# NOTE(review): once Measures is gone, rows belonging to different measures
# can no longer be told apart downstream -- confirm this is intended.
homelessness.drop(['CoC Number', 'CoC Name', 'Measures'], axis=1, inplace=True)
# 3. Year: parse the date string and keep only the calendar year as an integer,
#    so that any year can be looked up directly.
homelessness['Year'] = pd.to_datetime(homelessness['Year']).dt.year
homelessness.head(5)


Unnamed: 0,Year,State,Count
0,2007,AK,224
1,2007,AK,696
2,2007,AK,278
3,2007,AK,187
4,2007,AK,842


In [61]:
# Confirm the conversions: Year and Count should now be integer columns
homelessness.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86529 entries, 0 to 86528
Data columns (total 3 columns):
Year     86529 non-null int64
State    86529 non-null object
Count    86529 non-null int64
dtypes: int64(2), object(1)
memory usage: 2.0+ MB


In [62]:
# General information about the homelessness dataset, including the column dtypes
homelessness.info()
## from the result --> 86,529 rows and 3 columns in total (after removing CoC Number, CoC Name and Measures)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 86529 entries, 0 to 86528
Data columns (total 3 columns):
Year     86529 non-null int64
State    86529 non-null object
Count    86529 non-null int64
dtypes: int64(2), object(1)
memory usage: 2.0+ MB


In [63]:
# General information about the population dataset, including the column dtypes
population.info()
## from the result --> 52 rows and 8 columns in total (State plus one estimate per year, 2010-2016)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 52 entries, 1 to 52
Data columns (total 8 columns):
State      52 non-null object
pop2010    52 non-null object
pop2011    52 non-null object
pop2012    52 non-null object
pop2013    52 non-null object
pop2014    52 non-null object
pop2015    52 non-null object
pop2016    52 non-null object
dtypes: object(8)
memory usage: 3.7+ KB


In [64]:
# The last step is checking for null values in both datasets: homelessness first
homelessness.isnull().sum()
## No null values

Year     0
State    0
Count    0
dtype: int64

In [65]:
# ... and the same null-value check for the population dataset
population.isnull().sum()
## No null values

State      0
pop2010    0
pop2011    0
pop2012    0
pop2013    0
pop2014    0
pop2015    0
pop2016    0
dtype: int64

In [66]:
# Full names of the states/territories, listed in the order of their
# alphabetically sorted two-letter abbreviations (AK, AL, AR, AZ, CA, ...).
# The order matters: the names are later attached to the pivoted rows by
# position, not by key.
StatesFullNames = [
    'Alaska', 'Alabama', 'Arkansas', 'Arizona',
    'California', 'Colorado', 'Connecticut',
    'District of Columbia', 'Delaware',
    'Florida',
    'Georgia', 'Guam',
    'Hawaii',
    'Iowa', 'Idaho', 'Illinois', 'Indiana',
    'Kansas', 'Kentucky',
    'Louisiana',
    'Massachusetts', 'Maryland', 'Maine', 'Michigan', 'Minnesota',
    'Missouri', 'Mississippi', 'Montana',
    'North Carolina', 'North Dakota', 'Nebraska', 'New Hampshire',
    'New Jersey', 'New Mexico', 'Nevada', 'New York',
    'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania', 'Puerto Rico',
    'Rhode Island',
    'South Carolina', 'South Dakota',
    'Tennessee', 'Texas',
    'Utah',
    'Virginia', 'Virgin Islands', 'Vermont',
    'Washington', 'Wisconsin', 'West Virginia', 'Wyoming',
]


In [67]:
# Years offered on the map's year slider. The homelessness data covers
# 2007-2016 inclusive, so every year of the dataset is listed (the original
# list stopped at 2015, which left 2016 off the map).
years = list(range(2007, 2017))

## Work on a copy of the cleaned homelessness dataset for the map,
## leaving the `homelessness` frame itself unchanged
homelessnessCopy1 = homelessness.copy()

homelessnessCopy1.head()

Unnamed: 0,Year,State,Count
0,2007,AK,224
1,2007,AK,696
2,2007,AK,278
3,2007,AK,187
4,2007,AK,842


In [68]:
# Many rows share the same state and year, so they need aggregating before plotting
homelessnessCopy1.head(10)

Unnamed: 0,Year,State,Count
0,2007,AK,224
1,2007,AK,696
2,2007,AK,278
3,2007,AK,187
4,2007,AK,842
5,2007,AK,589
6,2007,AK,253
7,2007,AK,974
8,2007,AK,37
9,2007,AK,132


In [69]:
# Collapse the many rows per (State, Year) into one value by keeping the
# largest Count, which leaves exactly one entry per state and year.
# NOTE(review): the maximum is the largest single row, not a state-wide
# total -- confirm this is the figure the map is meant to show.
largest_count = homelessnessCopy1.groupby(['State', 'Year'])['Count'].max()
# Pivot the years into columns, turn State back into an ordinary column and
# clear the leftover 'Year' label on the column axis, so the header reads
#             State 2007  2008  2009 ...
# instead of
#             Year  2007  2008  2009
#             State
homelessnessCopy1 = largest_count.unstack().reset_index().rename_axis(None, axis=1)
# The State column holds abbreviations; rename it and add the full names.
# The names are matched to rows by position, so the list order must follow
# the sorted abbreviations.
homelessnessCopy1 = homelessnessCopy1.rename(columns={'State': 'StateAbbrev'})
homelessnessCopy1['State'] = pd.Series(StatesFullNames)
homelessnessCopy1.head()

Unnamed: 0,StateAbbrev,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
0,AK,974,1023,1267,1231,1223,1147,1122,1023,1208,1105,Alaska
1,AL,2104,2104,2273,2273,1950,1707,1469,1329,1153,1228,Alabama
2,AR,1822,1811,1425,1425,1276,1873,1678,1074,830,808,Arkansas
3,AZ,8448,7189,7889,6999,5831,6485,5889,5918,5631,5702,Arizona
4,CA,47862,47862,33243,33243,34622,31553,35524,34393,41174,43854,California


In [70]:
# Work on a copy of the population table; its columns are still strings.
population1 = population.copy()
population1.info()
# Only the 2015 estimate is needed as a number: strip any thousands
# separators and parse it.
pop2015_digits = population1['pop2015'].str.replace(',', '')
population1['pop2015'] = pd.to_numeric(pop2015_digits)
# Snapshot of the pivoted homelessness table taken before the population
# columns are merged in; the choropleth map is drawn from this copy.
homelessnessCopy12 = homelessnessCopy1.copy()
population1.head(10)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 52 entries, 1 to 52
Data columns (total 8 columns):
State      52 non-null object
pop2010    52 non-null object
pop2011    52 non-null object
pop2012    52 non-null object
pop2013    52 non-null object
pop2014    52 non-null object
pop2015    52 non-null object
pop2016    52 non-null object
dtypes: object(8)
memory usage: 3.7+ KB


Unnamed: 0,State,pop2010,pop2011,pop2012,pop2013,pop2014,pop2015,pop2016
1,Alabama,4785492,4799918,4815960,4829479,4843214,4853875,4863300
2,Alaska,714031,722713,731089,736879,736705,737709,741894
3,Arizona,6408312,6467163,6549634,6624617,6719993,6817565,6931071
4,Arkansas,2921995,2939493,2950685,2958663,2966912,2977853,2988248
5,California,37332685,37676861,38011074,38335203,38680810,38993940,39250017
6,Colorado,5048644,5118360,5189867,5267603,5349648,5448819,5540545
7,Connecticut,3579899,3589893,3593795,3596003,3591873,3584730,3576452
8,Delaware,899816,907924,916993,925395,934948,944076,952065
9,District of Columbia,605183,620477,635327,649165,659005,670377,681170
10,Florida,18849098,19096952,19344156,19582022,19888741,20244914,20612439


In [71]:
# Pivoted homelessness table as it stands before the population merge
homelessnessCopy1.head()

Unnamed: 0,StateAbbrev,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State
0,AK,974,1023,1267,1231,1223,1147,1122,1023,1208,1105,Alaska
1,AL,2104,2104,2273,2273,1950,1707,1469,1329,1153,1228,Alabama
2,AR,1822,1811,1425,1425,1276,1873,1678,1074,830,808,Arkansas
3,AZ,8448,7189,7889,6999,5831,6485,5889,5918,5631,5702,Arizona
4,CA,47862,47862,33243,33243,34622,31553,35524,34393,41174,43854,California


In [72]:

# Add the population columns to each state row, matching on the full state
# name. This is an inner join (pandas' default), so any row whose State has
# no counterpart in the population table is dropped.
homelessnessCopy1 = pd.merge(homelessnessCopy1, population1, how='inner', on='State')
homelessnessCopy1.head()

Unnamed: 0,StateAbbrev,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State,pop2010,pop2011,pop2012,pop2013,pop2014,pop2015,pop2016
0,AK,974,1023,1267,1231,1223,1147,1122,1023,1208,1105,Alaska,714031,722713,731089,736879,736705,737709,741894
1,AL,2104,2104,2273,2273,1950,1707,1469,1329,1153,1228,Alabama,4785492,4799918,4815960,4829479,4843214,4853875,4863300
2,AR,1822,1811,1425,1425,1276,1873,1678,1074,830,808,Arkansas,2921995,2939493,2950685,2958663,2966912,2977853,2988248
3,AZ,8448,7189,7889,6999,5831,6485,5889,5918,5631,5702,Arizona,6408312,6467163,6549634,6624617,6719993,6817565,6931071
4,CA,47862,47862,33243,33243,34622,31553,35524,34393,41174,43854,California,37332685,37676861,38011074,38335203,38680810,38993940,39250017


In [73]:
# Show up to 54 rows -- enough for the whole merged table, since there are at most 54 regions
homelessnessCopy1.head(54)

Unnamed: 0,StateAbbrev,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,State,pop2010,pop2011,pop2012,pop2013,pop2014,pop2015,pop2016
0,AK,974,1023,1267,1231,1223,1147,1122,1023,1208,1105,Alaska,714031,722713,731089,736879,736705,737709,741894
1,AL,2104,2104,2273,2273,1950,1707,1469,1329,1153,1228,Alabama,4785492,4799918,4815960,4829479,4843214,4853875,4863300
2,AR,1822,1811,1425,1425,1276,1873,1678,1074,830,808,Arkansas,2921995,2939493,2950685,2958663,2966912,2977853,2988248
3,AZ,8448,7189,7889,6999,5831,6485,5889,5918,5631,5702,Arizona,6408312,6467163,6549634,6624617,6719993,6817565,6931071
4,CA,47862,47862,33243,33243,34622,31553,35524,34393,41174,43854,California,37332685,37676861,38011074,38335203,38680810,38993940,39250017
5,CO,8698,8482,8752,8752,9283,9283,6316,6621,5335,5728,Colorado,5048644,5118360,5189867,5267603,5349648,5448819,5540545
6,CT,3563,3781,3780,3372,3451,3307,3389,3367,3134,3016,Connecticut,3579899,3589893,3593795,3596003,3591873,3584730,3576452
7,DC,5320,6044,6228,6539,6546,6954,6865,7748,7298,8350,District of Columbia,605183,620477,635327,649165,659005,670377,681170
8,DE,1061,933,1130,982,1035,1008,946,901,953,1070,Delaware,899816,907924,916993,925395,934948,944076,952065
9,FL,6483,6483,7473,7473,7336,7419,4378,4156,4152,4235,Florida,18849098,19096952,19344156,19582022,19888741,20244914,20612439


In [74]:
# Homeless count as a percentage of the 2015 population estimate, per state
perc2015 = (homelessnessCopy1[2015] / homelessnessCopy1['pop2015']) * 100
# Keep only the 2015 figures: discard every other year's count column and
# every other year's population column.
other_year_counts = [year for year in range(2007, 2017) if year != 2015]
other_year_pops = ['pop{}'.format(year) for year in range(2010, 2017) if year != 2015]
homelessnessCopy1.drop(other_year_counts + other_year_pops, axis=1, inplace=True)

In [75]:
# Attach the 2015 percentage (already multiplied by 100) as a new column
homelessnessCopy1['perc2015'] = perc2015
homelessnessCopy1.head(10)

Unnamed: 0,StateAbbrev,2015,State,pop2015,perc2015
0,AK,1208,Alaska,737709,0.16375
1,AL,1153,Alabama,4853875,0.023754
2,AR,830,Arkansas,2977853,0.027872
3,AZ,5631,Arizona,6817565,0.082595
4,CA,41174,California,38993940,0.105591
5,CO,5335,Colorado,5448819,0.097911
6,CT,3134,Connecticut,3584730,0.087426
7,DC,7298,District of Columbia,670377,1.088641
8,DE,953,Delaware,944076,0.100945
9,FL,4152,Florida,20244914,0.020509


In [76]:
# Red colour scale, light to dark, as [position, colour] pairs from 0.0 to 1.0.
# Color shading from  https://www.w3schools.com/colors/colors_picker.asp
Colorscl = [
    [0.0, 'rgb(255, 230, 230)'],
    [0.2, 'rgb(255, 153, 153)'],
    [0.4, 'rgb(255, 77, 77)'],
    [0.6, 'rgb(255, 0, 0)'],
    [0.8, 'rgb(179, 0, 0)'],
    [1.0, 'rgb(128, 0, 0)'],
]


def _choropleth_trace(year, visible):
    """Return the choropleth trace dict for one year's count per state.

    year    -- column of homelessnessCopy12 holding that year's counts
    visible -- whether the trace is shown when the figure first renders
    """
    state_outline = dict(line=dict(color='rgb(220,220,220)', width=1))
    return dict(type='choropleth',
                colorscale=Colorscl,
                # Fixed colour range so every year shares the same scale.
                zmin=0, zmax=60000,
                autocolorscale=False,
                locations=homelessnessCopy12['StateAbbrev'],
                z=homelessnessCopy12[year],
                locationmode='USA-states',
                text=homelessnessCopy12['State'],
                marker=state_outline,
                visible=visible,
                colorbar=dict(title="Number of Homeless"))


# 2007 is the trace shown initially; every later year starts hidden and is
# switched on through the year slider.
data_2007 = [_choropleth_trace(2007, True)]
data_Homeless = list(data_2007)
for later_year in years[1:]:
    data_Homeless.append(_choropleth_trace(later_year, False))
    
   


In [77]:

# Build the year slider shown inside the plot: one step per trace. Selecting
# a step restyles the figure so that only that year's trace is visible.
levels = []
for trace_index, year in enumerate(years[:len(data_Homeless)]):
    # Fresh visibility mask for this step: everything hidden except its own trace.
    visibility = [False] * len(data_Homeless)
    visibility[trace_index] = True
    levels.append(dict(method="restyle",
                       args=["visible", visibility],
                       label=str(year)))

# The slider is built once, after all steps exist (it used to be rebuilt on
# every loop iteration). active=0 points at the first step, which matches the
# only trace that is visible when the figure first renders; the previous
# value of 10 was beyond the last step.
sliders = [dict(active=0,
                currentvalue={"prefix": "Year: "},
                pad={"t": 50},
                steps=levels)]

# Setting the layout
layout = dict(title='Number of Homeless in USA per state',
              geo=dict(scope='usa',
                       projection=dict(type='albers usa'),
                       showlakes=True,
                       lakecolor='rgb(255, 255, 255)'),
              sliders=sliders)

In [78]:
# Assemble the map figure from the per-year traces and the slider layout
fig = dict(data=data_Homeless, layout=layout)

In [79]:
## Draw the map to see which states have the higher numbers of homeless people
iplot(fig)
## we can see from the map that the number of homeless individuals in the US
## has decreased over the years

In [80]:
## Now, we will see the percentage of homeless to the population of each state

import plotly.graph_objs as go

AXIS_GREY = 'rgb(107, 107, 107)'


def _axis(title, **extra):
    """Return axis settings with the shared grey title (size 16) and tick (size 12) fonts."""
    return dict(title=title,
                titlefont=dict(size=16, color=AXIS_GREY),
                tickfont=dict(size=12, color=AXIS_GREY),
                **extra)


x = homelessnessCopy1['StateAbbrev']
y = homelessnessCopy1['perc2015']

# One bar per state; the bar colour follows the percentage on the red scale,
# and the full state name is attached as the bar's text.
bar_trace = go.Bar(x=x,
                   y=y,
                   text=homelessnessCopy1['State'],
                   marker={'color': y, 'colorscale': Colorscl, 'showscale': True})
data = [bar_trace]

layout = go.Layout(
    title="Percentages of Homeless for Each State, 2015",
    # Tilt the state labels so they do not overlap.
    xaxis=_axis('States Abbrevation', tickangle=-45),
    yaxis=_axis('Percentage of Homeless')
)

fig2 = go.Figure(data=data, layout=layout)

iplot(fig2)
