# Percentages of Different Races in Each USA State in 2015

In [1]:
# Libraries for loading the data (pandas) and drawing the chart (plotly).
# NOTE(review): `py`, `np` and `plt` are not referenced in the visible cells;
# `plotly.plotly` is also reported to have moved to the separate `chart_studio`
# package in plotly >= 4 — confirm before upgrading plotly.
import plotly.plotly as py
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import matplotlib.pyplot as plt
# Use plotly's offline helpers, since Azure Notebooks won't allow rendering the
# graphs online (i.e. linking to the Plotly API).
%matplotlib inline
# NOTE(review): this wildcard import looks redundant with the explicit import on
# the next line — confirm nothing else relies on it.
from plotly.offline import *
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Must run before the first iplot() call so figures can be drawn in the notebook.
init_notebook_mode( connected = True )

In [2]:
# Read the sheet named 'raw_data.csv' from the 2015 state-level workbook.
race = pd.read_excel(
    '2015 Racial Data _ State.xls',
    sheet_name='raw_data.csv',
)
# This preview is evaluated but not rendered, because it is not the cell's last
# expression; only the text printed by .info() below shows up.
race.head()
# .info() reports 52 rows, so a fully populated column has 52 non-null values.
race.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 9 columns):
Location                                  52 non-null object
White                                     52 non-null float64
Black                                     51 non-null float64
Hispanic                                  52 non-null float64
Asian                                     47 non-null float64
American Indian/Alaska Native             9 non-null float64
Native Hawaiian/Other Pacific Islander    13 non-null float64
Two Or More Races                         45 non-null float64
Total                                     52 non-null int64
dtypes: float64(7), int64(1), object(1)
memory usage: 3.7+ KB


In [3]:
## Some race columns contain NaN values (see the non-null counts printed by race.info()).
## 'American Indian/Alaska Native' and 'Native Hawaiian/Other Pacific Islander' are
## dropped because too few locations report a value for them.
## 'Total' and 'Two Or More Races' are dropped as well (not needed here, per the
## original note "No need for the 2").
# race.drop(...) returns a new frame, so the raw `race` frame stays untouched.
raceCopy = race.drop(
    columns=[
        'Native Hawaiian/Other Pacific Islander',
        'American Indian/Alaska Native',
        'Total',
        'Two Or More Races',
    ]
)

# Replace the remaining NaN values with the mean of the *same* column, so the
# imputed value stays on that column's own scale.
# Filling through DataFrame.fillna (instead of Series.fillna(inplace=True) on a
# column selection) guarantees the result actually lands in raceCopy.
raceCopy = raceCopy.fillna(
    {
        'Black': race['Black'].mean(),
        'Asian': race['Asian'].mean(),
    }
)

raceCopy.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 5 columns):
Location    52 non-null object
White       52 non-null float64
Black       52 non-null float64
Hispanic    52 non-null float64
Asian       52 non-null float64
dtypes: float64(4), object(1)
memory usage: 2.1+ KB


In [4]:
# Convert the race shares from fractions to percentages with one decimal place
# (the same precision as rounding the fraction to 3 decimal places).
# Scaling first and rounding last avoids float noise such as
# 0.07 * 100 == 7.000000000000001, which rounding *before* the multiply leaves in.
# NOTE: this rescales raceCopy in place, so run the cell only once per kernel
# session; re-running it multiplies the values by 100 again.
PERCENT_COLUMNS = ["White", "Black", "Hispanic", "Asian"]
raceCopy[PERCENT_COLUMNS] = (raceCopy[PERCENT_COLUMNS] * 100).round(1)
raceCopy.head(6)

Unnamed: 0,Location,White,Black,Hispanic,Asian
0,United States,61.0,12.0,18.0,6.0
1,Alabama,65.0,27.0,4.0,4.6
2,Alaska,58.0,3.0,6.0,7.0
3,Arizona,51.0,4.0,37.0,3.0
4,Arkansas,74.0,15.0,7.0,2.0
5,California,39.0,6.0,38.0,15.0


In [5]:
## Pick the states the author singled out for having a higher
## [number & percentage of homeless people] AND a higher [cost of living].
def _rows_for(state_name):
    """Return (boolean mask, matching rows) of raceCopy for one Location value."""
    mask = raceCopy['Location'].eq(state_name)
    return mask, raceCopy.loc[mask]


# Each call yields the mask and the filtered frame under the names used later on.
IsHawaii, Hawaii = _rows_for('Hawaii')                                              # 1
IsCalifornia, California = _rows_for('California')                                  # 2
IsNew_Mexico, New_Mexico = _rows_for('New Mexico')                                  # 3
IsNevada, Nevada = _rows_for('Nevada')                                              # 4
IsTexas, Texas = _rows_for('Texas')                                                 # 5
IsNew_York, New_York = _rows_for('New York')                                        # 6
IsDistrict_of_Columbia, District_of_Columbia = _rows_for('District of Columbia')    # 7
IsWashington, Washington = _rows_for('Washington')                                  # 8


In [6]:
## Stack the per-state rows selected above into one frame for the chart.
## The list order is the row order of the result; the index is renumbered from 0.
selected_state_frames = [
    New_Mexico,
    Nevada,
    Texas,
    Washington,
    District_of_Columbia,
    Hawaii,
    New_York,
    California,
]
CertainStates = pd.concat(selected_state_frames, ignore_index=True)

CertainStates

Unnamed: 0,Location,White,Black,Hispanic,Asian
0,New Mexico,38.0,2.0,44.0,1.0
1,Nevada,50.0,9.0,28.0,8.0
2,Texas,44.0,12.0,37.0,5.0
3,Washington,69.0,3.0,13.0,8.0
4,District of Columbia,37.0,46.0,11.0,5.0
5,Hawaii,19.0,2.0,10.0,39.0
6,New York,58.0,14.0,18.0,8.0
7,California,39.0,6.0,38.0,15.0


In [7]:

# Horizontal stacked bar chart: one bar per location, one segment per race column.

# (column in CertainStates, fill colour) for each stacked segment, in stacking order.
segment_styles = [
    ('White', 'rgb(128,177,211)'),
    ('Black', 'rgb(253,180,98)'),
    ('Hispanic', 'rgb(179,222,105)'),
    ('Asian', 'rgb(251,128,114)'),
]

# Build one go.Bar trace per race column; every segment gets the same thin,
# near-white outline so adjacent segments stay visually separated.
data = [
    go.Bar(
        x=CertainStates[column],
        y=CertainStates['Location'],
        name=column,
        orientation='h',
        marker={
            'color': fill_colour,
            'line': {'color': 'rgb(248, 248, 249)', 'width': 1},
        },
    )
    for column, fill_colour in segment_styles
]
# Keep the individual trace names available, as in the original cell.
trace0, trace1, trace2, trace3 = data

# Axis styling: labels on both axes, grid lines only along x, no axis/zero lines.
y_axis_style = {
    'showgrid': False,
    'showline': False,
    'showticklabels': True,
}
x_axis_style = {
    'zeroline': False,
    'showline': False,
    'showticklabels': True,
    'showgrid': True,
}
# The left margin is the largest one, leaving room for the location labels.
plot_margin = {'l': 120, 'r': 20, 't': 70, 'b': 70}

layout = go.Layout(
    title='Percentages of Race for Certain USA States, 2015',
    yaxis=y_axis_style,
    xaxis=x_axis_style,
    margin=plot_margin,
    barmode='stack',
)

fig = go.Figure(data=data, layout=layout)

iplot(fig)