In [92]:
import pandas as pd
import numpy as np

import dash
# import dash_core_components as dcc
# import dash_html_components as html

from dash import dcc
from dash import html

from dash import Dash
from dash import callback 
from dash import Output
from dash import Input

import plotly.express as px
import plotly.graph_objects as go

In [93]:
# Provisional Dash instance; `app` is assigned again in the dashboard cell
# further down, which is the instance the layout and callback attach to.
app = Dash()

In [94]:
# Read the county-level CSV; the trailing expression displays (rows, columns).
county_csv_path = '../01-DataSet-acs2015_county_data.csv'
county_data = pd.read_csv(county_csv_path)
county_data.shape

(3220, 37)

In [95]:
county_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3220 entries, 0 to 3219
Data columns (total 37 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CensusId         3220 non-null   int64  
 1   State            3220 non-null   object 
 2   County           3220 non-null   object 
 3   TotalPop         3220 non-null   int64  
 4   Men              3220 non-null   int64  
 5   Women            3220 non-null   int64  
 6   Hispanic         3220 non-null   float64
 7   White            3220 non-null   float64
 8   Black            3220 non-null   float64
 9   Native           3220 non-null   float64
 10  Asian            3220 non-null   float64
 11  Pacific          3220 non-null   float64
 12  Citizen          3220 non-null   int64  
 13  Income           3219 non-null   float64
 14  IncomeErr        3219 non-null   float64
 15  IncomePerCap     3220 non-null   int64  
 16  IncomePerCapErr  3220 non-null   int64  
 17  Poverty       

In [96]:
county_data.iloc[0:10,18:24]

Unnamed: 0,ChildPoverty,Professional,Service,Office,Construction,Production
0,18.6,33.2,17.0,24.2,8.6,17.1
1,19.2,33.1,17.7,27.1,10.8,11.2
2,45.3,26.8,16.1,23.1,10.8,23.1
3,27.9,21.5,17.9,17.8,19.0,23.7
4,27.2,28.5,14.1,23.9,13.5,19.9
5,38.4,18.8,15.0,19.7,20.1,26.4
6,39.2,27.5,16.6,21.9,10.3,23.7
7,31.6,27.3,17.7,24.2,10.5,20.4
8,37.2,23.3,14.5,26.3,11.5,24.4
9,30.1,29.3,16.0,19.5,13.7,21.5


In [97]:
# Read the state-name -> abbreviation lookup; the trailing expression displays
# (rows, columns).
state_codes_csv_path = '../DataSet/state_codes.csv'
state_code = pd.read_csv(state_codes_csv_path)
state_code.shape

(51, 2)

In [98]:
state_code.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   State         51 non-null     object
 1   Abbreviation  51 non-null     object
dtypes: object(2)
memory usage: 944.0+ bytes


In [99]:
state_code.head()

Unnamed: 0,State,Abbreviation
0,Alabama,AL
1,Alaska,AK
2,Arizona,AZ
3,Arkansas,AR
4,California,CA


In [100]:
# Attach the two-letter abbreviation to every county row.
# This is an inner join on State, so counties whose State has no entry in
# state_code are dropped (3220 rows are loaded, df.info() later reports 3142).
df = county_data.merge(state_code, on='State', how='inner')
df.head()

Unnamed: 0,CensusId,State,County,TotalPop,Men,Women,Hispanic,White,Black,Native,...,OtherTransp,WorkAtHome,MeanCommute,Employed,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Unemployment,Abbreviation
0,1001,Alabama,Autauga,55221,26745,28476,2.6,75.8,18.5,0.4,...,1.3,1.8,26.5,23986,73.6,20.9,5.5,0.0,7.6,AL
1,1003,Alabama,Baldwin,195121,95314,99807,4.5,83.1,9.5,0.6,...,1.4,3.9,26.4,85953,81.5,12.3,5.8,0.4,7.5,AL
2,1005,Alabama,Barbour,26932,14497,12435,4.6,46.2,46.7,0.2,...,1.5,1.6,24.1,8597,71.8,20.8,7.3,0.1,17.6,AL
3,1007,Alabama,Bibb,22604,12073,10531,2.2,74.5,21.4,0.4,...,1.5,0.7,28.8,8294,76.8,16.1,6.7,0.4,8.3,AL
4,1009,Alabama,Blount,57710,28512,29198,8.6,87.9,1.5,0.3,...,0.4,2.3,34.9,22189,82.0,13.5,4.2,0.4,7.7,AL


In [101]:
# Estimated head-counts per county for the Black, White, Hispanic and Asian
# groups: each percentage column is applied to TotalPop and rounded to whole
# people. Resulting columns: BlackPop, WhitePop, HispanicPop, AsianPop.
for pct_col in ('Black', 'White', 'Hispanic', 'Asian'):
    head_count = df['TotalPop'] * df[pct_col] / 100
    df[f'{pct_col}Pop'] = head_count.round().astype(np.int64)


In [102]:

# Per-state sum of BlackPop as a two-column frame (StateCode, Population).
black_by_state = (
    df.groupby('Abbreviation')['BlackPop']
    .sum()
    .rename('Population')
    .rename_axis('StateCode')
    .reset_index()
)

# Grand total over all states (displayed as the cell output).
black_by_state['Population'].sum()


38786341

In [103]:

# Per-state sum of AsianPop as a two-column frame (StateCode, Population).
asian_by_state = (
    df.groupby('Abbreviation')['AsianPop']
    .sum()
    .rename('Population')
    .rename_axis('StateCode')
    .reset_index()
)

# Grand total over all states (displayed as the cell output).
asian_by_state['Population'].sum()


16052817

In [104]:

# Per-state sum of HispanicPop as a two-column frame (StateCode, Population).
hispanic_by_state = (
    df.groupby('Abbreviation')['HispanicPop']
    .sum()
    .rename('Population')
    .rename_axis('StateCode')
    .reset_index()
)

# Grand total over all states (displayed as the cell output).
hispanic_by_state['Population'].sum()


54223051

In [105]:

# Per-state sum of WhitePop as a two-column frame (StateCode, Population).
white_by_state = (
    df.groupby('Abbreviation')['WhitePop']
    .sum()
    .rename('Population')
    .rename_axis('StateCode')
    .reset_index()
)

# Not the last expression of the cell, so this total is computed but not shown.
white_by_state['Population'].sum()
# Preview of the first rows (this is the cell output).
white_by_state.head()

Unnamed: 0,StateCode,Population
0,AK,457454
1,AL,3204691
2,AR,2177133
3,AZ,3751935
4,CA,14878726


In [106]:
# White and Hispanic totals side by side, one row per StateCode. The shared
# 'Population' column is renamed on each side before joining so the merge
# never produces suffixed duplicates.
__tmp1df = white_by_state.rename(columns={'Population': 'WhitePopulation'}).merge(
    hispanic_by_state.rename(columns={'Population': 'HispanicPopulation'}),
    on='StateCode',
)
# __tmp1df


In [107]:
# Black and Asian totals side by side, one row per StateCode. The shared
# 'Population' column is renamed on each side before joining so the merge
# never produces suffixed duplicates.
__tmp2df = black_by_state.rename(columns={'Population': 'BlackPopulation'}).merge(
    asian_by_state.rename(columns={'Population': 'AsianPopulation'}),
    on='StateCode',
)
# __tmp2df

In [108]:
# One row per state with the White, Hispanic, Black and Asian totals, built by
# joining the two intermediate pair-frames on StateCode.
minority_df = __tmp1df.merge(__tmp2df, on='StateCode')
minority_df

Unnamed: 0,StateCode,WhitePopulation,HispanicPopulation,BlackPopulation,AsianPopulation
0,AK,457454,47870,23626,42337
1,AL,3204691,193190,1270591,59355
2,AR,2177133,203253,456640,39993
3,AZ,3751935,2015062,263021,193808
4,CA,14878726,14746536,2157960,5192712
5,CO,3645901,1112574,202913,150857
6,CT,2487570,527257,347028,149297
7,DC,230504,66043,310792,23309
8,DE,591726,80657,195086,33322
9,FL,11013330,4662131,3033830,499744


['OK', 'CO', 'NM', 'KS']


In [109]:
# Per-state sum of TotalPop as a two-column frame (StateCode, TotalPopulation).
total_pop_df = (
    df.groupby('Abbreviation')['TotalPop']
    .sum()
    .rename('TotalPopulation')
    .rename_axis('StateCode')
    .reset_index()
)
# total_pop_df


In [110]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3142 entries, 0 to 3141
Data columns (total 42 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CensusId         3142 non-null   int64  
 1   State            3142 non-null   object 
 2   County           3142 non-null   object 
 3   TotalPop         3142 non-null   int64  
 4   Men              3142 non-null   int64  
 5   Women            3142 non-null   int64  
 6   Hispanic         3142 non-null   float64
 7   White            3142 non-null   float64
 8   Black            3142 non-null   float64
 9   Native           3142 non-null   float64
 10  Asian            3142 non-null   float64
 11  Pacific          3142 non-null   float64
 12  Citizen          3142 non-null   int64  
 13  Income           3141 non-null   float64
 14  IncomeErr        3141 non-null   float64
 15  IncomePerCap     3142 non-null   int64  
 16  IncomePerCapErr  3142 non-null   int64  
 17  Poverty       

In [111]:
# Estimated head-count for the Professional job share per county, rounded to
# whole people.
# NOTE(review): the share is applied to TotalPop; if the job-share columns are
# percentages of employed people, `Employed` would be the right base - confirm
# against the dataset documentation.
df['ProfessionalPop'] = df['Professional'].mul(df['TotalPop']).div(100).round().astype(np.int64)


In [112]:
# Estimated head-count for the Service job share per county (share applied to
# TotalPop, rounded to whole people).
df['ServicePop'] = df['Service'].mul(df['TotalPop']).div(100).round().astype(np.int64)


In [113]:
# Estimated head-count for the Office job share per county (share applied to
# TotalPop, rounded to whole people).
df['OfficePop'] = df['Office'].mul(df['TotalPop']).div(100).round().astype(np.int64)


In [114]:
# Estimated head-count for the Construction job share per county (share applied
# to TotalPop, rounded to whole people).
df['ConstructionPop'] = df['Construction'].mul(df['TotalPop']).div(100).round().astype(np.int64)


In [115]:
# Estimated head-count for the Production job share per county (share applied
# to TotalPop, rounded to whole people).
df['ProductionPop'] = df['Production'].mul(df['TotalPop']).div(100).round().astype(np.int64)


In [116]:

# Sum the five job head-count columns per state; the grouping key becomes the
# StateCode column of the result.
job_pop_cols = ['ProfessionalPop', 'ServicePop', 'OfficePop', 'ConstructionPop', 'ProductionPop']
professional_df = (
    df.groupby('Abbreviation')[job_pop_cols]
    .sum()
    .rename_axis('StateCode')
    .reset_index()
)
professional_df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   StateCode        51 non-null     object
 1   ProfessionalPop  51 non-null     int64 
 2   ServicePop       51 non-null     int64 
 3   OfficePop        51 non-null     int64 
 4   ConstructionPop  51 non-null     int64 
 5   ProductionPop    51 non-null     int64 
dtypes: int64(5), object(1)
memory usage: 2.5+ KB


In [117]:
# Reshape wide -> long: one row per (StateCode, job column) with the head-count
# in TotalPeople.
melted_df = professional_df.melt(id_vars='StateCode', var_name='Jobs', value_name='TotalPeople')

# Order rows by state and renumber the index from 0. Only StateCode is a sort
# key, so the order of the job rows inside a state is whatever the sort leaves.
professional_melted_df = melted_df.sort_values('StateCode').reset_index(drop=True)
professional_melted_df.head(10)

Unnamed: 0,StateCode,Jobs,TotalPeople
0,AK,ProfessionalPop,264786
1,AK,OfficePop,166775
2,AK,ConstructionPop,92913
3,AK,ProductionPop,80168
4,AK,ServicePop,128838
5,AL,OfficePop,1161333
6,AL,ProductionPop,787975
7,AL,ServicePop,812142
8,AL,ConstructionPop,485988
9,AL,ProfessionalPop,1582939


In [118]:
# Two-level treemap (state, then job column) sized by TotalPeople; the trailing
# expression renders the figure as the cell output.
jobs_treemap = px.treemap(
    professional_melted_df,
    path=['StateCode', 'Jobs'],
    values="TotalPeople",
)
jobs_treemap

In [119]:
# Persist the long-format job counts next to the notebook, without the index
# column.
profession_csv_path = 'profession_count_by_state.csv'
professional_melted_df.to_csv(profession_csv_path, index=False)

In [120]:
# # Just some simple random chart for test.
# app.layout = html.Div([
#     html.H1('Dash 101..'),
#     html.Div('Data Visualization Dashboarding Project'),
    
#     dcc.Graph(
#         id='mygraph1',
#         figure = {
#             'data' : [
#                 {'x': [4,6,8], 'y': [12, 16,18], 'type':'bar', 'name':'First Chart'}
#             ],
#             'layout': {
#                 'title': 'Simple Bar chart'
#             }
#         }
#     )
# ])

In [121]:
# if __name__ == '__main__':
#     app.run_server(port = 6969)

In [123]:
import dash
from dash import dcc, html
import plotly.express as px
import pandas as pd

# Sample statewise population data
# data = {
#     'State': ['CA', 'TX', 'FL', 'NY', 'IL'],
#     'Population': [39538223, 29145505, 21538187, 20383438, 12882135]
# }

# Create a DataFrame from the sample data
# df = pd.DataFrame(data)

# Create the Dash application the layout and callback below attach to.
app = dash.Dash(__name__)

# Dropdown entries: full state name as the label, abbreviation as the value.
state_options = [
    {'label': state_name, 'value': abbreviation}
    for state_name, abbreviation in zip(state_code['State'], state_code['Abbreviation'])
]

# Multi-select state picker, pre-populated with four states.
state_picker = dcc.Dropdown(
    id='state-dropdown',
    options=state_options,
    multi=True,
    value=['OK', 'CO', 'NM', 'KS'],
)

# Page structure: heading, state picker, then one empty Graph per figure that
# the callback fills in (bar chart, choropleth, hierarchy plot, strip plot).
app.layout = html.Div([
    html.H1('US State Population Dashboard'),
    state_picker,
    dcc.Graph(id='bar-chart'),
    dcc.Graph(id='choropleth-map'),
    dcc.Graph(id='hierarchical-plot-treemap'),
    dcc.Graph(id='strip-dot-chart'),
])

# Callback function to update plots based on dropdown selection
@app.callback(
    [Output('choropleth-map', 'figure'),
     Output('bar-chart', 'figure'),
     Output('hierarchical-plot-treemap', 'figure'),
     Output('strip-dot-chart', 'figure')],
    [Input('state-dropdown', 'value')]
)
def update_plots(selected_states):
    """Rebuild the four dashboard figures for the states chosen in the dropdown.

    Parameters
    ----------
    selected_states : list of str or None
        State abbreviations coming from the 'state-dropdown' value.

    Returns
    -------
    tuple
        (choropleth_map, bar_chart, hierarchy_chart, strip_chart), in the same
        order as the Output list of the decorator.
    """
    # Nothing selected (None or an empty list): Series.isin() rejects None, and
    # there is nothing meaningful to draw, so return labelled empty figures.
    if not selected_states:
        placeholder = go.Figure()
        placeholder.update_layout(title='Select at least one state')
        return placeholder, placeholder, placeholder, placeholder

    # Restrict every source frame to the selected states.
    filtered_percapita_df = df.loc[
        df['Abbreviation'].isin(selected_states),
        ['State', 'County', 'IncomePerCap', 'Abbreviation'],
    ]
    filtered_df = total_pop_df[total_pop_df['StateCode'].isin(selected_states)]
    filtered_minority_df = minority_df[minority_df['StateCode'].isin(selected_states)]
    filtered_professional_df = professional_melted_df[
        professional_melted_df['StateCode'].isin(selected_states)
    ]

    # Both x and y must come from the SAME filtered frame; taking y from the
    # unfiltered minority_df pairs each selected state with the numbers of an
    # unrelated state.
    bar_states = filtered_minority_df['StateCode'].tolist()
    bar_chart = go.Figure(data=[
        go.Bar(name='Black', x=bar_states,
               y=filtered_minority_df['BlackPopulation'].tolist()),
        go.Bar(name='Hispanic', x=bar_states,
               y=filtered_minority_df['HispanicPopulation'].tolist()),
    ])
    bar_chart.update_layout(barmode='group', title='Black and Hispanic Population by State')

    choropleth_map = px.choropleth(
        filtered_df,
        locations='StateCode',
        locationmode='USA-states',
        color='TotalPopulation',
        title='Statewise Population',
        scope='usa',
    )

    # Rendered as a sunburst even though the target graph id says "treemap".
    hierarchy_chart = px.sunburst(
        filtered_professional_df,
        path=['StateCode', 'Jobs'],
        values="TotalPeople",
    )

    # Plot only the counties of the selected states so this chart reacts to the
    # dropdown like the other three (previously it always drew every county).
    strip_chart = px.strip(
        filtered_percapita_df,
        y='IncomePerCap',
        hover_data=['State', 'County', 'IncomePerCap'],
        width=200,
    )

    return choropleth_map, bar_chart, hierarchy_chart, strip_chart



In [124]:
# Start the Dash development server when the notebook/module is run directly.
if __name__ == '__main__':
    server_options = {'port': 7171, 'debug': True}
    app.run_server(**server_options)

[1;31m---------------------------------------------------------------------------[0m
[1;31mNameError[0m                                 Traceback (most recent call last)
[1;31mNameError[0m: name 'selected_stated' is not defined

[1;31m---------------------------------------------------------------------------[0m
[1;31mNameError[0m                                 Traceback (most recent call last)
[1;31mNameError[0m: name 'selected_stated' is not defined



In [165]:
# state_code.rename(columns={"State": "label", "Abbreviation": "value"}).to_dict('records')

In [67]:
minority_df

Unnamed: 0,StateCode,WhitePopulation,HispanicPopulation,BlackPopulation,AsianPopulation
0,AK,457454,47870,23626,42337
1,AL,3204691,193190,1270591,59355
2,AR,2177133,203253,456640,39993
3,AZ,3751935,2015062,263021,193808
4,CA,14878726,14746536,2157960,5192712
5,CO,3645901,1112574,202913,150857
6,CT,2487570,527257,347028,149297
7,DC,230504,66043,310792,23309
8,DE,591726,80657,195086,33322
9,FL,11013330,4662131,3033830,499744


In [81]:
# df.info()