In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff

In [23]:
# Attribute metadata; the 'Name' and 'Label_3' columns of this frame are read
# by ImportantAttributes to turn column codes into readable labels.
data_profile_attr = pd.read_csv(filepath_or_buffer='Data_Profile_Attributes.csv')

# Profile table with one row per zip code tabulation area (used as the index),
# so a single zip code's record can be selected by label.
df = pd.read_csv(
    filepath_or_buffer='Cleaned_percent_data.csv',
    index_col='zip code tabulation area',
)

In [31]:
class ImportantAttributes():
    """Selected data-profile attributes for a single zip code.

    The constructor selects the row for ``zipcode`` from the profile table and
    copies individual ``DPxx_nnnnPE`` columns onto named attributes.  Ranges of
    related columns (ancestry, industry, income bracket, age bracket) are
    collected into dictionaries keyed by the ``Label_3`` text found for each
    column code in the attribute-metadata table.

    ``create_table`` assembles ``self.table_data`` (a list of
    ``[attribute name, value]`` rows preceded by a header row) and optionally
    renders it; ``create_plots`` draws two pie charts from the same values.
    """

    def __init__(self, zipcode, source_df=None, attr_labels=None):
        """Read the attributes for ``zipcode``.

        Parameters
        ----------
        zipcode
            Index label of the row to read from the profile table.
        source_df : pandas.DataFrame, optional
            Profile table indexed by zip code.  Defaults to the notebook-level
            ``df`` so existing ``ImportantAttributes(zipcode)`` calls still work.
        attr_labels : pandas.DataFrame, optional
            Metadata table with ``Name`` and ``Label_3`` columns.  Defaults to
            the notebook-level ``data_profile_attr``.

        Raises
        ------
        KeyError
            If ``zipcode`` is absent from the index, matches more than one row,
            or a required column code / label is missing.
        """
        # Fall back to the notebook globals only when no frame is supplied, so
        # the class can also be used (and tested) with explicit inputs.
        if source_df is None:
            source_df = df
        if attr_labels is None:
            attr_labels = data_profile_attr

        self.zipcode = zipcode
        row = source_df.loc[zipcode]
        if isinstance(row, pd.DataFrame):
            # A duplicated index label yields several rows; the attributes
            # below need exactly one scalar per column.
            raise KeyError('zip code {!r} matches more than one row'.format(zipcode))
        self.data = row

        # Build the code -> label mapping once instead of re-scanning the
        # metadata frame for every code.  The first occurrence of a Name wins.
        first_labels = attr_labels.drop_duplicates(subset='Name')
        label_of = dict(zip(first_labels['Name'], first_labels['Label_3']))

        # Households
        self.houses = row['DP02_0001PE']  # not percent (original author's note)
        self.families = row['DP02_0002PE']
        self.non_families = row['DP02_0010PE']
        self.family_size = row['DP02_0016PE']

        # Relationships within households
        self.population = row['DP02_0017PE']  # not percent (original author's note)
        self.spouse = row['DP02_0019PE']
        self.child = row['DP02_0020PE']
        self.owner = row['DP02_0018PE']
        self.fertility = row['DP02_0036PE']

        # School enrollment
        self.enrolled = row['DP02_0052PE']  # not percent (original author's note)
        self.nursery = row['DP02_0053PE']
        self.kinder = row['DP02_0054PE']
        self.elementary = row['DP02_0055PE']
        self.high_school = row['DP02_0056PE']
        self.college = row['DP02_0057PE']

        # Educational attainment
        self.pop_above_25 = row['DP02_0058PE']
        self.less_than_12th = row['DP02_0059PE'] + row['DP02_0060PE']
        self.high_school_or_more = row['DP02_0066PE']
        self.bachelors_or_more = row['DP02_0067PE']

        self.foreign_born = row['DP02_0092PE']

        self.ancestry_dict = self._labelled_values(label_of, 'DP02', range(123, 150))

        # Language spoken
        self.pop_above_5 = row['DP02_0110PE']
        self.english = row['DP02_0111PE']
        self.spanish = row['DP02_0114PE']
        self.other_european = row['DP02_0116PE']
        self.asian_pacific = row['DP02_0118PE']
        self.other_language = row['DP02_0120PE']

        # Employment
        self.pop_above_16 = row['DP03_0001PE']
        self.labor_force = row['DP03_0002PE']
        self.unemployment_rate = row['DP03_0009PE']

        # Occupation
        self.civil_labor_force = row['DP03_0026PE']
        self.management_science_art = row['DP03_0027PE']
        self.service = row['DP03_0028PE']
        self.sales = row['DP03_0029PE']
        self.construction = row['DP03_0030PE']
        self.production_transport = row['DP03_0031PE']

        self.industry_dict = self._labelled_values(label_of, 'DP03', range(33, 46))

        self.income_dict = self._labelled_values(label_of, 'DP03', range(52, 62))
        self.median_income = row['DP03_0062PE']

        # Sex and age
        self.sex_ratio = row['DP05_0004PE']
        self.age_dict = self._labelled_values(label_of, 'DP05', range(5, 18))
        self.median_age = row['DP05_0018PE']

        # Race / origin
        self.white = row['DP05_0037PE']
        self.black = row['DP05_0038PE']
        self.american_indian = row['DP05_0039PE']
        self.asian = row['DP05_0044PE']
        self.indian = row['DP05_0045PE']
        self.chinese = row['DP05_0046PE']
        self.hawaiian_pacific = row['DP05_0052PE']
        self.hispanic = row['DP05_0071PE']

    def _labelled_values(self, label_of, prefix, numbers):
        """Return ``{Label_3 text: value}`` for codes ``<prefix>_<nnnn>PE``.

        ``numbers`` are zero-padded to four digits, which covers both the
        one-digit and multi-digit codes without special-casing.  If two codes
        share a label, the later one overwrites the earlier entry.
        """
        labelled = {}
        for number in numbers:
            code = '{}_{:04d}PE'.format(prefix, number)
            if code not in label_of:
                raise KeyError('no Label_3 entry found for ' + code)
            labelled[label_of[code]] = self.data[code]
        return labelled

    def _household_rows(self):
        """Table rows for the family / non-family split (first pie chart)."""
        return [['Percentage of Families', self.families],
                ['Percentage of Non Families', self.non_families]]

    def _relationship_rows(self):
        """Table rows for householder / spouse / child (second pie chart)."""
        return [['Percentage of Householders', self.owner],
                ['Percentage of Spouses', self.spouse],
                ['Percentage of Children', self.child]]

    def create_table(self, display=True):
        """Build ``self.table_data`` and, when ``display`` is true, render it.

        ``self.table_data`` is a list whose first row is the header
        ``['Attribute Name', 'Value']`` followed by one ``[name, value]`` row
        per attribute.  ``less_than_12th`` is computed in ``__init__`` but is
        not included in the table.
        """
        rows = [['Attribute Name', 'Value'],
                ['Total Population', int(self.population)],
                ['Total number of Households', int(self.houses)],
                ['Average Family Size', self.family_size]]
        rows += self._household_rows()
        rows += self._relationship_rows()
        rows += [
            ['Number of women who had baby in the past 12 months', int(self.fertility)],
            ['Number of children enrolled in school', int(self.enrolled)],
            ['Percentage of enrolled children in nursery', self.nursery],
            ['Percentage of enrolled children in kindergarten', self.kinder],
            ['Percentage of enrolled children in elementary', self.elementary],
            ['Percentage of enrolled children in high school', self.high_school],
            ['Percentage of  enrolled children in college', self.college],
            ['Number of people above 25 years', int(self.pop_above_25)],
            ['Percentage of people with high school or more education', self.high_school_or_more],
            ['Percentage of people with bachelors or more education', self.bachelors_or_more],
            ['Percentage of people who are foreign born', self.foreign_born],
            ['Percentage of people who speak only English', self.english],
            ['Percentage of people who speak Spanish', self.spanish],
            ['Percentage of people who speak other Indo-European language', self.other_european],
            ['Percentage of people who speak Asian languages', self.asian_pacific],
            ['Percentage of people who speak other languages', self.other_language],
        ]
        rows += [['Percentage of people with ' + ancestry + ' ancestry', share]
                 for ancestry, share in self.ancestry_dict.items()]
        rows += [['Percentage of people above 16 years in labor force', self.labor_force],
                 ['Unemployment rate', self.unemployment_rate]]
        self.table_data = rows

        if display:
            fig = ff.create_table(self.table_data, height_constant=30)
            fig.layout.margin.update({'t': 50, 'b': 100})
            fig.layout.update({'title': 'Important Attributes of zip code : ' + str(self.zipcode)})
            fig.show()

    def create_plots(self):
        """Show two side-by-side pie charts: households and relationships.

        Also (re)builds ``self.table_data`` without displaying the table, so
        code that reads ``table_data`` after calling this method keeps working.
        """
        self.create_table(display=False)
        fig = make_subplots(rows=1, cols=2, specs=[[{"type": "pie"}, {"type": "pie"}]])

        pies = (self._household_rows(), self._relationship_rows())
        for col, pie_rows in enumerate(pies, start=1):
            # No explicit ``domain`` here: placing a trace with row/col makes
            # plotly assign the subplot cell's domain to the trace.
            fig.add_trace(
                go.Pie(labels=[name for name, _ in pie_rows],
                       values=[value for _, value in pie_rows]),
                row=1, col=col,
            )

        fig.show()

In [25]:
# Zip code whose attributes are inspected in the cells below.
zipcode = 43964

# Read that zip code's attributes, then draw the two pie charts.
attr_obj = ImportantAttributes(zipcode=zipcode)
attr_obj.create_plots()

In [26]:
# Build the attribute table for the selected zip code and render it.
attr_obj.create_table(display=True)

In [32]:
# Row spans inside attr_obj.table_data (row 0 is the header row).
HOUSEHOLD_ROWS = slice(4, 6)       # families / non-families
RELATIONSHIP_ROWS = slice(6, 9)    # householders / spouses / children


def donut_caption(text, x, y=0.8):
    """Return an arrow-less annotation dict used as the caption of one donut."""
    return {
        "font": {"size": 15},
        "showarrow": False,
        "text": text,
        "x": x,
        "y": y,
    }


def donut_trace(table_rows, name, column, row=0):
    """Return a pie-trace dict (with a hole) for ``[label, value]`` table rows.

    ``row`` / ``column`` select the cell of the layout grid the trace occupies.
    """
    return {
        "values": [value for _, value in table_rows],
        "labels": [label for label, _ in table_rows],
        "domain": {"row": row, "column": column},
        "name": name,
        "hoverinfo": "label+percent+name",
        "hole": .6,
        "type": "pie",
    }


# Rebuild the table here (without displaying it) so this cell does not depend
# on an earlier cell having populated attr_obj.table_data as a side effect.
attr_obj.create_table(display=False)

# Only row 0 of the 2x2 grid is populated by the traces below.
layout = go.Layout(
    title="Visualizations",
    grid={"rows": 2, "columns": 2},
    annotations=[
        donut_caption("Households", x=0.18),
        donut_caption("Relationships", x=0.82),
    ],
)

data1 = donut_trace(attr_obj.table_data[HOUSEHOLD_ROWS], "Households", column=0)
data2 = donut_trace(attr_obj.table_data[RELATIONSHIP_ROWS], "Relationships", column=1)
data = [data1, data2]

fig = go.Figure(data=data, layout=layout)
fig.update_layout(height=1200, width=1000)
fig.show()