## IMPORT REQUIRED LIBRARIES

In [1]:
# Import Required Libraries
import plotly.express as px
import os
import pandas as pd
import numpy as np

## IMPORT DATA (EXCEL FILE)

In [2]:
# Import Excel File as a dataframe using Pandas
ohio_data = pd.read_excel("./Data-and-analysis/Cleaned_for_cluster.xlsx",
                          sheet_name="ohio", header=0)
adam_data = pd.read_excel("./Data-and-analysis/Cleaned_for_cluster.xlsx",
                          sheet_name="adam", header=0)

# Take a look at first five rows of dataframe
ohio_data.head()

Unnamed: 0,NAICS_Sector,NAICS_Code_Sector,NAICS_Industry_Sector,Eri_2018,Eri_2015,Change_Eri,Per_change_Eri,LQ_2018,LQ_2015,Per_change_LQ,Per_change_Eni,National Share,Industry share,Regional share
0,"NAICS 11 Agriculture, forestry, fishing and hu...",NAICS 11,"Agriculture, forestry, fishing and hunting",16662,15919,743,4.66,0.36,0.34,5.88,1.15,84018.32,-65560.72,-17714.59
1,"NAICS 21 Mining, quarrying, and oil and gas ex...",NAICS 21,"Mining, quarrying, and oil and gas extraction",11859,13799,-1940,-14.05,0.48,0.49,-2.04,-10.42,72829.25,-216649.46,141880.21
2,NAICS 22 Utilities,NAICS 22,Utilities,19110,19200,-90,-0.46,0.94,0.92,2.17,-0.31,101334.99,-107455.44,6030.44
3,NAICS 23 Construction,NAICS 23,Construction,220709,200338,20371,10.16,0.83,0.83,0.0,12.48,1057356.8,1443814.25,-2480800.06
4,NAICS 31-33 Manufacturing,NAICS 31-33,Manufacturing,698950,685975,12975,1.89,1.49,1.48,0.67,2.89,3620483.07,-1632464.85,-1975043.21


### Generate new column for LQ value of 2018 compared to national average

In [3]:
lq = []
for value in ohio_data["LQ_2018"]:
    if value > 1:
        lq.append("LQ>1")
    elif value == 0:
        lq.append("LQ=1")
    else:
        lq.append("LQ<1")

ohio_data["LQ_comparison"] = lq

### Install the package from terminal or using !pip install plotly from jupyter notebook.

In [4]:
# Make sure the package is installed.
# pip install plotly # Try this code in terminal
# or try this in the code
package = "plotly"
try:
    __import__package
    print("imported")
except:
    os.system("pip install " + package)
    print("Successfully installed")

Successfully installed


### Ohio data: Plot cluster bubble chart using Plotly packaage

In [5]:
# Import plotly
ohio_bubble = px.scatter(
    ohio_data,
    x="Per_change_LQ",
    y="LQ_2018",
    # size="Eri_2018", # do not include size here to inspect all data points
    # color="LQ_comparison",
    hover_name="NAICS_Industry_Sector",
    size_max=None )

ohio_bubble.update_layout(
    title_text='Industry Cluster Bubble Chart for Ohio (with unallocated)', 
    title_x=0.5, #center alignment for title,
    xaxis=dict(title="Percent change in LQ (2015-2018)"),
    yaxis=dict(title="LQ in 2018"),
    font=dict(family="Helvetica", size=14, color="black"),
    hoverlabel=dict(bgcolor='white', font=dict(color='black')),)

# Add horizontal and vertical lines
ohio_bubble.add_shape(
    dict(type="line", x0=0, y0=0, x1=0, y1=2, line=dict(color="Black", width=0.4)))

ohio_bubble.add_shape(
    type="line", x0=-32, y0=1, x1=6, y1=1, line=dict(color="Black", width=0.4))

ohio_bubble.add_annotation(
    x=-30.76, y=0.09, text="Unclassified")

ohio_bubble.update_annotations(dict(
    xref="x", yref="y", font=dict(size=12),
    # showarrow=True,
    # arrowhead=7,
    ax=15, ay=-20  # location of text on the coordinates
))

ohio_bubble.show()

In [6]:
# Saves the file in pdf format (pdf format saves resolution of picture)
# Do you want the application “orca.app” to accept incoming network connections? Allow
ohio_bubble.write_image("./cluster_chart/ohio_bubble.pdf")

In [7]:
# Remove unclassified data point so that we can clearly see other datapoints
ohio_filtered = ohio_data[ohio_data.Per_change_LQ >= -30]

In [8]:
# Interactive plots using plotly....hover over data point to see the observations
ohio_bubble_filtered = px.scatter(
    ohio_filtered,
    x="Per_change_LQ", y="LQ_2018",
    size="Eri_2018", color="LQ_comparison", hover_name="NAICS_Industry_Sector", size_max=None)

ohio_bubble_filtered.update_xaxes(range=[-5, 6])
ohio_bubble_filtered.update_yaxes(range=[0.3, 1.8])
# ohio_bubble_filtered.update_yaxes(nticks=7)

# Add shapes
ohio_bubble_filtered.add_shape(
    # Line Vertical
    dict(type="line", x0=0, y0=0, x1=0, y1=2, line=dict(color="Black", width=0.4)))

ohio_bubble_filtered.add_shape(
    # Line Horizontal
    type="line", x0=-10, y0=1, x1=10, y1=1, line=dict(color="Black", width=0.4))

ohio_bubble_filtered.update_layout(
    hoverlabel=dict(bgcolor='white', font=dict(color='black')),
    title='Industry Cluster Bubble Chart for Ohio (relevant)', 
    title_x=0.5, #center alignment for title
    xaxis_title="Percent change in LQ (2015-2018)",
    yaxis_title="LQ in 2018",
    font=dict(family="Helvetica", size=14, color="black"),
    # plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)' #for background color for graph and plot
    # showlegend=False # to hide the legend
)

ohio_bubble_filtered.show() # print the figure

In [9]:
# Adding muliple Annotation to the figure
ohio_bubble_filtered.add_annotation(
    x=-3.57, y=1.62, text="Mgmnt of companies/enterprises")
ohio_bubble_filtered.add_annotation(
    x=0.67, y=1.49, text="Manufacturing")
ohio_bubble_filtered.add_annotation(
    x=5.88, y=0.36, text="Agr,forestry,fishery,hunting")
ohio_bubble_filtered.add_annotation(
    x=1.88, y=1.08, text="Wholesale trade")
ohio_bubble_filtered.add_annotation(
    x=5.05, y=1.04, text="Finance&Insurance")
ohio_bubble_filtered.add_annotation(
    x=-0.89, y=1.11, text="Healthcare&SocialAssistance")
ohio_bubble_filtered.add_annotation(
    x=-3.06, y=0.95, text="Administrative & waste service")
ohio_bubble_filtered.add_annotation(
    x=-0, y=1.04, text="Transportation & warehousing")


ohio_bubble_filtered.update_annotations(dict(
    xref="x", yref="y", font=dict(size=8),
    # showarrow=True,
    # arrowhead=7,
    ax=15, ay=-20  # location of text on the coordinates
))

ohio_bubble_filtered.show()  # print the figure 
#hover over the picture to see the text

In [10]:
# Saves the file in pdf format (pdf format saves resolution of picture)
# Do you want the application “orca.app” to accept incoming network connections? Allow
ohio_bubble_filtered.write_image("./cluster_chart/ohio_bubble_filtered.pdf")

# FOR ADAM COUNTY


In [11]:
# creating new variable LQ_comparison with national average
lq = []
for value in adam_data["LQ_2018"]:
    if value > 1:
        lq.append("LQ>1")
    elif value == 0:
        lq.append("LQ=1")
    else:
        lq.append("LQ<1")

adam_data["LQ_comparison"] = lq

In [12]:
adam_data.head()

Unnamed: 0,NAICS Sector,NAICS Code Sector,NAICS_Industry_Sector,Eri_2018,Eri_2015,Change_Eri,Per_change_Eri,LQ_2018,LQ_2015,Per_change_LQ,LQ_comparison
0,NAICS 22 Utilities,NAICS 22,Utilities,139,149,-10,-6.7,6.06,6.47,-6.3,LQ>1
1,NAICS 23 Construction,NAICS 23,Construction,264,150,114,76.0,0.88,0.56,57.1,LQ<1
2,NAICS 31-33 Manufacturing,NAICS 31-33,Manufacturing,856,718,138,19.2,1.63,1.4,16.4,LQ>1
3,NAICS 42 Wholesale trade,NAICS 42,Wholesale trade,254,232,22,9.4,1.04,0.95,9.4,LQ>1
4,NAICS 44-45 Retail trade,NAICS 44-45,Retail trade,866,886,-20,-2.2,1.32,1.36,-2.9,LQ>1


In [13]:
# Import plotly
adam_bubble = px.scatter(
    adam_data,
    x="Per_change_LQ",
    y="LQ_2018",
    size="Eri_2018",
    color="LQ_comparison",
    hover_name="NAICS_Industry_Sector",
    size_max=None)

adam_bubble.update_layout(
    title_text="Industry Cluster Bubble Chart for Adam's County, Ohio", 
    title_x=0.5, #center alignment for title,
    xaxis=dict(title="Percent change in LQ (2015-2018)"),
    yaxis=dict(title="LQ in 2018"),
    font=dict(family="Helvetica", size=14, color="black"),
    hoverlabel=dict(bgcolor='white', font=dict(color='black')))

# Add shapes
adam_bubble.add_shape(
    # Line Vertical
    dict(type="line", x0=0, y0=0, x1=0, y1=6.5, line=dict(color="Black", width=0.4)))

adam_bubble.add_shape(
    # Line Horizontal
    type="line", x0=-50, y0=1, x1=220, y1=1, line=dict(color="Black", width=0.4))

# Adding annotation to the figure
adam_bubble.add_annotation(
    x=200, y=0.12, text="Educational Services")
adam_bubble.add_annotation(
    x=-6.3, y=6.06, text="Utilities")

adam_bubble.update_annotations(dict(
    xref="x", yref="y", font=dict(size=12),
    # showarrow=True,
    # arrowhead=7,
    ax=15, ay=-20  # location of text on the coordinates
))

adam_bubble.show()  # print the figure

## check cluster bubble chart for Adams County

In [14]:
# Saves the file in pdf format (pdf format saves resolution of picture)
# Do you want the application “orca.app” to accept incoming network connections? Allow
adam_bubble.write_image("./cluster_chart/adam_bubble_all.pdf")

## Filter extremes for clear observation

In [15]:
# Remove extreme points. use the '&' operator to specify that both conditions are required
adam_filtered = adam_data[(adam_data.Per_change_LQ < 70) & (adam_data.LQ_2018 < 1.8)]

In [16]:
adam_filtered.shape
# adam_filtered.shape

(13, 11)

In [17]:
# Interactive plots using plotly....hover over data point to see the observations
adam_bubble_filtered = px.scatter(
    adam_filtered,
    x="Per_change_LQ", y="LQ_2018",
    size="Eri_2018", color="LQ_comparison", hover_name="NAICS_Industry_Sector", size_max=None)

# adam_bubble_filtered.update_xaxes(range=[-5, 6]) # update x and y axis ranges 
# adam_bubble_filtered.update_yaxes(range=[0.3, 1.8])
# ohio_bubble_filtered.update_yaxes(nticks=7)

# Add shapes
adam_bubble_filtered.add_shape(
    # Line Vertical
    dict(type="line", x0=0, y0=0, x1=0, y1=2, line=dict(color="Black", width=0.4)))

adam_bubble_filtered.add_shape(
    # Line Horizontal
    type="line", x0=-30, y0=1, x1=80, y1=1, line=dict(color="Black", width=0.4))

adam_bubble_filtered.update_layout(
    hoverlabel=dict(bgcolor='white', font=dict(color='black')),
    title="Industry Cluster Bubble Chart for Adam's County, Ohio (excluding 2)", 
    title_x=0.5, #center alignment for title
    xaxis_title="Percent change in LQ (2015-2018)",
    yaxis_title="LQ in 2018",
    font=dict(family="Helvetica", size=14, color="black"),
    # plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)' #for background color for graph and plot
    # showlegend=False # to hide the legend
)

adam_bubble_filtered.show() # print the figure

In [18]:
# Adding muliple Annotation to the figure
adam_bubble_filtered.add_annotation(
    x=16.4, y=1.63, text="Manufacturing")
adam_bubble_filtered.add_annotation(
    x=-2.9, y=1.32, text="Retail Trade")
adam_bubble_filtered.add_annotation(
    x=-12.9, y=1.01, text="Healthcare/Social Assistance")
adam_bubble_filtered.add_annotation(
    x=9.4, y=1.04, text="Wholesale trade")
adam_bubble_filtered.add_annotation(
    x=57.1, y=0.88, text="Construction")
adam_bubble_filtered.add_annotation(
    x=69.6, y=0.56, text="Administrative & waste service")
adam_bubble_filtered.add_annotation(
    x=10.1, y=0.76, text="Finance and Insurance")


adam_bubble_filtered.update_annotations(dict(
    xref="x", yref="y", font=dict(size=8),
    # showarrow=True,
    # arrowhead=7,
    ax=15, ay=-20  # location of text on the coordinates
))

adam_bubble_filtered.show()  # print the figure

In [19]:
# Saves the file in pdf format (pdf format saves resolution of picture)
# Do you want the application “orca.app” to accept incoming network connections? Allow
adam_bubble_filtered.write_image("./cluster_chart/adam_bubble_filtered.pdf")

In [20]:
# to convert jupyter notebook into html, run this code on terminal or here using !

# pip install nbconvert
! jupyter nbconvert --to html clustercharts.ipynb  

[NbConvertApp] Converting notebook clustercharts.ipynb to html
[NbConvertApp] Writing 685967 bytes to clustercharts.html
