In [2]:
import pandas as pd
import numpy as np

## Key Questions
## Refactored Key Questions
- Which **counties** in Colorado have the highest number of business filings?
- What specific **regions** in Colorado show the highest number of business filings? *(Note: Define "regions" if it refers to specific geographical/administrative areas.)*
- At the state level, which **types of businesses** or **industries** are most prevalent?
- On a county basis, which **types of businesses** or **industries** are most common?
- In different regions, what are the most common **types of businesses** or **industries**?
- **Trend Analysis:** Is there an increasing trend in the number of business filings in Colorado over the past 5 years?
- **Trend Analysis:** What has been the trend in business filings over the past 10 years?
- **Trend Analysis:** How have business filings varied month-over-month in the last 12 months?


In [3]:
# import business.csv
df = pd.read_csv('Business.csv')

In [4]:
df.head()

Unnamed: 0,entityid,entityname,principaladdress1,principaladdress2,principalcity,principalstate,principalzipcode,principalcountry,mailingaddress1,mailingaddress2,...,agentprincipalstate,agentprincipalzipcode,agentprincipalcountry,agentmailingaddress1,agentmailingaddress2,agentmailingcity,agentmailingstate,agentmailingzipcode,agentmailingcountry,entityformdate
0,18861217679,"DENVER UNION CORPROATION, Dissolved January 17...",1512 LARIMER STREET #760,,Denver,CO,80202,,,,...,CO,80202.0,US,,,,,,,03/29/1886
1,19871114819,GROWER-SHIPPER POTATO COMPANY,0132 CO RD 34,,Monte Vista,CO,81144,US,PO BOX 432,,...,CO,81101.0,US,PO BOX 809,,ALAMOSA,CO,81101.0,US,06/28/1948
2,18911012261,"GRADEN MERCANTILE COMPANY (THE), Dissolved May...",777 Main Ave.,,Durango,CO,801302,US,P.O. Box Dr F,,...,CO,81301.0,US,,,,,,,07/01/1891
3,19031090218,"A. CARBONE AND COMPANY, INC., Dissolved Decemb...",846 East Eighteenth Avenue,,Denver,CO,80218,US,,,...,,,,,,,,,,03/31/1903
4,19871132823,"WIREMAN'S BROTHERHOOD FUND, INC.",5660 Logan St,,Denver,CO,80216,US,,,...,CO,80216.0,US,,,,,,,06/18/1956


In [5]:
num_rows, num_cols = df.shape
print("Number of rows:", num_rows)
print("Number of columns:", num_cols)


Number of rows: 2586441
Number of columns: 35


In [12]:
df['entityformdate'] = pd.to_datetime(df['entityformdate']).dt.date
df.dtypes

entityid                   int64
entityname                object
principaladdress1         object
principaladdress2         object
principalcity             object
principalstate            object
principalzipcode          object
principalcountry          object
mailingaddress1           object
mailingaddress2           object
mailingcity               object
mailingstate              object
mailingzipcode            object
mailingcountry            object
entitystatus              object
jurisdictonofformation    object
entitytype                object
agentfirstname            object
agentmiddlename           object
agentlastname             object
agentsuffix               object
agentorganizationname     object
agentprincipaladdress1    object
agentprincipaladdress2    object
agentprincipalcity        object
agentprincipalstate       object
agentprincipalzipcode     object
agentprincipalcountry     object
agentmailingaddress1      object
agentmailingaddress2      object
agentmaili

In [14]:
date_range = (df['entityformdate'].min().strftime('%Y-%m-%d'), df['entityformdate'].max().strftime('%Y-%m-%d'))
print("Range of business formation dates:", date_range)


Range of business formation dates: ('1864-03-05', '2023-12-26')


In [15]:
# what counties are represented?
df.columns

Index(['entityid', 'entityname', 'principaladdress1', 'principaladdress2',
       'principalcity', 'principalstate', 'principalzipcode',
       'principalcountry', 'mailingaddress1', 'mailingaddress2', 'mailingcity',
       'mailingstate', 'mailingzipcode', 'mailingcountry', 'entitystatus',
       'jurisdictonofformation', 'entitytype', 'agentfirstname',
       'agentmiddlename', 'agentlastname', 'agentsuffix',
       'agentorganizationname', 'agentprincipaladdress1',
       'agentprincipaladdress2', 'agentprincipalcity', 'agentprincipalstate',
       'agentprincipalzipcode', 'agentprincipalcountry',
       'agentmailingaddress1', 'agentmailingaddress2', 'agentmailingcity',
       'agentmailingstate', 'agentmailingzipcode', 'agentmailingcountry',
       'entityformdate'],
      dtype='object')

In [17]:
# what zip codes are represented?
df.principalzipcode.unique()
len(df.principalzipcode.unique())

101032

In [18]:
df.head(1)

Unnamed: 0,entityid,entityname,principaladdress1,principaladdress2,principalcity,principalstate,principalzipcode,principalcountry,mailingaddress1,mailingaddress2,...,agentprincipalstate,agentprincipalzipcode,agentprincipalcountry,agentmailingaddress1,agentmailingaddress2,agentmailingcity,agentmailingstate,agentmailingzipcode,agentmailingcountry,entityformdate
0,18861217679,"DENVER UNION CORPROATION, Dissolved January 17...",1512 LARIMER STREET #760,,Denver,CO,80202,,,,...,CO,80202,US,,,,,,,1886-03-29
