In [1]:
# for manipulating dataframes
import pandas as pd

# for visualizations
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's white-grid theme to all subsequent plots.
# `set_theme` is the preferred spelling; `sns.set` is only a legacy alias for it.
sns.set_theme(style="whitegrid")

# Display the result of every top-level expression in a cell,
# not only the last one (IPython's default is "last_expr").
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Remove pandas' display limits: show every column, every row,
# and the full text of each cell without truncation.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

# Listing of Active Businesses in Los Angeles

In [2]:
# Load the business listing CSV export into a DataFrame
df = pd.read_csv(
    filepath_or_buffer='../data/in/Listing_of_Active_Businesses_20240703.csv'
)

In [3]:
df.shape

(587032, 16)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 587032 entries, 0 to 587031
Data columns (total 16 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   LOCATION ACCOUNT #         587032 non-null  object 
 1   BUSINESS NAME              587031 non-null  object 
 2   DBA NAME                   198301 non-null  object 
 3   STREET ADDRESS             587029 non-null  object 
 4   CITY                       586980 non-null  object 
 5   ZIP CODE                   587032 non-null  object 
 6   LOCATION DESCRIPTION       587007 non-null  object 
 7   MAILING ADDRESS            306533 non-null  object 
 8   MAILING CITY               306546 non-null  object 
 9   MAILING ZIP CODE           306446 non-null  object 
 10  NAICS                      523121 non-null  float64
 11  PRIMARY NAICS DESCRIPTION  523121 non-null  object 
 12  COUNCIL DISTRICT           587032 non-null  int64  
 13  LOCATION START DATE        58

In [5]:
df.columns

Index(['LOCATION ACCOUNT #', 'BUSINESS NAME', 'DBA NAME', 'STREET ADDRESS',
       'CITY', 'ZIP CODE', 'LOCATION DESCRIPTION', 'MAILING ADDRESS',
       'MAILING CITY', 'MAILING ZIP CODE', 'NAICS',
       'PRIMARY NAICS DESCRIPTION', 'COUNCIL DISTRICT', 'LOCATION START DATE',
       'LOCATION END DATE', 'LOCATION'],
      dtype='object')

In [6]:
def get_values(df, columns):
    """
    Print the value counts of each requested column.

    For every column, a header (the column name followed by a separator
    line) is printed, then ``value_counts(dropna=False)`` so that missing
    values are counted too. Nothing is returned; the output goes to stdout.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are summarised.
    columns : str or iterable of str
        A single column name or an iterable of column names. A bare string
        is treated as ONE column name instead of being iterated character
        by character.

    Raises
    ------
    KeyError
        If a requested column is not present in ``df``.
    """
    # A plain string is itself iterable; wrap it so 'CITY' means the CITY
    # column and not the columns 'C', 'I', 'T', 'Y'.
    if isinstance(columns, str):
        columns = [columns]

    for column in columns:
        print(column)
        print('=====================================')
        print(df[column].value_counts(dropna=False))
        print('\n')

def show_values(df, param):
    """
    Print value counts for all columns of ``df`` or for a chosen subset.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.
    param : str or iterable of str
        The literal string ``'all'`` selects every column of ``df``.
        Any other string is treated as a single column name. Any other
        iterable (list, tuple, pandas Index, ...) is used as the collection
        of column names to show.
    """
    if isinstance(param, str):
        # Only compare real strings with 'all': on an Index or array,
        # `param == 'all'` is element-wise and cannot be used in an `if`.
        columns = df.columns if param == 'all' else [param]
    else:
        columns = param
    get_values(df, columns)

# Inspect how the rows are distributed across the CITY column
show_values(df, param=['CITY'])

CITY
CITY
LOS ANGELES                       279708
VAN NUYS                           20937
NORTH HOLLYWOOD                    17819
SHERMAN OAKS                       15670
WOODLAND HILLS                     13379
ENCINO                             11141
NORTHRIDGE                          9503
STUDIO CITY                         7509
CANOGA PARK                         7074
RESEDA                              6959
TARZANA                             6723
CHATSWORTH                          6025
SAN PEDRO                           5883
SYLMAR                              5543
GRANADA HILLS                       5539
SUN VALLEY                          5523
PANORAMA CITY                       5504
VENICE                              5407
PACOIMA                             4489
NORTH HILLS                         4222
VALLEY VILLAGE                      4098
GLENDALE                            4074
WEST HILLS                          4066
BEVERLY HILLS                       3581
WINNET

In [6]:
# Keep only the rows whose CITY is exactly "LOS ANGELES"
# (rows with a missing CITY compare as False and are dropped)
df = df.loc[df['CITY'].eq("LOS ANGELES")]

In [7]:
df.shape

(279708, 16)

In [8]:
df.head()

Unnamed: 0,LOCATION ACCOUNT #,BUSINESS NAME,DBA NAME,STREET ADDRESS,CITY,ZIP CODE,LOCATION DESCRIPTION,MAILING ADDRESS,MAILING CITY,MAILING ZIP CODE,NAICS,PRIMARY NAICS DESCRIPTION,COUNCIL DISTRICT,LOCATION START DATE,LOCATION END DATE,LOCATION
2,0002829017-0001-5,RICHARD JOHN SHERMAN,,2010 LA BREA TERRACE,LOS ANGELES,90046-2314,2010 LA BREA 90046-2314,,,,,,4,04/19/2014,,
5,0000111620-0001-4,SOUTHERN CALIFORNIA GRANTMAKERS,,1000 N ALAMEDA STREET SUITE #230,LOS ANGELES,90012-1804,1000 ALAMEDA 90012-1804,,,,,,14,07/01/1984,,"(34.0593, -118.2361)"
6,0003293756-0001-5,BHI RESIDENTIAL LONG TERM CORPORATION,,732 S SPRING STREET APT #1021,LOS ANGELES,90014-3058,732 SPRING 90014-3058,,,,,,14,09/01/2021,,"(34.0435, -118.2527)"
8,0002774873-0001-4,ISAIAH C. WILLIS III,,153 W 59TH STREET,LOS ANGELES,90003-1103,153 59TH 90003-1103,153 W 59TH STREET,LOS ANGELES,90003-1103,,,9,07/01/2014,,"(33.9869, -118.275)"
9,0002862088-0001-0,ARTURO ALBERTO ALARCON RAMIREZ,,853 E 33RD STREET,LOS ANGELES,90011-2415,853 33RD 90011-2415,,,,,,9,01/01/2014,,


In [9]:
# Write the first 900 rows to CSV, leaving the DataFrame index out of the file
df.iloc[:900].to_csv(
    path_or_buf="../data/out/filtered_los_angeles_900.csv",
    index=False,
)