# Data Engineering with ETL

The purpose of this project, is to explore the Columbus, Ohio points of interest data and columbus community data using the ETL process.

In [2]:
# Import libraries
import pandas as pd
import json

# Import Libraries for geocoding
from geopy.geocoders import Nominatim
import time
from tqdm import tqdm
import geopandas as gpd
from geopandas import GeoDataFrame

### 1. Data Extraction
Use Python and Pandas to extract and transform data from the poi.csv and columbus_communities.geojson datasets.

In [3]:
# Load POI Data from CSV
poi_file_path = "Resources/poi_data.csv"
poi_df = pd.read_csv(poi_file_path)
print("POI Data:")
print(poi_df.head())

POI Data:
              X              Y  OBJECTID   COLS_KEY                 LSN  \
0  1.796884e+06  731334.361343  20415656   626098.0  3750 ZANE TRACE DR   
1  1.842715e+06  749717.238291  20415657  1268125.0       2645 MORSE RD   
2  1.847615e+06  689890.356632  20415658   465109.0  3699 ALUM CREEK DR   
3  1.796091e+06  728398.931190  20415659   810355.0    2180 N WILSON RD   
4  1.806697e+06  712614.778047  20415660   885361.0     2872 W BROAD ST   

                             POI_NAME                    POI_TYPE POI_SOURCE  \
0                          Pet Brands      Industrial - Warehouse        NaN   
1  Kohler Bathroom & Kitchen Products  Retail - Commercial/Retail        NaN   
2  Shasta National Beverage Warehouse      Industrial - Warehouse        NaN   
3                  Metal Supermarkets  Retail - Commercial/Retail        NaN   
4            American Health Network              Medical - Other        NaN   

  PRIMARY_USE ALT_GEOM   PHONE_NUM                        

In [4]:
poi_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15940 entries, 0 to 15939
Data columns (total 19 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   X             15940 non-null  float64
 1   Y             15940 non-null  float64
 2   OBJECTID      15940 non-null  int64  
 3   COLS_KEY      15531 non-null  float64
 4   LSN           15925 non-null  object 
 5   POI_NAME      15940 non-null  object 
 6   POI_TYPE      15940 non-null  object 
 7   POI_SOURCE    2336 non-null   object 
 8   PRIMARY_USE   14839 non-null  object 
 9   ALT_GEOM      15940 non-null  object 
 10  PHONE_NUM     4583 non-null   object 
 11  GLOBALID      15940 non-null  object 
 12  WEBSITE       584 non-null    object 
 13  OB_GYN        97 non-null     object 
 14  PEDIATRICS    86 non-null     object 
 15  PRIMARY_CARE  134 non-null    object 
 16  SUTQ_RATING   1123 non-null   float64
 17  SNAP          1133 non-null   object 
 18  WIC           108 non-null

In [9]:
# View the total number of columns and rows
poi_df.shape

(15940, 19)

In [6]:
# Load the Columbus Communities dataset GeoJSON file
communities_gdf = gpd.read_file("Resources/columbus_communities.geojson")
print("Community Data:")
print(communities_gdf.head())

Community Data:
   OBJECTID               AREA_NAME              CREATED_DATE  \
0       359               Far South 2018-10-02 19:34:47+00:00   
1       360  Livingston Avenue Area 2018-10-02 19:34:47+00:00   
2       361               Southwest 2018-10-02 19:34:47+00:00   
3       362     University District 2018-10-02 19:34:47+00:00   
4       363                Westland 2018-10-02 19:34:47+00:00   

             LAST_EDIT_DATE  AREA_NO                                GLOBALID  \
0 2024-11-06 14:26:28+00:00       64  {DE1E5BD1-465B-4DA0-A5DB-4D518C3AC9DF}   
1 2022-11-17 14:33:57+00:00       62  {68BD6D27-A7AA-49EF-B7C0-6A2464E3438E}   
2 2022-11-17 15:02:09+00:00       59  {417925A7-E194-47C5-95EE-866972B32697}   
3 2022-11-10 18:37:10+00:00       44  {EB56F546-7152-4174-9982-16FEBB30AC7C}   
4 2018-10-02 19:36:55+00:00       52  {FEC6B62A-1C9D-4A05-8831-A34CBEBB5A88}   

    SHAPESTArea  SHAPESTLength  \
0  7.141404e+08  394997.951037   
1  3.088745e+07   32450.707488   
2  3.03163

In [7]:
communities_gdf.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   OBJECTID        41 non-null     int32              
 1   AREA_NAME       41 non-null     object             
 2   CREATED_DATE    41 non-null     datetime64[ms, UTC]
 3   LAST_EDIT_DATE  41 non-null     datetime64[ms, UTC]
 4   AREA_NO         41 non-null     int32              
 5   GLOBALID        41 non-null     object             
 6   SHAPESTArea     41 non-null     float64            
 7   SHAPESTLength   41 non-null     float64            
 8   geometry        41 non-null     geometry           
dtypes: datetime64[ms, UTC](2), float64(2), geometry(1), int32(2), object(2)
memory usage: 2.7+ KB


### 1. Transform
Use Geocoding and Pandas to transform the business location data within the dataframes.

In [10]:
# Filter the dataset to include only POI Types that start with "Public Places"
public_places_df = poi_df[poi_df['POI_TYPE'].str.startswith("Public Places")].copy()
print(public_places_df.head())

               X              Y  OBJECTID   COLS_KEY                  LSN  \
25  1.829872e+06  721968.778040  20415681  1232757.0        350 E 1ST AVE   
41  1.853989e+06  699122.803719  20415697   461583.0      3480 REFUGEE RD   
63  1.805330e+06  770084.667372  20415719   973552.0  2524 BILLINGSLEY RD   
75  1.809716e+06  715039.649182  20415731   268367.0     303 N EUREKA AVE   
87  1.835008e+06  735507.522616  20415743   155334.0   1254 BRIARWOOD AVE   

                             POI_NAME  \
25  Berry Brothers Boltworks Building   
41                First Church of God   
63             Regals Cinema Columbus   
75            Holton Community Center   
87            Linden Community Center   

                                       POI_TYPE POI_SOURCE PRIMARY_USE  \
25                        Public Places - Other        NaN           Y   
41             Public Places - House of Worship        NaN           Y   
63         Public Places - Theater/Concert Hall        NaN          

In [11]:
# View the updated number of rows and columns
public_places_df.shape

(958, 19)

In [12]:
# In order to merge the community data coordinates with the public business coordinates, 
# we need to convert the X and Y coordinates into longitude/latitude.

# Initialize Nominatim geocoder with a long timeout
geolocator = Nominatim(user_agent="myGeocoder", timeout=10)  # 10-second timeout

# Create a list to store failed addresses
failed_addresses = []

# Function to geocode an address
def geocode_address(address):
    try:
        # Append ", Columbus, OH, USA" to the address
        location = geolocator.geocode(address + ", Columbus, OH, USA")
        if location:
            return location.latitude, location.longitude
        else:
            print(f"Address not found: {address}")
            failed_addresses.append(address)  # Add to failed addresses list
            return None, None
    except Exception as e:
        print(f"Error geocoding {address}: {e}")
        failed_addresses.append(address)  # Add to failed addresses list
        return None, None

# Apply geocoding to the filtered LSN column with a progress bar
tqdm.pandas()  # Enable progress bar for pandas
public_places_df['latitude'], public_places_df['longitude'] = zip(*public_places_df['LSN'].progress_apply(geocode_address))

# Add a delay between requests to avoid hitting rate limits
time.sleep(1)  # 1-second delay between requests

  4%|▍         | 36/958 [00:39<19:11,  1.25s/it]

Address not found: 3901 PARSONS AVE  COMM


  4%|▍         | 42/958 [00:45<14:46,  1.03it/s]

Error geocoding nan: unsupported operand type(s) for +: 'float' and 'str'


  5%|▌         | 49/958 [00:51<15:55,  1.05s/it]

Address not found: 555 YMCA PL, GAHANNA, OH


  5%|▌         | 52/958 [00:57<25:34,  1.69s/it]

Address not found: 250 S NAPOLEAN AVE


  6%|▌         | 58/958 [01:03<17:45,  1.18s/it]

Address not found: 1470 DAVIDSON DR


  7%|▋         | 64/958 [01:10<15:53,  1.07s/it]

Address not found: 2800 TREMONT RD


  7%|▋         | 66/958 [01:14<21:00,  1.41s/it]

Address not found: 542 W JOHNSTOWN RD


  7%|▋         | 67/958 [01:15<20:11,  1.36s/it]

Address not found: 455 CLARK STATE RD


  8%|▊         | 77/958 [01:27<17:39,  1.20s/it]

Address not found: 1945 LANE RD


  9%|▊         | 82/958 [01:32<14:51,  1.02s/it]

Address not found: 1581 CAMBRIDGE BLVD


  9%|▉         | 84/958 [01:35<14:58,  1.03s/it]

Address not found: 1320 CAMBRIDGE BLVD


  9%|▉         | 86/958 [01:37<16:52,  1.16s/it]

Address not found: 165 PARKVIEW AVE


  9%|▉         | 87/958 [01:38<14:55,  1.03s/it]

Address not found: 222 SCHOOLHOUSE LN


 10%|▉         | 92/958 [01:43<15:20,  1.06s/it]

Address not found: 1636 GRAHAM RD


 10%|█         | 99/958 [01:51<16:49,  1.18s/it]

Address not found: 750 CROSS POINTE RD


 11%|█         | 104/958 [01:55<14:25,  1.01s/it]

Address not found: 4770 HOOVER RD


 11%|█▏        | 108/958 [02:00<16:13,  1.14s/it]

Address not found: 6305 BRAND RD


 11%|█▏        | 109/958 [02:01<14:49,  1.05s/it]

Address not found: 6700 RINGS RD


 12%|█▏        | 111/958 [02:03<15:32,  1.10s/it]

Address not found: 5775 DUBLINSHIRE DR


 12%|█▏        | 112/958 [02:05<16:49,  1.19s/it]

Address not found: 6400 POST RD


 12%|█▏        | 113/958 [02:07<23:08,  1.64s/it]

Address not found: 51 N STATE ST


 16%|█▌        | 154/958 [02:53<16:22,  1.22s/it]

Address not found: 2812 N OSCEOLA ST


 18%|█▊        | 172/958 [03:12<15:19,  1.17s/it]

Address not found: 895 PARSONS AVE UNIT A


 20%|██        | 192/958 [03:36<19:09,  1.50s/it]

Address not found: 2143 N JOYCE AVE


 23%|██▎       | 221/958 [04:08<21:02,  1.71s/it]

Address not found: 2833 VALLEY VIEW DR


 46%|████▌     | 439/958 [08:14<11:37,  1.34s/it]

Address not found: 1060 E MAIN ST UNIT C


 53%|█████▎    | 511/958 [09:39<09:28,  1.27s/it]

Address not found: 6000 COOPER RD D


 62%|██████▏   | 594/958 [11:16<12:26,  2.05s/it]

Address not found: 275 EASTON TOWN CENTER


 71%|███████   | 681/958 [13:03<06:08,  1.33s/it]

Address not found: 7510 E BROAD ST BLDG B


 75%|███████▌  | 721/958 [13:46<04:47,  1.22s/it]

Address not found: 1442 1/2 SULLIVANT AVE


 81%|████████▏ | 779/958 [14:56<04:58,  1.67s/it]

Address not found: 647 1/2 W BROAD ST UNIT 27


 87%|████████▋ | 836/958 [15:56<02:28,  1.22s/it]

Address not found: 2879 VALLEY VIEW DR


 88%|████████▊ | 840/958 [16:01<02:40,  1.36s/it]

Address not found: 5718 KATHY RUN LN 8


 92%|█████████▏| 884/958 [16:55<01:49,  1.47s/it]

Address not found: 160 S HIGH ST UNIT PV


 93%|█████████▎| 889/958 [17:00<01:18,  1.14s/it]

Address not found: 3761 HARDING DR REAR


 97%|█████████▋| 931/958 [17:47<00:39,  1.46s/it]

Address not found: 570 E WOODROW AVE UNIT A


 97%|█████████▋| 934/958 [17:50<00:30,  1.26s/it]

Address not found: 1100 RHOADS AVE UNIT CC


 98%|█████████▊| 936/958 [17:52<00:24,  1.13s/it]

Address not found: 6135 RINGS RD


 98%|█████████▊| 938/958 [17:55<00:22,  1.12s/it]

Address not found: 4500 HICKORY CHASE WAY


 98%|█████████▊| 942/958 [17:59<00:18,  1.15s/it]

Address not found: 188 EASTON TOWN CENTER


 99%|█████████▉| 948/958 [18:05<00:10,  1.06s/it]

Address not found: 1780 BARRINGTON RD


 99%|█████████▉| 950/958 [18:07<00:08,  1.08s/it]

Address not found: 2300 LYTHAM RD


 99%|█████████▉| 951/958 [18:09<00:09,  1.31s/it]

Address not found: 717 E 17TH AVE BLDG CC


100%|██████████| 958/958 [18:17<00:00,  1.15s/it]


In [13]:
# Remove rows with failed addresses
public_places_df = public_places_df[~public_places_df['LSN'].isin(failed_addresses)]

# Display the results
print("\nPublic Places POI Data with Latitude and Longitude from Geocoding (Columbus, OH):")
print(public_places_df[['LSN', 'POI_NAME', 'POI_TYPE', 'latitude', 'longitude']])


Public Places POI Data with Latitude and Longitude from Geocoding (Columbus, OH):
                       LSN                                 POI_NAME  \
25           350 E 1ST AVE        Berry Brothers Boltworks Building   
41         3480 REFUGEE RD                      First Church of God   
63     2524 BILLINGSLEY RD                   Regals Cinema Columbus   
75        303 N EUREKA AVE                  Holton Community Center   
87      1254 BRIARWOOD AVE                  Linden Community Center   
...                    ...                                      ...   
15838   1060 MT VERNON AVE       Ethiopian Orthodox Tewahedo Church   
15843   2596 CLEVELAND AVE                   The Church of Columbus   
15865      276 S NELSON RD            Cleo Dumaree Athletic Complex   
15888   1127 MT VERNON AVE                 Mt Vernon Ave AME Church   
15936     1113 PARSONS AVE  Columbus Metropolitan Library - Parsons   

                                          POI_TYPE   latitude  l

In [14]:
# View updated number of rows and columns
public_places_df.shape

(915, 21)

In [15]:
# Select a few columns to use and explore for the data
public_places_df = public_places_df[['latitude', 'longitude', 'OBJECTID', 'LSN', 'POI_NAME', 'POI_TYPE']]
print(public_places_df.head())



     latitude  longitude  OBJECTID                  LSN  \
25  39.980974 -82.994742  20415681        350 E 1ST AVE   
41  39.917078 -82.906951  20415697      3480 REFUGEE RD   
63  40.112462 -83.083348  20415719  2524 BILLINGSLEY RD   
75  39.962526 -83.066428  20415731     303 N EUREKA AVE   
87  40.016975 -82.977456  20415743   1254 BRIARWOOD AVE   

                             POI_NAME  \
25  Berry Brothers Boltworks Building   
41                First Church of God   
63             Regals Cinema Columbus   
75            Holton Community Center   
87            Linden Community Center   

                                       POI_TYPE  
25                        Public Places - Other  
41             Public Places - House of Worship  
63         Public Places - Theater/Concert Hall  
75  Public Places - Community/Recreation Center  
87  Public Places - Community/Recreation Center  


In [16]:
# Rename columns
public_places_df = public_places_df.rename(columns={'OBJECTID': 'Id','LSN': 'address'})
print(public_places_df.head())

     latitude  longitude        Id              address  \
25  39.980974 -82.994742  20415681        350 E 1ST AVE   
41  39.917078 -82.906951  20415697      3480 REFUGEE RD   
63  40.112462 -83.083348  20415719  2524 BILLINGSLEY RD   
75  39.962526 -83.066428  20415731     303 N EUREKA AVE   
87  40.016975 -82.977456  20415743   1254 BRIARWOOD AVE   

                             POI_NAME  \
25  Berry Brothers Boltworks Building   
41                First Church of God   
63             Regals Cinema Columbus   
75            Holton Community Center   
87            Linden Community Center   

                                       POI_TYPE  
25                        Public Places - Other  
41             Public Places - House of Worship  
63         Public Places - Theater/Concert Hall  
75  Public Places - Community/Recreation Center  
87  Public Places - Community/Recreation Center  


In [None]:
# Convert Public Places dataset to a GeoDataFrame
public_places_gdf = gpd.GeoDataFrame(
    public_places_df,
    geometry=gpd.points_from_xy(public_places_df.longitude, public_places_df.latitude),
    crs="EPSG:4326"  # Assuming coordinates are in EPSG (latitude/longitude) for the Coordinate Reference System(CRS)
)

In [18]:
# Ensure both datasets are in the same CRS
communities_gdf = communities_gdf.to_crs(public_places_gdf.crs)

Merge public places and community data

In [19]:
# Perform a spatial join to associate public places with communities
poi_with_communities = gpd.sjoin(public_places_gdf, communities_gdf, how="left", predicate="within")

# Display the results
print(poi_with_communities.head())

     latitude  longitude        Id              address  \
25  39.980974 -82.994742  20415681        350 E 1ST AVE   
41  39.917078 -82.906951  20415697      3480 REFUGEE RD   
63  40.112462 -83.083348  20415719  2524 BILLINGSLEY RD   
75  39.962526 -83.066428  20415731     303 N EUREKA AVE   
87  40.016975 -82.977456  20415743   1254 BRIARWOOD AVE   

                             POI_NAME  \
25  Berry Brothers Boltworks Building   
41                First Church of God   
63             Regals Cinema Columbus   
75            Holton Community Center   
87            Linden Community Center   

                                       POI_TYPE                    geometry  \
25                        Public Places - Other  POINT (-82.99474 39.98097)   
41             Public Places - House of Worship  POINT (-82.90695 39.91708)   
63         Public Places - Theater/Concert Hall  POINT (-83.08335 40.11246)   
75  Public Places - Community/Recreation Center  POINT (-83.06643 39.96253)   
87 

In [20]:
poi_with_communities.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 915 entries, 25 to 15936
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   latitude        915 non-null    float64            
 1   longitude       915 non-null    float64            
 2   Id              915 non-null    int64              
 3   address         915 non-null    object             
 4   POI_NAME        915 non-null    object             
 5   POI_TYPE        915 non-null    object             
 6   geometry        915 non-null    geometry           
 7   index_right     904 non-null    float64            
 8   OBJECTID        904 non-null    float64            
 9   AREA_NAME       904 non-null    object             
 10  CREATED_DATE    904 non-null    datetime64[ms, UTC]
 11  LAST_EDIT_DATE  904 non-null    datetime64[ms, UTC]
 12  AREA_NO         904 non-null    float64            
 13  GLOBALID        904 non-null 

In [21]:
poi_with_communities.shape

(915, 16)

In [22]:
# Remove public places whose Community_Name is null
poi_with_communities = poi_with_communities.dropna(subset=["AREA_NAME"])

print(poi_with_communities.info())

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 904 entries, 25 to 15936
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   latitude        904 non-null    float64            
 1   longitude       904 non-null    float64            
 2   Id              904 non-null    int64              
 3   address         904 non-null    object             
 4   POI_NAME        904 non-null    object             
 5   POI_TYPE        904 non-null    object             
 6   geometry        904 non-null    geometry           
 7   index_right     904 non-null    float64            
 8   OBJECTID        904 non-null    float64            
 9   AREA_NAME       904 non-null    object             
 10  CREATED_DATE    904 non-null    datetime64[ms, UTC]
 11  LAST_EDIT_DATE  904 non-null    datetime64[ms, UTC]
 12  AREA_NO         904 non-null    float64            
 13  GLOBALID        904 non-null 

In [23]:
# Remove "Public Place - " from the beginning of the POI_TYPE column
poi_with_communities['POI_TYPE'] = poi_with_communities['POI_TYPE'].str.replace("Public Places - ", "", regex=False)
poi_with_communities.head()

Unnamed: 0,latitude,longitude,Id,address,POI_NAME,POI_TYPE,geometry,index_right,OBJECTID,AREA_NAME,CREATED_DATE,LAST_EDIT_DATE,AREA_NO,GLOBALID,SHAPESTArea,SHAPESTLength
25,39.980974,-82.994742,20415681,350 E 1ST AVE,Berry Brothers Boltworks Building,Other,POINT (-82.99474 39.98097),21.0,380.0,Italian Village,2018-10-02 19:34:47+00:00,2022-11-17 14:39:43+00:00,47.0,{23B97897-A442-4B4A-AAAF-BB956C49A386},12450790.0,14763.923784
41,39.917078,-82.906951,20415697,3480 REFUGEE RD,First Church of God,House of Worship,POINT (-82.90695 39.91708),29.0,388.0,Mid East,2018-10-02 19:34:47+00:00,2022-11-17 15:14:08+00:00,57.0,{4BE4DBEC-082A-4779-B872-1C922A21BB54},330456800.0,139179.76647
63,40.112462,-83.083348,20415719,2524 BILLINGSLEY RD,Regals Cinema Columbus,Theater/Concert Hall,POINT (-83.08335 40.11246),25.0,384.0,Far Northwest,2018-10-02 19:34:47+00:00,2018-10-02 19:41:44+00:00,31.0,{63A965E8-9A26-42A7-9763-5283C1590319},195709600.0,125543.124063
75,39.962526,-83.066428,20415731,303 N EUREKA AVE,Holton Community Center,Community/Recreation Center,POINT (-83.06643 39.96253),11.0,370.0,Greater Hilltop,2018-10-02 19:34:47+00:00,2022-11-17 14:47:44+00:00,53.0,{B2EB0D99-1757-412E-8223-3BDA6EC70595},431335500.0,80863.448157
87,40.016975,-82.977456,20415743,1254 BRIARWOOD AVE,Linden Community Center,Community/Recreation Center,POINT (-82.97746 40.01698),14.0,373.0,North Linden,2018-10-02 19:34:47+00:00,2022-11-10 18:37:10+00:00,40.0,{EBD72502-8437-49EA-8B84-B94262C09F21},137109500.0,48990.929642


In [24]:
# Rename columns
poi_with_communities = poi_with_communities.rename(columns={'Id': 'business_id',
                                                            'POI_NAME': 'business_name',
                                                            'POI_TYPE': 'business_type',
                                                            'OBJECTID': 'community_id',
                                                            'AREA_NAME': 'community_name',
                                                            'CREATED_DATE': 'community_created_date', 
                                                            'LAST_EDIT_DATE': 'last_edit_date',
                                                            'AREA_NO': 'community_no',
                                                            'GLOBALID': 'global_id',
                                                            'SHAPESTArea': 'shape_area',
                                                            'SHAPESTLength': 'shape_length'})

print(poi_with_communities.head())

     latitude  longitude  business_id              address  \
25  39.980974 -82.994742     20415681        350 E 1ST AVE   
41  39.917078 -82.906951     20415697      3480 REFUGEE RD   
63  40.112462 -83.083348     20415719  2524 BILLINGSLEY RD   
75  39.962526 -83.066428     20415731     303 N EUREKA AVE   
87  40.016975 -82.977456     20415743   1254 BRIARWOOD AVE   

                        business_name                business_type  \
25  Berry Brothers Boltworks Building                        Other   
41                First Church of God             House of Worship   
63             Regals Cinema Columbus         Theater/Concert Hall   
75            Holton Community Center  Community/Recreation Center   
87            Linden Community Center  Community/Recreation Center   

                      geometry  index_right  community_id   community_name  \
25  POINT (-82.99474 39.98097)         21.0         380.0  Italian Village   
41  POINT (-82.90695 39.91708)         29.0       

Prepare the data for SQL database

In [25]:
# Define the columns for each DataFrame
business_columns = ['latitude', 'longitude', 'business_id', 'address', 'business_name', 'business_type','community_id']
community_columns = ['community_id', 'community_name', 'community_no', 'global_id']

# Create DataFrames
business_df = poi_with_communities[business_columns].copy()
community_df = poi_with_communities[community_columns].copy()

In [26]:
# Display the first few rows of each DataFrame for verification
print("Business Info DataFrame:")
print(business_df.head())

print("\nCommunity Info DataFrame:")
print(community_df.head())

Business Info DataFrame:
     latitude  longitude  business_id              address  \
25  39.980974 -82.994742     20415681        350 E 1ST AVE   
41  39.917078 -82.906951     20415697      3480 REFUGEE RD   
63  40.112462 -83.083348     20415719  2524 BILLINGSLEY RD   
75  39.962526 -83.066428     20415731     303 N EUREKA AVE   
87  40.016975 -82.977456     20415743   1254 BRIARWOOD AVE   

                        business_name                business_type  \
25  Berry Brothers Boltworks Building                        Other   
41                First Church of God             House of Worship   
63             Regals Cinema Columbus         Theater/Concert Hall   
75            Holton Community Center  Community/Recreation Center   
87            Linden Community Center  Community/Recreation Center   

    community_id  
25         380.0  
41         388.0  
63         384.0  
75         370.0  
87         373.0  

Community Info DataFrame:
    community_id   community_name  commu

In [27]:
# Save the DataFrames to separate CSV files
business_df.to_csv("Resources/Output/business_data.csv", index=False)
community_df.to_csv("Resources/Output/community_data.csv", index=False)