# Creating Melbourne Business Database

## Extract Data From API

In [12]:
# Dependencies
import pandas as pd, requests, json
from sodapy import Socrata
from pandas_geojson import to_geojson
from sqlalchemy import create_engine

In [13]:
# Optional one-time setup: uncomment to install the package that provides
# `pandas_geojson` (imported at the top of this notebook) into the kernel's environment.
# %pip install pandas-geojson

In [14]:
# Open a client against the data.melbourne.vic.gov.au Socrata endpoint.
# No application token is supplied.
client = Socrata("data.melbourne.vic.gov.au", app_token=None)

# Request up to 100000 records of dataset "xt2y-tnn9" in one call.
results = client.get("xt2y-tnn9", limit=100000)



In [15]:
# Turn the API records into a DataFrame
results_df = pd.DataFrame.from_records(results)

# Keep an untouched snapshot of the raw extract on disk
results_df.to_csv(path_or_buf='melbourne_business_data.csv')

# Preview the first two rows
results_df.head(n=2)

Unnamed: 0,census_year,block_id,property_id,base_property_id,street_address,clue_small_area,trading_name,industry_anzsic4_code,industry_anzsic4_description,seating_type,number_of_seats,x_coordinate,y_coordinate,geocoded_column
0,2002,2,111467,103973,0 King Street MELBOURNE 3000,Melbourne (CBD),469-479 King Street MELBOURNE 3000,8921,Zoological and Botanical Gardens Operation,Seats - Indoor,113,144.9595,-37.8206,"{'type': 'Point', 'coordinates': [144.9595, -3..."
1,2002,2,111467,103973,0 King Street MELBOURNE 3000,Melbourne (CBD),469-479 King Street MELBOURNE 3000,8921,Zoological and Botanical Gardens Operation,Seats - Outdoor,38,144.9595,-37.8206,"{'type': 'Point', 'coordinates': [144.9595, -3..."


In [16]:
# Number of rows in the raw extract
results_df.shape[0]

54052

## Transform Data

In [17]:
# Keep only the columns used downstream, then discard every row that has a
# missing value in any of them.
transformed_result_df = results_df[
    [
        "census_year",
        "clue_small_area",
        "trading_name",
        "industry_anzsic4_description",
        "seating_type",
        "number_of_seats",
        "x_coordinate",
        "y_coordinate",
    ]
].dropna()

# Persist the cleaned extract, then report how many rows survived and the
# dtype of each remaining column.
transformed_result_df.to_csv('cleaned_melbourne_business_data.csv')
print(len(transformed_result_df))
transformed_result_df.dtypes

53990


census_year                     object
clue_small_area                 object
trading_name                    object
industry_anzsic4_description    object
seating_type                    object
number_of_seats                 object
x_coordinate                    object
y_coordinate                    object
dtype: object

In [18]:
# change data type
# All columns are still `object` dtype at this point, so cast the numeric ones.
# `astype` with a column -> dtype mapping converts them in one call and returns
# a new frame, which avoids assigning column-by-column into a frame that was
# derived from a slice of `results_df` (a SettingWithCopyWarning risk).
transformed_result_df = transformed_result_df.astype({
    'census_year': 'int',
    'number_of_seats': 'int',
    'x_coordinate': 'float',
    'y_coordinate': 'float',
})
transformed_result_df.dtypes

census_year                       int32
clue_small_area                  object
trading_name                     object
industry_anzsic4_description     object
seating_type                     object
number_of_seats                   int32
x_coordinate                    float64
y_coordinate                    float64
dtype: object

In [19]:
# Convert the cleaned frame to GeoJSON: `y_coordinate` is passed as the latitude,
# `x_coordinate` as the longitude, and the listed columns as properties.
geo_json = to_geojson(
    df=transformed_result_df,
    lat='y_coordinate',
    lon='x_coordinate',
    properties=[
        "census_year",
        "clue_small_area",
        "trading_name",
        "industry_anzsic4_description",
        "seating_type",
        "number_of_seats",
    ],
)

In [20]:
# save geojson file
from geojson import dump

# The file name previously began with a stray "]" character; it now matches
# the naming of the CSV outputs written earlier in the notebook.
# The `with` block closes the file once the dump has finished.
with open('melbourne_business_data.geojson', 'w') as output_file:
    dump(geo_json, output_file)

## Load dataframe to database 

In [24]:
# create connection
import os
from getpass import getpass
from urllib.parse import quote_plus

# Connection settings are read from the environment so that no credentials are
# stored in the notebook. Every setting except the password falls back to the
# value this notebook used previously.
db_user = os.environ.get("PGUSER", "postgres")
db_host = os.environ.get("PGHOST", "localhost")
db_port = os.environ.get("PGPORT", "5432")
db_name = os.environ.get("PGDATABASE", "Melbourne_Business_db")

# The password has no default: it comes from PGPASSWORD, or from an
# interactive prompt when that variable is unset or empty.
db_password = os.environ.get("PGPASSWORD") or getpass("PostgreSQL password: ")

# Percent-encode the password so characters such as "@", "/" or ":" cannot be
# mistaken for URL delimiters.
connection = f"{db_user}:{quote_plus(db_password)}@{db_host}:{db_port}/{db_name}"
engine = create_engine(f'postgresql://{connection}')

In [25]:
# Confirm tables
from sqlalchemy import inspect

# `Engine.table_names()` is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector returns the same list of table names.
inspect(engine).get_table_names()

  


['melbourne_business']

In [26]:
# Load the cleaned frame into the "melbourne_business" table, leaving out the
# DataFrame index.
# NOTE: with if_exists='append', re-running this cell inserts the same rows again.
transformed_result_df.to_sql(
    name="melbourne_business",
    con=engine,
    if_exists='append',
    index=False,
)