In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import Column, String
from geoalchemy2 import Geometry
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
import logging

In [2]:
user_name = 'l01-4'
#use the file way of storing password or set passw to your password
with open("l01-4_password.txt") as f:
    passw = f.read()

# Calgary Communities
## Source: City of Calgary's Open Data Portal
Changelog for Data Export to SQL Table

*From Community Points*
- Include 
CLASS	CLASS_CODE	COMM_CODE	NAME	SECTOR	SRG	COMM_STRUCTURE	longitude	latitude	POINT

Link: https://data.calgary.ca/Base-Maps/Community-Points/j9ps-fyst/about_data


*Community_District_Boundaries*
- Include polygon
- Join this dataset to Community Points

*Merged datasets for Community tablet*
- Make COMM_CODE a pary keyrithis is to improve query performance as future queries will join or filter on COMM_CODEkey
- Make Point and Polygon geometry data types
- Rename Point and Polygon to COMMUNITY_POINT and COMMUNITY_POLYGON to avoid keywords as n names and it is more meaningful name

Link: https://data.calgary.ca/Base-Maps/Community-Boundaries-2011/j3yz-fb4u/about_data

Link: https://data.calgary.ca/Base-Maps/Community-District-Boundaries/surr-xmvs/about_dataata

In [3]:
df_communities = pd.read_csv("Community_Points_20240306.csv")
df_communities.head(5)

Unnamed: 0,CLASS,CLASS_CODE,COMM_CODE,NAME,SECTOR,SRG,COMM_STRUCTURE,longitude,latitude,POINT
0,Residential,1,BED,BEDDINGTON HEIGHTS,NORTH,ESTABLISHED,1960s/1970s,-114.085021,51.131633,POINT (-114.085021395442 51.131632808734)
1,Residential,1,RVW,RANGEVIEW,SOUTHEAST,DEVELOPING,BUILDING OUT,-113.923717,50.874129,POINT (-113.923716806793 50.874129341662)
2,Residential,1,WND,WINDSOR PARK,CENTRE,ESTABLISHED,1950s,-114.083549,51.005043,POINT (-114.083548819223 51.005043176725)
3,Residential,1,ROY,ROYAL OAK,NORTHWEST,COMPLETE,2010s,-114.219745,51.142667,POINT (-114.219744952347 51.142667391135)
4,Residential,1,SVR,SYMONS VALLEY RANCH,NORTH,DEVELOPING,BUILDING OUT,-114.138077,51.184799,POINT (-114.138077268409 51.184799253171)


In [4]:
df_community_boundaries = pd.read_csv("Community_District_Boundaries_20240312.csv")
df_community_boundaries.head(5)

Unnamed: 0,CLASS,CLASS_CODE,COMM_CODE,NAME,SECTOR,SRG,COMM_STRUCTURE,CREATED_DT,MODIFIED_DT,MULTIPOLYGON
0,Residential,1,LEB,LEWISBURG,NORTH,DEVELOPING,BUILDING OUT,2016/12/21,2019/11/26,"MULTIPOLYGON (((-114.0480237 51.1749865, -114...."
1,Residential,1,CSC,CITYSCAPE,NORTHEAST,DEVELOPING,BUILDING OUT,2016/12/21,2016/12/21,"MULTIPOLYGON (((-113.9524996 51.1543075, -113...."
2,Industrial,2,ST1,STONEY 1,NORTH,,EMPLOYMENT,2016/12/21,2016/12/21,"MULTIPOLYGON (((-114.0133015 51.1744266, -114...."
3,Residential,1,MRT,MARTINDALE,NORTHEAST,ESTABLISHED,1980s/1990s,2016/12/21,2020/10/22,"MULTIPOLYGON (((-113.9648991 51.1251901, -113...."
4,Industrial,2,ST2,STONEY 2,NORTHEAST,,EMPLOYMENT,2016/12/21,2016/12/21,"MULTIPOLYGON (((-113.9939281 51.153327, -113.9..."


In [11]:
# Merge the DataFrames based on a common column
# Assuming 'COMM_CODE' is the common key
df_community_merged = pd.merge(df_communities, df_community_boundaries[['COMM_CODE', 'MULTIPOLYGON']], on='COMM_CODE', how='left')
# Rename the 'polygon' column to 'COMMUNITY_BOUNDARY'
df_community_merged = df_community_merged.rename(columns={'MULTIPOLYGON': 'COMMUNITY_BOUNDARY'})
df_community_merged = df_community_merged.rename(columns={'POINT': 'COMMUNITY_POINT'})
df_community_merged = df_community_merged.rename(columns={'NAME': 'COMMUNITY_NAME'})

# Convert NaN to None
df_community_merged.replace({np.nan: None}, inplace=True)
# Rename columns to uppercase
df_community_merged.columns = df_community_merged.columns.str.upper()
df_community_merged.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 310 entries, 0 to 309
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   CLASS               310 non-null    object 
 1   CLASS_CODE          310 non-null    int64  
 2   COMM_CODE           310 non-null    object 
 3   NAME                310 non-null    object 
 4   SECTOR              307 non-null    object 
 5   SRG                 247 non-null    object 
 6   COMM_STRUCTURE      308 non-null    object 
 7   LONGITUDE           310 non-null    float64
 8   LATITUDE            310 non-null    float64
 9   COMMUNITY_POINT     310 non-null    object 
 10  COMMUNITY_BOUNDARY  307 non-null    object 
dtypes: float64(2), int64(1), object(8)
memory usage: 26.8+ KB


In [7]:
# Define the database connection string
database_url = f"mysql+mysqlconnector://{user_name}:{passw}@datasciencedb.ucalgary.ca/{user_name}"
# Set the logging level for SQLAlchemy to WARNING, from now on no more INFO, CATEGORY
logging.getLogger('sqlalchemy').setLevel(logging.WARNING)
# Create the SQLAlchemy engine
engine = create_engine(database_url, echo=True)  # Set echo to True for debugging

# Test the connection
with engine.connect() as connection:
    result = connection.execute("SELECT 1")
    print(result.scalar())

2024-03-14 12:30:39,399 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2024-03-14 12:30:39,402 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-14 12:30:39,460 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2024-03-14 12:30:39,462 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-14 12:30:39,495 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2024-03-14 12:30:39,500 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-14 12:30:39,537 INFO sqlalchemy.engine.Engine SELECT 1
2024-03-14 12:30:39,540 INFO sqlalchemy.engine.Engine [raw sql] {}
1


In [8]:
# Define the Base class
Base = declarative_base()
class Community(Base):
    __tablename__ = 'communities'

    COMM_CODE = Column(String, primary_key=True)
    CLASS_CODE = Column(String)
    COMMUNITY_NAME = Column(String)
    SECTOR = Column(String)
    SRG = Column(String)
    COMM_STRUCTURE = Column(String)
    LONGITUDE = Column(String)
    LATITUDE = Column(String)
    COMMUNITY_POINT = Column(Geometry('POINT', srid=4326))
    COMMUNITY_BOUNDARY = Column(Geometry('POLYGON', srid=4326))
# Create the engine and bind it to the Base
Base.metadata.create_all(engine)

2024-03-14 12:33:29,969 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-14 12:33:29,994 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2024-03-14 12:33:29,999 INFO sqlalchemy.engine.Engine [generated in 0.02090s] {'table_schema': 'l01-4', 'table_name': 'communities'}
2024-03-14 12:33:30,015 INFO sqlalchemy.engine.Engine COMMIT


In [9]:
Session = sessionmaker(bind=engine)
session = Session()

In [12]:
# Loop through DataFrame rows and insert data into the database
for index, row in df_community_merged.iterrows():
    try:
        community = Community(
        COMM_CODE=row['COMM_CODE'],
        CLASS_CODE =row['CLASS_CODE'],
        COMMUNITY_NAME =row['COMMUNITY_NAME'],
        SECTOR =row['SECTOR'],
        SRG=row['SRG'],
        COMM_STRUCTURE =row['COMM_STRUCTURE'],
        LONGITUDE =row['LONGITUDE'],
        LATITUDE =row['LATITUDE'],
        COMMUNITY_POINT=row['COMMUNITY_POINT'],
        COMMUNITY_BOUNDARY=row['COMMUNITY_BOUNDARY'])
        session.add(community)


    except Exception as e:
        print(f"Error in row {index}: {e}")
        print(row)  # Print the entire row for reference
        session.rollback()  # Rollback the transaction to continue with the next row
session.commit()  # Commit the transaction

2024-03-14 12:36:11,284 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-14 12:36:11,330 INFO sqlalchemy.engine.Engine INSERT INTO communities (`COMM_CODE`, `CLASS_CODE`, `NAME`, `SECTOR`, `SRG`, `COMM_STRUCTURE`, `LONGITUDE`, `LATITUDE`, `COMMUNITY_POINT`, `COMMUNITY_BOUNDARY`) VALUES (%(COMM_CODE)s, %(CLASS_CODE)s, %(NAME)s, %(SECTOR)s, %(SRG)s, %(COMM_STRUCTURE)s, %(LONGITUDE)s, %(LATITUDE)s, ST_GeomFromText(%(COMMUNITY_POINT)s, 4326), ST_GeomFromText(%(COMMUNITY_BOUNDARY)s, 4326))
2024-03-14 12:36:11,333 INFO sqlalchemy.engine.Engine [generated in 0.02834s] ({'COMM_CODE': 'BED', 'CLASS_CODE': 1, 'NAME': 'BEDDINGTON HEIGHTS', 'SECTOR': 'NORTH', 'SRG': 'ESTABLISHED', 'COMM_STRUCTURE': '1960s/1970s', 'LONGITUDE': -114.08502139544244, 'LATITUDE': 51.13163280873361, 'COMMUNITY_POINT': 'POINT (-114.085021395442 51.131632808734)', 'COMMUNITY_BOUNDARY': 'MULTIPOLYGON (((-114.065193 51.132128, -114.0683966 51.1331831, -114.0702898 51.1338407, -114.071444 51.134295, -114.072102 51.1345

In [None]:
session.close()