In [12]:
import pandas as pd
import numpy as np
from sqlalchemy import Column, String, Integer, Text, Date
from geoalchemy2 import Geometry
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base


In [7]:
user_name = 'l01-4'
#use the file way of storing password or set passw to your password
with open("l01-4_password.txt") as f:
    passw = f.read()

# Police Service Stations
## Source: City of Calgary's Open Data Portal
*Changelog for Data Export to SQL Table*

- Include NAME	STATION_TY	ADDRESS	INFO	CREATED_DT	MODIFIED_DT	POINT
- review names, set to a naming standard - change to uppercase so it is consistent
-- rename NAME to STATION_NAME, STATION_TY to STATION_TYPE
- primary key has to be create as STATION_ID and auto-generate

Link: https://data.calgary.ca/Health-and-Safety/Police-Service/ap4r-bav3/about_data

In [8]:
df_police = pd.read_csv("Police_Service_20240306.csv")
df_police.head(5)

Unnamed: 0,NAME,STATION_TY,ADDRESS,INFO,CREATED_DT,MODIFIED_DT,POINT
0,CPS TRAFFIC SECTION,District Office,6528 11 ST NE,(403)428-5500,2012/09/26,2019/01/03,POINT (-114.0361427 51.1120646)
1,DISTRICT 8 OFFICE (TRI-SERVICES),District Office,450 MIDPARK WY SE,(403)428-6800,2012/09/26,2015/07/23,POINT (-114.0594053 50.9067762)
2,DISTRICT 5 OFFICE,District Office,800 SADDLETOWNE CI NE,(403)428-6500,2012/09/26,2015/07/23,POINT (-113.9445035 51.1244639)
3,CHINOOK CENTRE MALL STATION,COPS Stn.,6455 MACLEOD TR S,(403)428-5810,2012/09/26,2015/07/23,POINT (-114.0719993 50.9976618)
4,DISTRICT 2 OFFICE,District Office,4506 17 AV SW,(403)428-6200,2012/09/26,2015/07/23,POINT (-114.1514266 51.0381119)


In [9]:
# Rename the 'polygon' column to 'COMMUNITY_BOUNDARY'
df_police = df_police.rename(columns={'STATION_TY': 'STATION_TYPE'})
df_police = df_police.rename(columns={'NAME': 'STATION_NAME'})
df_police = df_police.rename(columns={'POINT': 'STATION_POINT'})
# Convert NaN to None
df_police.replace({np.nan: None}, inplace=True)
# Rename columns to uppercase
df_police.columns = df_police.columns.str.upper()
df_police.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   STATION_NAME   12 non-null     object
 1   STATION_TYPE   12 non-null     object
 2   ADDRESS        12 non-null     object
 3   INFO           12 non-null     object
 4   CREATED_DT     12 non-null     object
 5   MODIFIED_DT    12 non-null     object
 6   STATION_POINT  12 non-null     object
dtypes: object(7)
memory usage: 804.0+ bytes


In [10]:
# Define the database connection string
database_url = f"mysql+mysqlconnector://{user_name}:{passw}@datasciencedb.ucalgary.ca/{user_name}"

# Create the SQLAlchemy engine
engine = create_engine(database_url, echo=True)  # Set echo to True for debugging

# Test the connection
with engine.connect() as connection:
    result = connection.execute("SELECT 1")
    print(result.scalar())

2024-03-19 17:39:12,562 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2024-03-19 17:39:12,562 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-19 17:39:12,594 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2024-03-19 17:39:12,596 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-19 17:39:12,606 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2024-03-19 17:39:12,608 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-19 17:39:12,620 INFO sqlalchemy.engine.Engine SELECT 1
2024-03-19 17:39:12,624 INFO sqlalchemy.engine.Engine [raw sql] {}
1


In [13]:
# Define the Base class
Base = declarative_base()

class PoliceServiceStations(Base):
    __tablename__ = 'POLICE_SERVICE_STATIONS'
    
    STATION_ID = Column(Integer, primary_key=True, autoincrement=True)
    STATION_NAME = Column(String(255))
    STATION_TYPE = Column(String(255))
    ADDRESS = Column(String(255))
    INFO = Column(String(20))
    CREATED_DT = Column(Date)
    MODIFIED_DT = Column(Date)
    STATION_POINT = Column(Geometry(geometry_type='POINT', srid=4326), default=None)

    
Base.metadata.create_all(engine)

2024-03-19 17:39:42,081 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-19 17:39:42,092 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2024-03-19 17:39:42,094 INFO sqlalchemy.engine.Engine [generated in 0.00778s] {'table_schema': 'l01-4', 'table_name': 'POLICE_SERVICE_STATIONS'}
2024-03-19 17:39:42,105 INFO sqlalchemy.engine.Engine COMMIT


In [14]:
Session = sessionmaker(bind=engine)
session = Session()

In [19]:
# Loop through DataFrame rows and insert data into the PoliceServiceStations table
for index, row in df_police.head(10).iterrows():
    try:
        police_station = PoliceServiceStations(
            STATION_NAME=row['STATION_NAME'],
            STATION_TYPE=row['STATION_TYPE'],
            ADDRESS=row['ADDRESS'],
            INFO=row['INFO'],
            CREATED_DT=row['CREATED_DT'],
            MODIFIED_DT=row['MODIFIED_DT'],
            STATION_POINT=row['STATION_POINT']  # Assuming you have this data in the correct format
        )
        session.add(police_station)
        if (index + 1) % 1000 == 0:
            # Commit every 1000 records to avoid a very large transaction
            session.commit()
            print(f"Committed {index + 1} records")

    except Exception as e:
        print(f"Error in row {index}: {e}")
        session.rollback()  # Rollback the transaction to continue with the next row

# Commit any remaining rows that weren't committed inside the loop
session.commit()

2024-03-19 17:42:18,655 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-19 17:42:18,669 INFO sqlalchemy.engine.Engine INSERT INTO `POLICE_SERVICE_STATIONS` (`STATION_NAME`, `STATION_TYPE`, `ADDRESS`, `INFO`, `CREATED_DT`, `MODIFIED_DT`, `STATION_POINT`) VALUES (%(STATION_NAME)s, %(STATION_TYPE)s, %(ADDRESS)s, %(INFO)s, %(CREATED_DT)s, %(MODIFIED_DT)s, ST_GeomFromText(%(STATION_POINT)s, 4326))
2024-03-19 17:42:18,672 INFO sqlalchemy.engine.Engine [cached since 121.9s ago] {'STATION_NAME': 'CPS TRAFFIC SECTION', 'STATION_TYPE': 'District Office', 'ADDRESS': '6528 11 ST NE', 'INFO': '(403)428-5500', 'CREATED_DT': '2012/09/26', 'MODIFIED_DT': '2019/01/03', 'STATION_POINT': 'POINT (-114.0361427 51.1120646)'}
2024-03-19 17:42:18,680 INFO sqlalchemy.engine.Engine INSERT INTO `POLICE_SERVICE_STATIONS` (`STATION_NAME`, `STATION_TYPE`, `ADDRESS`, `INFO`, `CREATED_DT`, `MODIFIED_DT`, `STATION_POINT`) VALUES (%(STATION_NAME)s, %(STATION_TYPE)s, %(ADDRESS)s, %(INFO)s, %(CREATED_DT)s, %(MODIF

In [20]:
session.close()
#close the session, end of script

In [18]:
session.rollback()