In [14]:
import numpy as np
import pandas as pd
from geoalchemy2 import Geometry
from sqlalchemy import Column, String, Integer, Text
from sqlalchemy import create_engine
from sqlalchemy import text
from sqlalchemy.engine import URL
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker


In [15]:
user_name = 'l01-4'
# The password is read from a local text file so that it is never typed into the
# notebook itself (alternatively, set `passw` to your own password).
with open("l01-4_password.txt") as f:
    # Strip surrounding whitespace: text editors usually end the file with a
    # newline, which would otherwise become part of the password.
    passw = f.read().strip()

# Transit LRT Stations
## Source: City of Calgary's Open Data Portal
Changelog for Data Export to SQL Table
- Include the columns: STATIONNAM, LEG, DIRECTION, DIST_NB, DIST_SB, DIST_EB, DIST_WB, ROUTE, STATUS, the_geom

- Make the_geom a GEOMETRY data type, rename to LRT_POINT so it is more meaningful
- Review names and set a naming standard: change to uppercase so it is consistent
  - Rename STATIONNAM to STATION_NAME
- A primary key has to be created as STATION_ID, an auto-generated number; neither Open Data Calgary nor the Calgary Transit website provides a unique identifier for each LRT station

Link: https://data.calgary.ca/Transportation-Transit/Transit-LRT-Stations/2axz-xm4q/about_data

In [16]:
# Load the "Transit LRT Stations" CSV export and preview the first rows.
df_LRT = pd.read_csv(filepath_or_buffer="Transit_LRT_Stations_20240229.csv")
df_LRT.head()  # head() shows 5 rows by default

Unnamed: 0,STATIONNAM,LEG,DIRECTION,DIST_NB,DIST_SB,DIST_EB,DIST_WB,ROUTE,STATUS,the_geom
0,45 Street SW Station,West,West/East,,,,,202,Current,POINT (-114.1541947 51.0379605)
1,Sirocco Station,West,West/East,,,,,202,Current,POINT (-114.1690442 51.0383797)
2,City Hall Station,DTWestbnd,West,,,200.0,,201/202,Current,POINT (-114.0570028 51.0463789)
3,1st Street SW Station,DTWestbnd,West,0.0,0.0,467.0,439.0,201/202,Current,POINT (-114.0640924 51.0465842)
4,Dalhousie Station,NW,North/South,3966.0,2732.0,0.0,0.0,201,Current,POINT (-114.1605254 51.1032838)


In [17]:
# Standardise the current-stations frame so it matches the target SQL table:
#   * 'the_geom'   -> 'LRT_POINT'   (the station's WKT point)
#   * 'STATIONNAM' -> 'STATION_NAME'
#   * NaN -> None, so missing values reach the database as NULL
#   * all column names upper-cased for a consistent naming standard
# The steps are chained (no inplace=True) so the cell reads as one
# top-to-bottom transformation.
df_LRT = (
    df_LRT
    .rename(columns={'the_geom': 'LRT_POINT', 'STATIONNAM': 'STATION_NAME'})
    .replace({np.nan: None})
    .rename(columns=str.upper)
)
df_LRT.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47 entries, 0 to 46
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   STATION_NAME  47 non-null     object
 1   LEG           47 non-null     object
 2   DIRECTION     47 non-null     object
 3   DIST_NB       37 non-null     object
 4   DIST_SB       37 non-null     object
 5   DIST_EB       36 non-null     object
 6   DIST_WB       35 non-null     object
 7   ROUTE         47 non-null     object
 8   STATUS        47 non-null     object
 9   LRT_POINT     47 non-null     object
dtypes: object(10)
memory usage: 3.8+ KB


In [18]:
# Load the future Green Line LRT stations and shape them like the current
# stations: upper-case column names, the shared STATION_NAME / LRT_POINT names,
# and constant STATUS / LEG values.
df_green_lrt = (
    pd.read_csv("Green_Line_Stations_20240319.csv")
    .rename(columns=str.upper)
    .rename(columns={'NAME': 'STATION_NAME', 'POINT': 'LRT_POINT'})
    .assign(
        STATUS='Future',
        LEG='Green',  # placeholder value until final construction
    )
)
df_green_lrt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   STATION_NAME         29 non-null     object
 1   INFORMATION          29 non-null     object
 2   TOD                  29 non-null     object
 3   UNDERGROUND          29 non-null     object
 4   STATION TYPE         15 non-null     object
 5   INTRODUCTION TEXT    29 non-null     object
 6   NEARBY DESTINATIONS  15 non-null     object
 7   CONSTRUCTION STAGE   15 non-null     object
 8   LOCATION             14 non-null     object
 9   LRT_POINT            29 non-null     object
 10  STATUS               29 non-null     object
 11  LEG                  29 non-null     object
dtypes: object(12)
memory usage: 2.8+ KB


In [19]:
# Define the database connection URL.
# It is assembled from its parts rather than formatted into a string, so a
# password containing reserved characters such as '@', ':' or '/' is escaped
# correctly instead of corrupting the URL.
database_url = URL.create(
    drivername="mysql+mysqlconnector",
    username=user_name,
    password=passw,
    host="datasciencedb.ucalgary.ca",
    database=user_name,  # the database (schema) has the same name as the user
)

# Create the SQLAlchemy engine
engine = create_engine(database_url, echo=True)  # echo=True logs every SQL statement (debugging)

# Test the connection.
# Raw SQL is wrapped in text(): passing a plain string to execute() is
# deprecated in SQLAlchemy 1.4 and no longer accepted in 2.0.
with engine.connect() as connection:
    result = connection.execute(text("SELECT 1"))
    print(result.scalar())

2024-03-20 18:01:48,050 INFO sqlalchemy.engine.Engine SELECT DATABASE()
2024-03-20 18:01:48,061 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-20 18:01:48,075 INFO sqlalchemy.engine.Engine SELECT @@sql_mode
2024-03-20 18:01:48,083 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-20 18:01:48,093 INFO sqlalchemy.engine.Engine SELECT @@lower_case_table_names
2024-03-20 18:01:48,093 INFO sqlalchemy.engine.Engine [raw sql] {}
2024-03-20 18:01:48,107 INFO sqlalchemy.engine.Engine SELECT 1
2024-03-20 18:01:48,109 INFO sqlalchemy.engine.Engine [raw sql] {}
1


In [20]:
# Declarative base that the ORM model below registers its table metadata on
Base = declarative_base()


class TransitLRTStation(Base):
    """ORM mapping for one row of the ``transit_lrt_stations`` table.

    Column names mirror the upper-cased DataFrame columns, so a DataFrame row
    can be passed to the constructor as keyword arguments.
    """

    __tablename__ = 'transit_lrt_stations'

    # Surrogate key generated by the database
    STATION_ID = Column(Integer, primary_key=True, autoincrement=True)

    # Descriptive attributes, all stored as text.
    # Leaving out `default=` is the same as passing default=None.
    STATION_NAME = Column(Text)
    LEG = Column(Text)
    DIRECTION = Column(Text)
    DIST_NB = Column(Text)
    DIST_SB = Column(Text)
    DIST_EB = Column(Text)
    DIST_WB = Column(Text)
    ROUTE = Column(Text)
    STATUS = Column(Text)

    # Station location as a POINT with SRID 4326.
    # NOTE(review): the source WKT is written longitude-first; some servers
    # (e.g. MySQL 8.0) read SRID 4326 WKT latitude-first - confirm the axis
    # order on the target server.
    LRT_POINT = Column(Geometry(geometry_type='POINT', srid=4326))


# Create the table only if it does not exist yet (checkfirst=True is the default)
Base.metadata.create_all(bind=engine, checkfirst=True)

2024-03-20 18:01:48,948 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-20 18:01:48,965 INFO sqlalchemy.engine.Engine SELECT COUNT(*) FROM information_schema.tables WHERE table_schema = %(table_schema)s AND table_name = %(table_name)s
2024-03-20 18:01:48,965 INFO sqlalchemy.engine.Engine [generated in 0.01608s] {'table_schema': 'l01-4', 'table_name': 'transit_lrt_stations'}
2024-03-20 18:01:48,974 INFO sqlalchemy.engine.Engine COMMIT


In [21]:
# Session factory bound to the engine created above
Session = sessionmaker(bind=engine)
# One session instance, used by the insert loops below and closed in the last cell
session = Session()

In [22]:
# Loop through DataFrame rows and insert data into the database
for index, row in df_LRT.iterrows():
    try:
        lrt_station = TransitLRTStation(
            STATION_NAME=row['STATION_NAME'],
            LEG=row['LEG'],
            DIRECTION=row['DIRECTION'],
            DIST_NB=row['DIST_NB'],
            DIST_SB=row['DIST_SB'],
            DIST_EB=row['DIST_EB'],
            DIST_WB=row['DIST_WB'],
            ROUTE=row['ROUTE'],
            STATUS=row['STATUS'],
            LRT_POINT=row['LRT_POINT']
        )
        session.add(lrt_station)
        session.commit()  # Commit the transaction

    except Exception as e:
        print(f"Error in row {index}: {e}")
        print(row)  # Print the entire row for reference
        session.rollback()  # Rollback the transaction to continue with the next row

2024-03-20 18:01:50,807 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-20 18:01:50,818 INFO sqlalchemy.engine.Engine INSERT INTO transit_lrt_stations (`STATION_NAME`, `LEG`, `DIRECTION`, `DIST_NB`, `DIST_SB`, `DIST_EB`, `DIST_WB`, `ROUTE`, `STATUS`, `LRT_POINT`) VALUES (%(STATION_NAME)s, %(LEG)s, %(DIRECTION)s, %(DIST_NB)s, %(DIST_SB)s, %(DIST_EB)s, %(DIST_WB)s, %(ROUTE)s, %(STATUS)s, ST_GeomFromText(%(LRT_POINT)s, 4326))
2024-03-20 18:01:50,828 INFO sqlalchemy.engine.Engine [generated in 0.01406s] {'STATION_NAME': '45 Street SW Station', 'LEG': 'West', 'DIRECTION': 'West/East', 'DIST_NB': None, 'DIST_SB': None, 'DIST_EB': None, 'DIST_WB': None, 'ROUTE': '202', 'STATUS': 'Current', 'LRT_POINT': 'POINT (-114.1541947 51.0379605)'}
2024-03-20 18:01:50,844 INFO sqlalchemy.engine.Engine COMMIT
2024-03-20 18:01:50,855 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-20 18:01:50,863 INFO sqlalchemy.engine.Engine INSERT INTO transit_lrt_stations (`STATION_NAME`, `LEG`, `DIRECTION

In [23]:
# Loop through DataFrame rows and insert data into the database
for index, row in df_green_lrt.iterrows():
    try:
        lrt_station = TransitLRTStation(
            STATION_NAME=row['STATION_NAME'],
            STATUS=row['STATUS'],
            LEG=row['LEG'],
            LRT_POINT=row['LRT_POINT']
        )
        session.add(lrt_station)
        session.commit()  # Commit the transaction

    except Exception as e:
        print(f"Error in row {index}: {e}")
        print(row)  # Print the entire row for reference
        session.rollback()  # Rollback the transaction to continue with the next row

2024-03-20 18:01:52,425 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-20 18:01:52,427 INFO sqlalchemy.engine.Engine INSERT INTO transit_lrt_stations (`STATION_NAME`, `LEG`, `DIRECTION`, `DIST_NB`, `DIST_SB`, `DIST_EB`, `DIST_WB`, `ROUTE`, `STATUS`, `LRT_POINT`) VALUES (%(STATION_NAME)s, %(LEG)s, %(DIRECTION)s, %(DIST_NB)s, %(DIST_SB)s, %(DIST_EB)s, %(DIST_WB)s, %(ROUTE)s, %(STATUS)s, ST_GeomFromText(%(LRT_POINT)s, 4326))
2024-03-20 18:01:52,434 INFO sqlalchemy.engine.Engine [cached since 1.62s ago] {'STATION_NAME': 'Douglas Glen', 'LEG': 'Green', 'DIRECTION': None, 'DIST_NB': None, 'DIST_SB': None, 'DIST_EB': None, 'DIST_WB': None, 'ROUTE': None, 'STATUS': 'Future', 'LRT_POINT': 'POINT (-113.9969718 50.9522514)'}
2024-03-20 18:01:52,434 INFO sqlalchemy.engine.Engine COMMIT
2024-03-20 18:01:52,444 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-03-20 18:01:52,458 INFO sqlalchemy.engine.Engine INSERT INTO transit_lrt_stations (`STATION_NAME`, `LEG`, `DIRECTION`, `DIST_NB`, `

In [25]:
# Close the ORM session now that both insert loops have run; end of script.
session.close()