In [1]:
import pandas as pd
import numpy as np
import json
import requests
from io import StringIO
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient
from math import ceil
import datetime
import calendar
from sqlalchemy import create_engine


In [2]:
# Specify the path to your JSON configuration file
config_file_path = "/Users/gabisanches/Desktop/CIS9440 - Data Warehouse/Homework/Homework_GabrieleSanches_CIS9440/scripts/config.json"

# Load the JSON configuration file
with open(config_file_path, 'r') as config_file:
    config = json.load(config_file)


CONNECTION_STRING_AZURE_STORAGE = config["connectionString"]
CONTAINER_AZURE = 'carcrash'

# Initialize the BlobServiceClient
blob_service_client = BlobServiceClient.from_connection_string(CONNECTION_STRING_AZURE_STORAGE)
# Get the container client
container_client = blob_service_client.get_container_client(CONTAINER_AZURE)


car_crash_df = pd.DataFrame()

# List all blobs in the specified container
blob_list = container_client.list_blobs()
for blob in blob_list:
    print(blob.name)
    blob_client = container_client.get_blob_client(blob=blob.name)
    blob_data = blob_client.download_blob()
    blob_content = blob_data.readall().decode('utf-8')
    df = pd.read_csv(StringIO(blob_content))
    # Display the head of the DataFrame
    print(df.shape)
    car_crash_df = df.copy()

car_crash.csv


  df = pd.read_csv(StringIO(blob_content))


(2077866, 29)


In [3]:
#printcolumns in the data set
print(car_crash_df.columns)

Index(['crash_date', 'crash_time', 'on_street_name', 'off_street_name',
       'number_of_persons_injured', 'number_of_persons_killed',
       'number_of_pedestrians_injured', 'number_of_pedestrians_killed',
       'number_of_cyclist_injured', 'number_of_cyclist_killed',
       'number_of_motorist_injured', 'number_of_motorist_killed',
       'contributing_factor_vehicle_1', 'contributing_factor_vehicle_2',
       'collision_id', 'vehicle_type_code1', 'vehicle_type_code2', 'borough',
       'zip_code', 'latitude', 'longitude', 'location', 'cross_street_name',
       'contributing_factor_vehicle_3', 'vehicle_type_code_3',
       'contributing_factor_vehicle_4', 'vehicle_type_code_4',
       'contributing_factor_vehicle_5', 'vehicle_type_code_5'],
      dtype='object')


In [4]:
car_crash_df.head()

Unnamed: 0,crash_date,crash_time,on_street_name,off_street_name,number_of_persons_injured,number_of_persons_killed,number_of_pedestrians_injured,number_of_pedestrians_killed,number_of_cyclist_injured,number_of_cyclist_killed,...,latitude,longitude,location,cross_street_name,contributing_factor_vehicle_3,vehicle_type_code_3,contributing_factor_vehicle_4,vehicle_type_code_4,contributing_factor_vehicle_5,vehicle_type_code_5
0,2021-09-11T00:00:00.000,2:39,WHITESTONE EXPRESSWAY,20 AVENUE,2.0,0.0,0,0,0,0,...,,,,,,,,,,
1,2022-03-26T00:00:00.000,11:45,QUEENSBORO BRIDGE UPPER,,1.0,0.0,0,0,0,0,...,,,,,,,,,,
2,2022-06-29T00:00:00.000,6:55,THROGS NECK BRIDGE,,0.0,0.0,0,0,0,0,...,,,,,,,,,,
3,2021-09-11T00:00:00.000,9:35,,,0.0,0.0,0,0,0,0,...,40.667202,-73.8665,"{'latitude': '40.667202', 'longitude': '-73.86...",1211 LORING AVENUE,,,,,,
4,2021-12-14T00:00:00.000,8:13,SARATOGA AVENUE,DECATUR STREET,0.0,0.0,0,0,0,0,...,40.683304,-73.917274,"{'latitude': '40.683304', 'longitude': '-73.91...",,,,,,,


In [5]:
#dropping columns that are not needed
car_crash_df = car_crash_df.drop(columns=['contributing_factor_vehicle_3'])
car_crash_df = car_crash_df.drop(columns=['vehicle_type_code_3'])
car_crash_df = car_crash_df.drop(columns=['contributing_factor_vehicle_4'])
car_crash_df = car_crash_df.drop(columns=['vehicle_type_code_4'])
car_crash_df = car_crash_df.drop(columns=['contributing_factor_vehicle_5'])
car_crash_df = car_crash_df.drop(columns=['vehicle_type_code_5'])

In [6]:
car_crash_df = car_crash_df.drop(columns=['contributing_factor_vehicle_2'])
car_crash_df = car_crash_df.drop(columns=['vehicle_type_code2'])

In [7]:
car_crash_df = car_crash_df.drop(columns=['location'])

In [8]:
car_crash_df = car_crash_df.drop(columns=['cross_street_name'])

In [9]:
car_crash_df.head()

Unnamed: 0,crash_date,crash_time,on_street_name,off_street_name,number_of_persons_injured,number_of_persons_killed,number_of_pedestrians_injured,number_of_pedestrians_killed,number_of_cyclist_injured,number_of_cyclist_killed,number_of_motorist_injured,number_of_motorist_killed,contributing_factor_vehicle_1,collision_id,vehicle_type_code1,borough,zip_code,latitude,longitude
0,2021-09-11T00:00:00.000,2:39,WHITESTONE EXPRESSWAY,20 AVENUE,2.0,0.0,0,0,0,0,2,0,Aggressive Driving/Road Rage,4455765,Sedan,,,,
1,2022-03-26T00:00:00.000,11:45,QUEENSBORO BRIDGE UPPER,,1.0,0.0,0,0,0,0,1,0,Pavement Slippery,4513547,Sedan,,,,
2,2022-06-29T00:00:00.000,6:55,THROGS NECK BRIDGE,,0.0,0.0,0,0,0,0,0,0,Following Too Closely,4541903,Sedan,,,,
3,2021-09-11T00:00:00.000,9:35,,,0.0,0.0,0,0,0,0,0,0,Unspecified,4456314,Sedan,BROOKLYN,11208.0,40.667202,-73.8665
4,2021-12-14T00:00:00.000,8:13,SARATOGA AVENUE,DECATUR STREET,0.0,0.0,0,0,0,0,0,0,,4486609,,BROOKLYN,11233.0,40.683304,-73.917274


In [10]:
#rename columns
car_crash_df.rename(columns={'contributing_factor_vehicle_1': 'contributing_factor_vehicle'}, inplace=True)
car_crash_df.rename(columns={'vehicle_type_code1': 'vehicle_type_code'}, inplace=True)

In [11]:
#check column names after initial cleaning and transformation
print(car_crash_df.columns)

Index(['crash_date', 'crash_time', 'on_street_name', 'off_street_name',
       'number_of_persons_injured', 'number_of_persons_killed',
       'number_of_pedestrians_injured', 'number_of_pedestrians_killed',
       'number_of_cyclist_injured', 'number_of_cyclist_killed',
       'number_of_motorist_injured', 'number_of_motorist_killed',
       'contributing_factor_vehicle', 'collision_id', 'vehicle_type_code',
       'borough', 'zip_code', 'latitude', 'longitude'],
      dtype='object')


In [12]:
#drop na - drop rows that do not have data in the "on_street_name" column
#car_crash_df['on_street_name'].dropna(inplace=True)
car_crash_df = car_crash_df[car_crash_df['on_street_name'].notna()]

car_crash_df.head()

Unnamed: 0,crash_date,crash_time,on_street_name,off_street_name,number_of_persons_injured,number_of_persons_killed,number_of_pedestrians_injured,number_of_pedestrians_killed,number_of_cyclist_injured,number_of_cyclist_killed,number_of_motorist_injured,number_of_motorist_killed,contributing_factor_vehicle,collision_id,vehicle_type_code,borough,zip_code,latitude,longitude
0,2021-09-11T00:00:00.000,2:39,WHITESTONE EXPRESSWAY,20 AVENUE,2.0,0.0,0,0,0,0,2,0,Aggressive Driving/Road Rage,4455765,Sedan,,,,
1,2022-03-26T00:00:00.000,11:45,QUEENSBORO BRIDGE UPPER,,1.0,0.0,0,0,0,0,1,0,Pavement Slippery,4513547,Sedan,,,,
2,2022-06-29T00:00:00.000,6:55,THROGS NECK BRIDGE,,0.0,0.0,0,0,0,0,0,0,Following Too Closely,4541903,Sedan,,,,
4,2021-12-14T00:00:00.000,8:13,SARATOGA AVENUE,DECATUR STREET,0.0,0.0,0,0,0,0,0,0,,4486609,,BROOKLYN,11233.0,40.683304,-73.917274
5,2021-04-14T00:00:00.000,12:47,MAJOR DEEGAN EXPRESSWAY RAMP,,0.0,0.0,0,0,0,0,0,0,Unspecified,4407458,Dump,,,,


In [13]:
#reformat to int format 
car_crash_df['number_of_persons_injured'] = car_crash_df['number_of_persons_injured'].astype('Int64')
car_crash_df['number_of_persons_killed'] = car_crash_df['number_of_persons_killed'].astype('Int64')
car_crash_df['number_of_pedestrians_injured'] = car_crash_df['number_of_pedestrians_injured'].astype('Int64')
car_crash_df['number_of_pedestrians_killed'] = car_crash_df['number_of_pedestrians_killed'].astype('Int64')
car_crash_df['number_of_cyclist_injured'] = car_crash_df['number_of_cyclist_injured'].astype('Int64')
car_crash_df['number_of_cyclist_killed'] = car_crash_df['number_of_cyclist_killed'].astype('Int64')
car_crash_df['number_of_motorist_injured'] = car_crash_df['number_of_motorist_injured'].astype('Int64')
car_crash_df['number_of_motorist_killed'] = car_crash_df['number_of_motorist_killed'].astype('Int64')

car_crash_df.head()

Unnamed: 0,crash_date,crash_time,on_street_name,off_street_name,number_of_persons_injured,number_of_persons_killed,number_of_pedestrians_injured,number_of_pedestrians_killed,number_of_cyclist_injured,number_of_cyclist_killed,number_of_motorist_injured,number_of_motorist_killed,contributing_factor_vehicle,collision_id,vehicle_type_code,borough,zip_code,latitude,longitude
0,2021-09-11T00:00:00.000,2:39,WHITESTONE EXPRESSWAY,20 AVENUE,2,0,0,0,0,0,2,0,Aggressive Driving/Road Rage,4455765,Sedan,,,,
1,2022-03-26T00:00:00.000,11:45,QUEENSBORO BRIDGE UPPER,,1,0,0,0,0,0,1,0,Pavement Slippery,4513547,Sedan,,,,
2,2022-06-29T00:00:00.000,6:55,THROGS NECK BRIDGE,,0,0,0,0,0,0,0,0,Following Too Closely,4541903,Sedan,,,,
4,2021-12-14T00:00:00.000,8:13,SARATOGA AVENUE,DECATUR STREET,0,0,0,0,0,0,0,0,,4486609,,BROOKLYN,11233.0,40.683304,-73.917274
5,2021-04-14T00:00:00.000,12:47,MAJOR DEEGAN EXPRESSWAY RAMP,,0,0,0,0,0,0,0,0,Unspecified,4407458,Dump,,,,


In [14]:
print(car_crash_df.columns)

Index(['crash_date', 'crash_time', 'on_street_name', 'off_street_name',
       'number_of_persons_injured', 'number_of_persons_killed',
       'number_of_pedestrians_injured', 'number_of_pedestrians_killed',
       'number_of_cyclist_injured', 'number_of_cyclist_killed',
       'number_of_motorist_injured', 'number_of_motorist_killed',
       'contributing_factor_vehicle', 'collision_id', 'vehicle_type_code',
       'borough', 'zip_code', 'latitude', 'longitude'],
      dtype='object')


In [15]:
#Creating vehicle_type dimension

# Get unique values from the 'vehicle_type_code' column
unique_vehicle_types = car_crash_df['vehicle_type_code'].unique()

# Create DataFrame for the dimension table
dim_vehicle_type = pd.DataFrame(unique_vehicle_types, columns=['vehicle_type_code'])

# Add primary key column
dim_vehicle_type['vehicle_id'] = range(1, len(dim_vehicle_type) + 1)

# Rearrange columns if needed
dim_vehicle_type = dim_vehicle_type[['vehicle_id', 'vehicle_type_code']]

# Print dimension table
print(dim_vehicle_type)


      vehicle_id                    vehicle_type_code
0              1                                Sedan
1              2                                  NaN
2              3                                 Dump
3              4  Station Wagon/Sport Utility Vehicle
4              5                                  Bus
...          ...                                  ...
1246        1247                           Lime Scoot
1247        1248                           moped scoo
1248        1249                            USPS Mail
1249        1250                             SUBN/Van
1250        1251                           charter bu

[1251 rows x 2 columns]


In [16]:
#Creating contributing_factors dimension

# Get unique values from the 'contributing_factor_vehicle' column
unique_contributing_factors = car_crash_df['contributing_factor_vehicle'].dropna().unique()

# Create DataFrame for the dimension table
dim_contributing_factors = pd.DataFrame(unique_contributing_factors, columns=['contributing_factor'])

# Add primary key column
dim_contributing_factors['contributingfactor_id'] = range(1, len(dim_contributing_factors) + 1)

# Rearrange columns if needed
dim_contributing_factors = dim_contributing_factors[['contributingfactor_id', 'contributing_factor']]

# Print dimension table
print(dim_contributing_factors)

    contributingfactor_id                   contributing_factor
0                       1          Aggressive Driving/Road Rage
1                       2                     Pavement Slippery
2                       3                 Following Too Closely
3                       4                           Unspecified
4                       5                   Passing Too Closely
..                    ...                                   ...
56                     57  Reaction to Other Uninvolved Vehicle
57                     58                                     1
58                     59                       Drugs (Illegal)
59                     60                               Illness
60                     61                Cell Phone (hand-held)

[61 rows x 2 columns]


In [20]:
#Creating location dimension

# Extract relevant columns for dim_location
dim_location = car_crash_df[['borough', 'latitude', 'longitude', 'zip_code', 'on_street_name', 'off_street_name']]

# Drop duplicate rows
dim_location.drop_duplicates(inplace=True)

# Reset index
dim_location.reset_index(drop=True, inplace=True)

# Add location_id as primary key
dim_location['location_id'] = dim_location.index + 1

# Rearrange columns
dim_location = dim_location[['location_id', 'borough', 'latitude', 'longitude', 'zip_code', 'on_street_name', 'off_street_name']]

# Print dimension table
print(dim_location)

        location_id   borough   latitude  longitude zip_code   
0                 1       NaN        NaN        NaN      NaN  \
1                 2       NaN        NaN        NaN      NaN   
2                 3       NaN        NaN        NaN      NaN   
3                 4  BROOKLYN  40.683304 -73.917274  11233.0   
4                 5       NaN        NaN        NaN      NaN   
...             ...       ...        ...        ...      ...   
346409       346410       NaN  40.828990 -73.845240      NaN   
346410       346411  BROOKLYN  40.594276 -73.978110  11223.0   
346411       346412       NaN  40.754690 -73.995360      NaN   
346412       346413       NaN  40.637222 -73.959800      NaN   
346413       346414  BROOKLYN  40.699276 -73.988594  11201.0   

                      on_street_name    off_street_name  
0              WHITESTONE EXPRESSWAY          20 AVENUE  
1            QUEENSBORO BRIDGE UPPER                NaN  
2                 THROGS NECK BRIDGE                NaN  

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dim_location.drop_duplicates(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dim_location['location_id'] = dim_location.index + 1


In [26]:
# Define a function to calculate the week of the month
def week_of_month(dt):
    year = dt.year
    month = dt.month
    day = dt.day

    cal = calendar.monthcalendar(year, month)
    week_number = (day - 1) // 7 + 1
    return week_number

# Extract relevant date-related columns from your dataset
start_date = car_crash_df['crash_date'].min()
end_date = car_crash_df['crash_date'].max()

# Create a DataFrame for the date dimension
dim_date = pd.DataFrame({'date': pd.date_range(start_date, end_date, freq='H')})

# Extract attributes
dim_date['year_number'] = dim_date['date'].dt.year
dim_date['quarter_number'] = dim_date['date'].dt.quarter
dim_date['month_number'] = dim_date['date'].dt.month
dim_date['month_name'] = dim_date['date'].dt.strftime('%B')
dim_date['day_number'] = dim_date['date'].dt.day
dim_date['day_name'] = dim_date['date'].dt.strftime('%A')
dim_date['hour_number'] = dim_date['date'].dt.hour
dim_date['date_iso_format'] = dim_date['date'].apply(lambda x: x.isoformat())
dim_date['date_id'] = dim_date['date'].dt.strftime('%Y%m%d%H')

# Add week of the month and week of the year
dim_date['week_of_month'] = dim_date['date'].apply(week_of_month)
dim_date['week_of_year'] = dim_date['date'].dt.strftime('%U')

# Add crash_time column
#date_dimension['time'] = car_crash_df['crash_time']

# Define the order of columns in the date dimension
new_order = ['date_id', 'date_iso_format','year_number','quarter_number','month_number','month_name','day_number','day_name', 'hour_number','week_of_month','week_of_year']
dim_date = dim_date[new_order]

# Print the date dimension
print(dim_date.head(25))

       date_id      date_iso_format  year_number  quarter_number   
0   2012070100  2012-07-01T00:00:00         2012               3  \
1   2012070101  2012-07-01T01:00:00         2012               3   
2   2012070102  2012-07-01T02:00:00         2012               3   
3   2012070103  2012-07-01T03:00:00         2012               3   
4   2012070104  2012-07-01T04:00:00         2012               3   
5   2012070105  2012-07-01T05:00:00         2012               3   
6   2012070106  2012-07-01T06:00:00         2012               3   
7   2012070107  2012-07-01T07:00:00         2012               3   
8   2012070108  2012-07-01T08:00:00         2012               3   
9   2012070109  2012-07-01T09:00:00         2012               3   
10  2012070110  2012-07-01T10:00:00         2012               3   
11  2012070111  2012-07-01T11:00:00         2012               3   
12  2012070112  2012-07-01T12:00:00         2012               3   
13  2012070113  2012-07-01T13:00:00         2012

In [40]:
print(dim_date.columns)


Index(['date_id', 'date_iso_format', 'year_number', 'quarter_number',
       'month_number', 'month_name', 'day_number', 'day_name', 'hour_number',
       'week_of_month', 'week_of_year'],
      dtype='object')


In [24]:
print(car_crash_df.columns)

Index(['crash_date', 'crash_time', 'on_street_name', 'off_street_name',
       'number_of_persons_injured', 'number_of_persons_killed',
       'number_of_pedestrians_injured', 'number_of_pedestrians_killed',
       'number_of_cyclist_injured', 'number_of_cyclist_killed',
       'number_of_motorist_injured', 'number_of_motorist_killed',
       'contributing_factor_vehicle', 'collision_id', 'vehicle_type_code',
       'borough', 'zip_code', 'latitude', 'longitude', 'location_id',
       'vehicle_id'],
      dtype='object')


In [42]:
# Step 1: Create contributingfactor_id column
# Creating a dictionary mapping contributing_factor_vehicle to contributingfactor_id
contributingfactor_id_mapping = dict(zip(dim_contributing_factors['contributing_factor'], dim_contributing_factors['contributingfactor_id']))
# Adding contributingfactor_id column to the original dataset using the mapping
car_crash_df['contributingfactor_id'] = car_crash_df['contributing_factor'].map(contributingfactor_id_mapping)

# Step 2: Create location_id column
# Creating a dictionary mapping location attributes to location_id
location_attributes = ['borough', 'latitude', 'longitude', 'zip_code', 'on_street_name', 'off_street_name']
location_id_mapping = dim_location.set_index(location_attributes)['location_id'].to_dict()
# Creating a tuple of location attributes
car_crash_df['location_tuple'] = car_crash_df[location_attributes].apply(tuple, axis=1)
# Adding location_id column to the original dataset using the mapping
car_crash_df['location_id'] = car_crash_df['location_tuple'].map(location_id_mapping)

# Step 3: Create date_id column
# Creating a dictionary mapping crash_date to date_id
date_id_mapping = dict(zip(dim_date['date_id'], dim_date['date_id']))
# Adding date_id column to the original dataset using the mapping
car_crash_df['date_id'] = car_crash_df['crash_date'].map(date_id_mapping)

# Step 4: Create vehicle_id column
# Creating a dictionary mapping vehicle_type_code to vehicle_id
vehicle_id_mapping = dict(zip(dim_vehicle_type['vehicle_type_code'], dim_vehicle_type['vehicle_id']))
# Adding vehicle_id column to the original dataset using the mapping
car_crash_df['vehicle_id'] = car_crash_df['vehicle_type_code'].map(vehicle_id_mapping)

# Step 5: Reorder columns
new_order = ['fact_id', 'number_of_persons_injured', 'number_of_persons_killed', 'number_of_pedestrians_injured', 
             'number_of_pedestrians_killed', 'number_of_cyclist_injured', 'number_of_cyclist_killed', 
             'number_of_motorist_injured', 'number_of_motorist_killed', 
             'location_id', 'date_id', 'vehicle_id', 'contributingfactor_id']

car_crash_df = car_crash_df[new_order]

# Display the updated DataFrame
car_crash_df.head()


Unnamed: 0,fact_id,number_of_persons_injured,number_of_persons_killed,number_of_pedestrians_injured,number_of_pedestrians_killed,number_of_cyclist_injured,number_of_cyclist_killed,number_of_motorist_injured,number_of_motorist_killed,location_id,date_id,vehicle_id,contributingfactor_id
0,4455765,2,0,0,0,0,0,2,0,1,,1,1.0
1,4513547,1,0,0,0,0,0,1,0,2,,1,2.0
2,4541903,0,0,0,0,0,0,0,0,3,,1,3.0
3,4486609,0,0,0,0,0,0,0,0,4,,2,
4,4407458,0,0,0,0,0,0,0,0,5,,3,4.0


In [48]:
import sys
print(sys.path)


['/Users/gabisanches/Desktop/CIS9440 - Data Warehouse/Homework/Homework_GabrieleSanches_CIS9440/scripts', '/Users/gabisanches/.vscode/extensions/ms-toolsai.jupyter-2022.11.1003412109/pythonFiles', '/Users/gabisanches/.vscode/extensions/ms-toolsai.jupyter-2022.11.1003412109/pythonFiles/lib/python', '/Library/Frameworks/Python.framework/Versions/3.10/lib/python310.zip', '/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10', '/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/lib-dynload', '', '/Users/gabisanches/Library/Python/3.10/lib/python/site-packages', '/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages']


In [2]:
%pip install snowflake-sqlalchemy

Defaulting to user installation because normal site-packages is not writeable
Collecting snowflake-sqlalchemy
  Downloading snowflake_sqlalchemy-1.5.3-py3-none-any.whl.metadata (20 kB)
Collecting snowflake-connector-python<4.0.0 (from snowflake-sqlalchemy)
  Downloading snowflake_connector_python-3.9.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m649.7 kB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting sqlalchemy<2.0.0,>=1.4.19 (from snowflake-sqlalchemy)
  Downloading SQLAlchemy-1.4.52.tar.gz (8.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.5/8.5 MB[0m [31m454.9 kB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting asn1cr

In [3]:
from sqlalchemy import create_engine

# Replace the placeholders below with your Snowflake account information
account_name = 'tilmwpx-rma66037.snowflakecomputing.com'
user = 'GABRIELEDASANCHES'
password = 'Gabi2911!'
warehouse = 'HOMEWORK_CIS9440'
database = 'CARCRASH_CIS9440'
schema = 'CARCRASH'

connection_string = f'snowflake://{user}:{password}@{account_name}.snowflakecomputing.com/?warehouse={warehouse}&database={database}&schema={schema}'

# Create the SQLAlchemy engine with explicit dialect
engine = create_engine(connection_string, connect_args={'dialect_name': 'snowflake'})


# Test the connection
try:
    with engine.connect() as connection:
        result = connection.execute('SELECT current_version()').fetchone()
        print(f"Snowflake version: {result[0]}")
except Exception as e:
    print(f"Connection failed: {e}")

AttributeError: module 'sqlalchemy.util' has no attribute 'threading'