import geopandas as gpd
import zipfile
# Step 1: Read the large GeoJSON file
gdf = gpd.read_file("Traffic_Collisions.geojson")

# Step 2: Drop columns that are not wanted in the CSV exports
gdf = gdf.drop(columns=['geometry', 'OCC_DATE', '_id'])

# Step 3: Convert OCC_YEAR and OCC_HOUR to integers
gdf['OCC_YEAR'] = gdf['OCC_YEAR'].astype(int)
gdf['OCC_HOUR'] = gdf['OCC_HOUR'].astype(int)

# Step 4: Convert LONG_WGS84 and LAT_WGS84 to float
gdf['LONG_WGS84'] = gdf['LONG_WGS84'].astype(float)
gdf['LAT_WGS84'] = gdf['LAT_WGS84'].astype(float)

# Step 5: Convert YES/NO to 1/0 for vehicle and pedestrian involvement.
# A vectorized comparison is used instead of DataFrame.applymap, which is
# deprecated since pandas 2.1. Any value other than the exact string 'YES'
# (including missing values) becomes 0, exactly as the per-cell lambda did.
columns_to_convert = ['AUTOMOBILE', 'MOTORCYCLE', 'PASSENGER', 'BICYCLE', 'PEDESTRIAN']
gdf[columns_to_convert] = gdf[columns_to_convert].eq('YES').astype('int64')

# Step 6: Discard rows whose longitude is exactly 0, then renumber the index
# so it is contiguous again after the filter.
gdf = gdf[gdf['LONG_WGS84'] != 0].reset_index(drop=True)

# Write one CSV per collision year, 2014 through 2024 inclusive.
# A file is written for every year in the range, even when no rows match.
for year in range(2014, 2025):
    rows_for_year = gdf.loc[gdf['OCC_YEAR'].eq(year)]
    rows_for_year.to_csv(f"Traffic_Collisions_{year}.csv", index=False)


import zipfile
# Export the whole cleaned table as a single CSV, then bundle it into a ZIP.
# NOTE: the intermediate CSV stays on disk; nothing below removes it.
csv_filename, zip_filename = "Traffic_Collisions_All.csv", "Traffic_Collisions_All.zip"
gdf.to_csv(csv_filename, index=False)

# Create (or overwrite) the archive and store the CSV with DEFLATE compression.
with zipfile.ZipFile(zip_filename, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write(csv_filename)

print("Saved as ZIP successfully.")
