In [1]:
import pandas as pd
import numpy as np
import os
import re

In [2]:

# --- PATH CONFIGURATION ---
# Root data directory. Set the ECOAIR_DATA_DIR environment variable to run the
# notebook on another machine; when it is unset the original local path is used,
# so existing behavior is unchanged.
BASE_PATH = os.environ.get(
    "ECOAIR_DATA_DIR",
    "/Users/beratzengin/Desktop/Github/EcoAir SmartCity Predictor/Data",
)
# Folder holding the monthly traffic CSV dumps.
TRAFFIC_SOURCE_DIR = os.path.join(BASE_PATH, "--traffic")
# Station metadata file read by the loading cell.
STATIONS_CSV = os.path.join(BASE_PATH, "stations_info.csv")

In [None]:

def parse_coords(location_str):
    """Extract ``(lat, lon)`` from a string with parenthesised coordinates.

    The content of the first ``(...)`` group is split on whitespace. The first
    token is returned as longitude and the second as latitude, e.g.
    ``"POINT (28.97 41.01)"`` -> ``(41.01, 28.97)``.

    Args:
        location_str: Text containing ``(<lon> <lat>)``. Non-string values
            (e.g. NaN/None from an empty CSV cell) are tolerated.

    Returns:
        tuple: ``(lat, lon)`` as floats, or ``(None, None)`` when the input is
        not a string, has no parenthesised group, has fewer than two tokens,
        or the tokens are not numeric.
    """
    # Empty CSV cells arrive as float NaN; re.search would raise TypeError on them.
    if not isinstance(location_str, str):
        return None, None

    match = re.search(r'\((.*?)\)', location_str)
    if match is None:
        return None, None

    # split() without arguments collapses runs of whitespace, so
    # "(28.9   41.0)" or "( 28.9 41.0 )" no longer yields empty tokens.
    coords = match.group(1).split()
    if len(coords) < 2:
        return None, None

    try:
        return float(coords[1]), float(coords[0])  # Returns (lat, lon)
    except ValueError:
        return None, None

In [4]:

# Read the station metadata table from disk.
stations_df = pd.read_csv(STATIONS_CSV)

# Collect every CSV in the traffic folder (e.g., january_01.csv).
# Ordering is plain alphabetical on the filename, not chronological.
traffic_files = sorted(
    entry
    for entry in os.listdir(TRAFFIC_SOURCE_DIR)
    if entry.endswith('.csv')
)

In [5]:

# Half-width of the geofence box in degrees, applied independently to latitude
# and longitude. This is a square lat/lon box, not a true radius; the original
# note estimated it at roughly 2 km.
GEOFENCE_DEG = 0.02
# Year written into the output filenames.
TRAFFIC_YEAR = "2024"

# For every monthly traffic dump, write one hourly-aggregated CSV per station
# into that station's folder under BASE_PATH.
for t_file in traffic_files:
    # The log prefix is the tractor emoji; it was stored mis-encoded.
    print(f"🚜 Processing Source File: {t_file}")
    file_path = os.path.join(TRAFFIC_SOURCE_DIR, t_file)

    # Load the big monthly dataset
    df_month = pd.read_csv(file_path)
    df_month.columns = [c.upper() for c in df_month.columns]  # Standardize headers

    # Extract month number from filename (assuming format like 'january_01.csv').
    # A file without digits is skipped instead of crashing on `.group` of None.
    month_match = re.search(r'(\d+)', t_file)
    if month_match is None:
        print(f"Skipping {t_file}: no month number found in filename")
        continue
    month_id = month_match.group(1)

    for _, s_row in stations_df.iterrows():
        # Make the station name usable as a directory name.
        s_name = s_row['Name'].replace(" ", "_").replace("/", "-")

        s_lat, s_lon = parse_coords(s_row['Location'])
        if s_lat is None or s_lon is None:
            # Without coordinates the geofence comparison below would raise.
            print(f"{s_name}: Could not parse station coordinates, skipping")
            continue

        target_dir = os.path.join(BASE_PATH, s_name)

        # Geofencing: keep sensors inside the lat/lon box around the station.
        local_mask = (np.abs(df_month['LATITUDE'] - s_lat) < GEOFENCE_DEG) & \
                     (np.abs(df_month['LONGITUDE'] - s_lon) < GEOFENCE_DEG)

        local_traffic = df_month[local_mask].copy()

        if local_traffic.empty:
            print(f"{s_name}: No traffic sensors found nearby in {t_file}")
            continue

        # 1. Convert to datetime
        local_traffic['DATE_TIME'] = pd.to_datetime(local_traffic['DATE_TIME'])

        # 2. Hourly Downsampling/Aggregation
        # Average speed and total number of vehicles per hour, pooled over
        # all sensors inside the box.
        hourly_traffic = local_traffic.groupby(local_traffic['DATE_TIME'].dt.floor('h')).agg({
            'AVERAGE_SPEED': 'mean',
            'NUMBER_OF_VEHICLES': 'sum'
        }).reset_index()

        # Rename columns for clarity (positional: group key, mean, sum)
        hourly_traffic.columns = ['date', 'avg_speed', 'total_vehicles']

        # 3. Save as localized monthly file.
        # Create the station folder on demand so to_csv cannot fail on a
        # missing directory; a no-op when the folder already exists.
        os.makedirs(target_dir, exist_ok=True)
        output_path = os.path.join(target_dir, f"traffic_{month_id}_{TRAFFIC_YEAR}.csv")
        hourly_traffic.to_csv(output_path, index=False)

        print(f"{s_name}: Saved {len(hourly_traffic)} hours for month {month_id}")


🚜 Processing Source File: april_04.csv
Maslak: Saved 684 hours for month 04
Esenler: Saved 684 hours for month 04
Yenibosna: Saved 691 hours for month 04
Beylikdüzü: No traffic sensors found nearby in april_04.csv
Ümraniye_1: Saved 701 hours for month 04
Aksaray: Saved 681 hours for month 04
Mobil: Saved 683 hours for month 04
Beşiktaş: Saved 685 hours for month 04
Kadıköy: Saved 683 hours for month 04
Sultangazi_1: Saved 686 hours for month 04
Avcılar: Saved 686 hours for month 04
Sultangazi_3: Saved 684 hours for month 04
Üsküdar_1: Saved 689 hours for month 04
Alibeyköy: Saved 683 hours for month 04
Selimiye: Saved 689 hours for month 04
D-100_: Saved 686 hours for month 04
Kağıthane_1: Saved 685 hours for month 04
Kandilli_1: Saved 681 hours for month 04
Kartal: Saved 689 hours for month 04
Çatladıkapı: Saved 681 hours for month 04
Sultangazi_2: Saved 687 hours for month 04
Bağcılar: Saved 686 hours for month 04
Kumköy: Saved 674 hours for month 04
SarÄ±yer: S