In [1]:
# Fetch and get data from internet 

from pathlib import Path
import requests
from datetime import datetime

def download_one_file_of_raw_data(year: int, month: int, save_dir: str = "./data/raw/") -> Path:
    """
    Download a single month's data for the specified year.
    
    Parameters:
        year (int): The year of the data to download.
        month (int): The month of the data to download.
        save_dir (str): Directory to save the downloaded file.
        
    Returns:
        Path: The path to the saved file.
    """
    # URL template
    URL = f'https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_{year}-{month:02d}.parquet'
    response = requests.get(URL)

    # Check if the request succeeded
    if response.status_code == 200:
        # Save the file
        path = Path(save_dir) / f"yellow_tripdata_{year}-{month:02d}.parquet"
        path.parent.mkdir(parents=True, exist_ok=True)  # Create directories if needed
        with open(path, "wb") as file:
            file.write(response.content)
        print(f"Downloaded: {path}")
        return path
    else:
        print(f"File not found for {year}-{month:02d}: {URL}")
        return None

def download_full_dataset(start_year: int, start_month: int, end_year: int, end_month: int, save_dir: str = "./data/raw/"):
    """
    Download the full dataset from the specified start year/month to end year/month.
    
    Parameters:
        start_year (int): The starting year.
        start_month (int): The starting month.
        end_year (int): The ending year.
        end_month (int): The ending month.
        save_dir (str): Directory to save the downloaded files.
    """
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            # Stop if the year and month exceed the range
            if year == end_year and month > end_month:
                break
            # Download the file
            try:
                download_one_file_of_raw_data(year, month, save_dir)
            except Exception as e:
                print(f"Error downloading {year}-{month:02d}: {e}")

# Example usage: Download all data from January 2009 to September 2024
download_full_dataset(start_year=2009, start_month=1, end_year=2024, end_month=9)


Downloaded: data\raw\yellow_tripdata_2009-01.parquet
Downloaded: data\raw\yellow_tripdata_2009-02.parquet
Downloaded: data\raw\yellow_tripdata_2009-03.parquet
Downloaded: data\raw\yellow_tripdata_2009-04.parquet
Downloaded: data\raw\yellow_tripdata_2009-05.parquet
Downloaded: data\raw\yellow_tripdata_2009-06.parquet
Downloaded: data\raw\yellow_tripdata_2009-07.parquet
Downloaded: data\raw\yellow_tripdata_2009-08.parquet
Downloaded: data\raw\yellow_tripdata_2009-09.parquet
Downloaded: data\raw\yellow_tripdata_2009-10.parquet
Downloaded: data\raw\yellow_tripdata_2009-11.parquet
Downloaded: data\raw\yellow_tripdata_2009-12.parquet
Downloaded: data\raw\yellow_tripdata_2010-01.parquet
Downloaded: data\raw\yellow_tripdata_2010-02.parquet
Downloaded: data\raw\yellow_tripdata_2010-03.parquet
Downloaded: data\raw\yellow_tripdata_2010-04.parquet
Downloaded: data\raw\yellow_tripdata_2010-05.parquet
Downloaded: data\raw\yellow_tripdata_2010-06.parquet
Downloaded: data\raw\yellow_tripdata_2010-07.p

In [2]:
## Checking the first file to see what we really have and from there continue with the cleaning process for the rest of the files

import pandas as pd

# Correct the file path to the actual location
file_path = "C:/Users/Usuario/Documents/ML_Projects/taxi_demand_predictor/notebooks/data/raw/yellow_tripdata_2009-01.parquet"

# Load the Parquet file into a Pandas DataFrame
data = pd.read_parquet(file_path)

# Display the first few rows of the DataFrame
print("Sample Data:")
print(data.head())

# Generate descriptive statistics for the dataset
print("\nDescriptive Statistics:")
print(data.describe(include='all'))

# Provide column-wise info for further insights
print("\nColumn Info:")
data.info()


Sample Data:
  vendor_name Trip_Pickup_DateTime Trip_Dropoff_DateTime  Passenger_Count  \
0         VTS  2009-01-04 02:52:00   2009-01-04 03:02:00                1   
1         VTS  2009-01-04 03:31:00   2009-01-04 03:38:00                3   
2         VTS  2009-01-03 15:43:00   2009-01-03 15:57:00                5   
3         DDS  2009-01-01 20:52:58   2009-01-01 21:14:00                1   
4         DDS  2009-01-24 16:18:23   2009-01-24 16:24:56                1   

   Trip_Distance  Start_Lon  Start_Lat  Rate_Code  store_and_forward  \
0           2.63 -73.991957  40.721567        NaN                NaN   
1           4.55 -73.982102  40.736290        NaN                NaN   
2          10.35 -74.002587  40.739748        NaN                NaN   
3           5.00 -73.974267  40.790955        NaN                NaN   
4           0.40 -74.001580  40.719382        NaN                NaN   

     End_Lon    End_Lat Payment_Type  Fare_Amt  surcharge  mta_tax  Tip_Amt  \
0 -73.993803

In [3]:
## Trying the first version of the cleaning process to the first file. If it works we will iterate trought the rest of the files

import pandas as pd
from pathlib import Path

# Define the absolute paths
base_raw_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\raw")
base_transformed_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed")

# File to process
file_name = "yellow_tripdata_2009-01.parquet"
raw_file_path = base_raw_path / file_name
transformed_file_path = base_transformed_path / file_name

# Load the raw data
df = pd.read_parquet(raw_file_path)

# Select relevant columns
columns_to_keep = [
    "Trip_Pickup_DateTime",
    "Trip_Dropoff_DateTime",
    "Passenger_Count",
    "Trip_Distance",
    "Start_Lon",
    "Start_Lat",
    "End_Lon",
    "End_Lat",
    "Fare_Amt",
    "Total_Amt"
]
df = df[columns_to_keep]

# Convert datetime columns
df["Trip_Pickup_DateTime"] = pd.to_datetime(df["Trip_Pickup_DateTime"], errors="coerce")
df["Trip_Dropoff_DateTime"] = pd.to_datetime(df["Trip_Dropoff_DateTime"], errors="coerce")

# Filter rows based on the correct year and month (2009-01)
df = df[
    (df["Trip_Pickup_DateTime"].dt.year == 2009) & 
    (df["Trip_Pickup_DateTime"].dt.month == 1)
]

# Add derived metrics
df["trip_duration_minutes"] = (
    (df["Trip_Dropoff_DateTime"] - df["Trip_Pickup_DateTime"]).dt.total_seconds() / 60
)
df["average_speed_kmph"] = (df["Trip_Distance"] / df["trip_duration_minutes"]) * 60

# Validate and clean the data
# Remove rows with invalid or extreme values
df = df[
    (df["Passenger_Count"] > 0) & 
    (df["Trip_Distance"] > 0) &
    (df["trip_duration_minutes"] > 0) &
    (df["Start_Lon"].between(-180, 180)) &
    (df["Start_Lat"].between(-90, 90)) &
    (df["End_Lon"].between(-180, 180)) &
    (df["End_Lat"].between(-90, 90)) &
    (df["Fare_Amt"] >= 0) &
    (df["Total_Amt"] >= 0)
]

# Save the transformed data as a Parquet file
df.to_parquet(transformed_file_path, index=False)

# Display a summary of the transformed data
print(f"Transformed file saved to: {transformed_file_path}")
print("Sample Data:")
print(df.head())
print("\nDescriptive Statistics:")
print(df.describe())


Transformed file saved to: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-01.parquet
Sample Data:
  Trip_Pickup_DateTime Trip_Dropoff_DateTime  Passenger_Count  Trip_Distance  \
0  2009-01-04 02:52:00   2009-01-04 03:02:00                1           2.63   
1  2009-01-04 03:31:00   2009-01-04 03:38:00                3           4.55   
2  2009-01-03 15:43:00   2009-01-03 15:57:00                5          10.35   
3  2009-01-01 20:52:58   2009-01-01 21:14:00                1           5.00   
4  2009-01-24 16:18:23   2009-01-24 16:24:56                1           0.40   

   Start_Lon  Start_Lat    End_Lon    End_Lat  Fare_Amt  Total_Amt  \
0 -73.991957  40.721567 -73.993803  40.695922       8.9       9.40   
1 -73.982102  40.736290 -73.955850  40.768030      12.1      14.60   
2 -74.002587  40.739748 -73.869983  40.770225      23.7      28.44   
3 -73.974267  40.790955 -73.996558  40.731849      14.9      18.45   
4 -74.001

In [4]:
## Iteration process to transform the data in the whole set of files 

import pandas as pd
from pathlib import Path

# Define paths
base_raw_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\raw")
base_transformed_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed")

# List of expected columns
expected_columns = [
    "Trip_Pickup_DateTime",
    "Trip_Dropoff_DateTime",
    "Passenger_Count",
    "Trip_Distance",
    "Start_Lon",
    "Start_Lat",
    "End_Lon",
    "End_Lat",
    "Fare_Amt",
    "Total_Amt"
]

# Function to process a single file
def process_file(file_path):
    try:
        # Check if the transformed file already exists
        transformed_file_path = base_transformed_path / file_path.name
        if transformed_file_path.exists():
            print(f"File already processed. Skipping: {file_path.name}")
            return
        
        print(f"Processing file: {file_path.name}")
        
        # Load the raw data
        df = pd.read_parquet(file_path)
        
        # Check for missing columns
        missing_columns = [col for col in expected_columns if col not in df.columns]
        if missing_columns:
            print(f"Warning: Missing columns in {file_path.name}: {missing_columns}")
            # Add missing columns with default values (e.g., NaN)
            for col in missing_columns:
                df[col] = pd.NA
        
        # Select only the expected columns
        df = df[expected_columns]
        
        # Convert datetime columns
        df["Trip_Pickup_DateTime"] = pd.to_datetime(df["Trip_Pickup_DateTime"], errors="coerce")
        df["Trip_Dropoff_DateTime"] = pd.to_datetime(df["Trip_Dropoff_DateTime"], errors="coerce")
        
        # Filter rows for the correct year and month
        year = int(file_path.name.split('_')[-1].split('-')[0])
        month = int(file_path.name.split('_')[-1].split('-')[1].split('.')[0])
        df = df[
            (df["Trip_Pickup_DateTime"].dt.year == year) &
            (df["Trip_Pickup_DateTime"].dt.month == month)
        ]
        
        # Add derived metrics
        df["trip_duration_minutes"] = (
            (df["Trip_Dropoff_DateTime"] - df["Trip_Pickup_DateTime"]).dt.total_seconds() / 60
        )
        df["average_speed_kmph"] = (df["Trip_Distance"] / df["trip_duration_minutes"]) * 60
        
        # Validate and clean data
        df = df[
            (df["Passenger_Count"] > 0) & 
            (df["Trip_Distance"] > 0) &
            (df["trip_duration_minutes"] > 0) &
            (df["Start_Lon"].between(-180, 180)) &
            (df["Start_Lat"].between(-90, 90)) &
            (df["End_Lon"].between(-180, 180)) &
            (df["End_Lat"].between(-90, 90)) &
            (df["Fare_Amt"] >= 0) &
            (df["Total_Amt"] >= 0)
        ]
        
        # Save the transformed data
        df.to_parquet(transformed_file_path, index=False)
        print(f"Transformed file saved: {transformed_file_path}")
        
    except Exception as e:
        print(f"Error processing file {file_path.name}: {e}")

# Process all files in the raw folder
for file_path in base_raw_path.glob("*.parquet"):
    process_file(file_path)


File already processed. Skipping: yellow_tripdata_2009-01.parquet
Processing file: yellow_tripdata_2009-02.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-02.parquet
Processing file: yellow_tripdata_2009-03.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-03.parquet
Processing file: yellow_tripdata_2009-04.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-04.parquet
Processing file: yellow_tripdata_2009-05.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-05.parquet
Processing file: yellow_tripdata_2009-06.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\y

In [6]:
## Some files didn't work because some difference in the structure of the tables. taking the log output and convert it in files for further inspection and check next steps. 

import csv

# Define the log as a raw string to avoid Unicode errors with backslashes
log_text = r"""
File already processed. Skipping: yellow_tripdata_2009-01.parquet
Processing file: yellow_tripdata_2009-02.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-02.parquet
Processing file: yellow_tripdata_2009-03.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-03.parquet
Processing file: yellow_tripdata_2009-04.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-04.parquet
Processing file: yellow_tripdata_2009-05.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-05.parquet
Processing file: yellow_tripdata_2009-06.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-06.parquet
Processing file: yellow_tripdata_2009-07.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-07.parquet
Processing file: yellow_tripdata_2009-08.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-08.parquet
Processing file: yellow_tripdata_2009-09.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-09.parquet
Processing file: yellow_tripdata_2009-10.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-10.parquet
Processing file: yellow_tripdata_2009-11.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-11.parquet
Processing file: yellow_tripdata_2009-12.parquet
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-12.parquet
Processing file: yellow_tripdata_2010-01.parquet
Warning: Missing columns in yellow_tripdata_2010-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-01.parquet
Processing file: yellow_tripdata_2010-02.parquet
Warning: Missing columns in yellow_tripdata_2010-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-02.parquet
Processing file: yellow_tripdata_2010-03.parquet
Warning: Missing columns in yellow_tripdata_2010-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-03.parquet
Processing file: yellow_tripdata_2010-04.parquet
Warning: Missing columns in yellow_tripdata_2010-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-04.parquet
Processing file: yellow_tripdata_2010-05.parquet
Warning: Missing columns in yellow_tripdata_2010-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-05.parquet
Processing file: yellow_tripdata_2010-06.parquet
Warning: Missing columns in yellow_tripdata_2010-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-06.parquet
Processing file: yellow_tripdata_2010-07.parquet
Warning: Missing columns in yellow_tripdata_2010-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-07.parquet
Processing file: yellow_tripdata_2010-08.parquet
Warning: Missing columns in yellow_tripdata_2010-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-08.parquet
Processing file: yellow_tripdata_2010-09.parquet
Warning: Missing columns in yellow_tripdata_2010-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-09.parquet
Processing file: yellow_tripdata_2010-10.parquet
Warning: Missing columns in yellow_tripdata_2010-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-10.parquet
Processing file: yellow_tripdata_2010-11.parquet
Warning: Missing columns in yellow_tripdata_2010-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-11.parquet
Processing file: yellow_tripdata_2010-12.parquet
Warning: Missing columns in yellow_tripdata_2010-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2010-12.parquet
Processing file: yellow_tripdata_2011-01.parquet
Warning: Missing columns in yellow_tripdata_2011-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-01.parquet
Processing file: yellow_tripdata_2011-02.parquet
Warning: Missing columns in yellow_tripdata_2011-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-02.parquet
Processing file: yellow_tripdata_2011-03.parquet
Warning: Missing columns in yellow_tripdata_2011-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-03.parquet
Processing file: yellow_tripdata_2011-04.parquet
Warning: Missing columns in yellow_tripdata_2011-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-04.parquet
Processing file: yellow_tripdata_2011-05.parquet
Warning: Missing columns in yellow_tripdata_2011-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-05.parquet
Processing file: yellow_tripdata_2011-06.parquet
Warning: Missing columns in yellow_tripdata_2011-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-06.parquet
Processing file: yellow_tripdata_2011-07.parquet
Warning: Missing columns in yellow_tripdata_2011-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-07.parquet
Processing file: yellow_tripdata_2011-08.parquet
Warning: Missing columns in yellow_tripdata_2011-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-08.parquet
Processing file: yellow_tripdata_2011-09.parquet
Warning: Missing columns in yellow_tripdata_2011-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-09.parquet
Processing file: yellow_tripdata_2011-10.parquet
Warning: Missing columns in yellow_tripdata_2011-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-10.parquet
Processing file: yellow_tripdata_2011-11.parquet
Warning: Missing columns in yellow_tripdata_2011-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-11.parquet
Processing file: yellow_tripdata_2011-12.parquet
Warning: Missing columns in yellow_tripdata_2011-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2011-12.parquet
Processing file: yellow_tripdata_2012-01.parquet
Warning: Missing columns in yellow_tripdata_2012-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-01.parquet
Processing file: yellow_tripdata_2012-02.parquet
Warning: Missing columns in yellow_tripdata_2012-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-02.parquet
Processing file: yellow_tripdata_2012-03.parquet
Warning: Missing columns in yellow_tripdata_2012-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-03.parquet
Processing file: yellow_tripdata_2012-04.parquet
Warning: Missing columns in yellow_tripdata_2012-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-04.parquet
Processing file: yellow_tripdata_2012-05.parquet
Warning: Missing columns in yellow_tripdata_2012-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-05.parquet
Processing file: yellow_tripdata_2012-06.parquet
Warning: Missing columns in yellow_tripdata_2012-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-06.parquet
Processing file: yellow_tripdata_2012-07.parquet
Warning: Missing columns in yellow_tripdata_2012-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-07.parquet
Processing file: yellow_tripdata_2012-08.parquet
Warning: Missing columns in yellow_tripdata_2012-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-08.parquet
Processing file: yellow_tripdata_2012-09.parquet
Warning: Missing columns in yellow_tripdata_2012-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-09.parquet
Processing file: yellow_tripdata_2012-10.parquet
Warning: Missing columns in yellow_tripdata_2012-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-10.parquet
Processing file: yellow_tripdata_2012-11.parquet
Warning: Missing columns in yellow_tripdata_2012-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-11.parquet
Processing file: yellow_tripdata_2012-12.parquet
Warning: Missing columns in yellow_tripdata_2012-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2012-12.parquet
Processing file: yellow_tripdata_2013-01.parquet
Warning: Missing columns in yellow_tripdata_2013-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-01.parquet
Processing file: yellow_tripdata_2013-02.parquet
Warning: Missing columns in yellow_tripdata_2013-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-02.parquet
Processing file: yellow_tripdata_2013-03.parquet
Warning: Missing columns in yellow_tripdata_2013-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-03.parquet
Processing file: yellow_tripdata_2013-04.parquet
Warning: Missing columns in yellow_tripdata_2013-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-04.parquet
Processing file: yellow_tripdata_2013-05.parquet
Warning: Missing columns in yellow_tripdata_2013-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-05.parquet
Processing file: yellow_tripdata_2013-06.parquet
Warning: Missing columns in yellow_tripdata_2013-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-06.parquet
Processing file: yellow_tripdata_2013-07.parquet
Warning: Missing columns in yellow_tripdata_2013-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-07.parquet
Processing file: yellow_tripdata_2013-08.parquet
Warning: Missing columns in yellow_tripdata_2013-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-08.parquet
Processing file: yellow_tripdata_2013-09.parquet
Warning: Missing columns in yellow_tripdata_2013-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-09.parquet
Processing file: yellow_tripdata_2013-10.parquet
Warning: Missing columns in yellow_tripdata_2013-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-10.parquet
Processing file: yellow_tripdata_2013-11.parquet
Warning: Missing columns in yellow_tripdata_2013-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-11.parquet
Processing file: yellow_tripdata_2013-12.parquet
Warning: Missing columns in yellow_tripdata_2013-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2013-12.parquet
Processing file: yellow_tripdata_2014-01.parquet
Warning: Missing columns in yellow_tripdata_2014-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-01.parquet
Processing file: yellow_tripdata_2014-02.parquet
Warning: Missing columns in yellow_tripdata_2014-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-02.parquet
Processing file: yellow_tripdata_2014-03.parquet
Warning: Missing columns in yellow_tripdata_2014-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-03.parquet
Processing file: yellow_tripdata_2014-04.parquet
Warning: Missing columns in yellow_tripdata_2014-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-04.parquet
Processing file: yellow_tripdata_2014-05.parquet
Warning: Missing columns in yellow_tripdata_2014-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-05.parquet
Processing file: yellow_tripdata_2014-06.parquet
Warning: Missing columns in yellow_tripdata_2014-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-06.parquet
Processing file: yellow_tripdata_2014-07.parquet
Warning: Missing columns in yellow_tripdata_2014-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-07.parquet
Processing file: yellow_tripdata_2014-08.parquet
Warning: Missing columns in yellow_tripdata_2014-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-08.parquet
Processing file: yellow_tripdata_2014-09.parquet
Warning: Missing columns in yellow_tripdata_2014-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-09.parquet
Processing file: yellow_tripdata_2014-10.parquet
Warning: Missing columns in yellow_tripdata_2014-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-10.parquet
Processing file: yellow_tripdata_2014-11.parquet
Warning: Missing columns in yellow_tripdata_2014-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-11.parquet
Processing file: yellow_tripdata_2014-12.parquet
Warning: Missing columns in yellow_tripdata_2014-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2014-12.parquet
Processing file: yellow_tripdata_2015-01.parquet
Warning: Missing columns in yellow_tripdata_2015-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-01.parquet
Processing file: yellow_tripdata_2015-02.parquet
Warning: Missing columns in yellow_tripdata_2015-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-02.parquet
Processing file: yellow_tripdata_2015-03.parquet
Warning: Missing columns in yellow_tripdata_2015-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-03.parquet
Processing file: yellow_tripdata_2015-04.parquet
Warning: Missing columns in yellow_tripdata_2015-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-04.parquet
Processing file: yellow_tripdata_2015-05.parquet
Warning: Missing columns in yellow_tripdata_2015-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-05.parquet
Processing file: yellow_tripdata_2015-06.parquet
Warning: Missing columns in yellow_tripdata_2015-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-06.parquet
Processing file: yellow_tripdata_2015-07.parquet
Warning: Missing columns in yellow_tripdata_2015-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-07.parquet
Processing file: yellow_tripdata_2015-08.parquet
Warning: Missing columns in yellow_tripdata_2015-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-08.parquet
Processing file: yellow_tripdata_2015-09.parquet
Warning: Missing columns in yellow_tripdata_2015-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-09.parquet
Processing file: yellow_tripdata_2015-10.parquet
Warning: Missing columns in yellow_tripdata_2015-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-10.parquet
Processing file: yellow_tripdata_2015-11.parquet
Warning: Missing columns in yellow_tripdata_2015-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-11.parquet
Processing file: yellow_tripdata_2015-12.parquet
Warning: Missing columns in yellow_tripdata_2015-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2015-12.parquet
Processing file: yellow_tripdata_2016-01.parquet
Warning: Missing columns in yellow_tripdata_2016-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-01.parquet
Processing file: yellow_tripdata_2016-02.parquet
Warning: Missing columns in yellow_tripdata_2016-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-02.parquet
Processing file: yellow_tripdata_2016-03.parquet
Warning: Missing columns in yellow_tripdata_2016-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-03.parquet
Processing file: yellow_tripdata_2016-04.parquet
Warning: Missing columns in yellow_tripdata_2016-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-04.parquet
Processing file: yellow_tripdata_2016-05.parquet
Warning: Missing columns in yellow_tripdata_2016-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-05.parquet
Processing file: yellow_tripdata_2016-06.parquet
Warning: Missing columns in yellow_tripdata_2016-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-06.parquet
Processing file: yellow_tripdata_2016-07.parquet
Warning: Missing columns in yellow_tripdata_2016-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-07.parquet
Processing file: yellow_tripdata_2016-08.parquet
Warning: Missing columns in yellow_tripdata_2016-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-08.parquet
Processing file: yellow_tripdata_2016-09.parquet
Warning: Missing columns in yellow_tripdata_2016-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-09.parquet
Processing file: yellow_tripdata_2016-10.parquet
Warning: Missing columns in yellow_tripdata_2016-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-10.parquet
Processing file: yellow_tripdata_2016-11.parquet
Warning: Missing columns in yellow_tripdata_2016-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-11.parquet
Processing file: yellow_tripdata_2016-12.parquet
Warning: Missing columns in yellow_tripdata_2016-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2016-12.parquet
Processing file: yellow_tripdata_2017-01.parquet
Warning: Missing columns in yellow_tripdata_2017-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-01.parquet
Processing file: yellow_tripdata_2017-02.parquet
Warning: Missing columns in yellow_tripdata_2017-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-02.parquet
Processing file: yellow_tripdata_2017-03.parquet
Warning: Missing columns in yellow_tripdata_2017-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-03.parquet
Processing file: yellow_tripdata_2017-04.parquet
Warning: Missing columns in yellow_tripdata_2017-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-04.parquet
Processing file: yellow_tripdata_2017-05.parquet
Warning: Missing columns in yellow_tripdata_2017-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-05.parquet
Processing file: yellow_tripdata_2017-06.parquet
Warning: Missing columns in yellow_tripdata_2017-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-06.parquet
Processing file: yellow_tripdata_2017-07.parquet
Warning: Missing columns in yellow_tripdata_2017-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-07.parquet
Processing file: yellow_tripdata_2017-08.parquet
Warning: Missing columns in yellow_tripdata_2017-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-08.parquet
Processing file: yellow_tripdata_2017-09.parquet
Warning: Missing columns in yellow_tripdata_2017-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-09.parquet
Processing file: yellow_tripdata_2017-10.parquet
Warning: Missing columns in yellow_tripdata_2017-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-10.parquet
Processing file: yellow_tripdata_2017-11.parquet
Warning: Missing columns in yellow_tripdata_2017-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-11.parquet
Processing file: yellow_tripdata_2017-12.parquet
Warning: Missing columns in yellow_tripdata_2017-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2017-12.parquet
Processing file: yellow_tripdata_2018-01.parquet
Warning: Missing columns in yellow_tripdata_2018-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-01.parquet
Processing file: yellow_tripdata_2018-02.parquet
Warning: Missing columns in yellow_tripdata_2018-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-02.parquet
Processing file: yellow_tripdata_2018-03.parquet
Warning: Missing columns in yellow_tripdata_2018-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-03.parquet
Processing file: yellow_tripdata_2018-04.parquet
Warning: Missing columns in yellow_tripdata_2018-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-04.parquet
Processing file: yellow_tripdata_2018-05.parquet
Warning: Missing columns in yellow_tripdata_2018-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-05.parquet
Processing file: yellow_tripdata_2018-06.parquet
Warning: Missing columns in yellow_tripdata_2018-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-06.parquet
Processing file: yellow_tripdata_2018-07.parquet
Warning: Missing columns in yellow_tripdata_2018-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-07.parquet
Processing file: yellow_tripdata_2018-08.parquet
Warning: Missing columns in yellow_tripdata_2018-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-08.parquet
Processing file: yellow_tripdata_2018-09.parquet
Warning: Missing columns in yellow_tripdata_2018-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-09.parquet
Processing file: yellow_tripdata_2018-10.parquet
Warning: Missing columns in yellow_tripdata_2018-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-10.parquet
Processing file: yellow_tripdata_2018-11.parquet
Warning: Missing columns in yellow_tripdata_2018-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-11.parquet
Processing file: yellow_tripdata_2018-12.parquet
Warning: Missing columns in yellow_tripdata_2018-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2018-12.parquet
Processing file: yellow_tripdata_2019-01.parquet
Warning: Missing columns in yellow_tripdata_2019-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-01.parquet
Processing file: yellow_tripdata_2019-02.parquet
Warning: Missing columns in yellow_tripdata_2019-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-02.parquet
Processing file: yellow_tripdata_2019-03.parquet
Warning: Missing columns in yellow_tripdata_2019-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-03.parquet
Processing file: yellow_tripdata_2019-04.parquet
Warning: Missing columns in yellow_tripdata_2019-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-04.parquet
Processing file: yellow_tripdata_2019-05.parquet
Warning: Missing columns in yellow_tripdata_2019-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-05.parquet
Processing file: yellow_tripdata_2019-06.parquet
Warning: Missing columns in yellow_tripdata_2019-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-06.parquet
Processing file: yellow_tripdata_2019-07.parquet
Warning: Missing columns in yellow_tripdata_2019-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-07.parquet
Processing file: yellow_tripdata_2019-08.parquet
Warning: Missing columns in yellow_tripdata_2019-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-08.parquet
Processing file: yellow_tripdata_2019-09.parquet
Warning: Missing columns in yellow_tripdata_2019-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-09.parquet
Processing file: yellow_tripdata_2019-10.parquet
Warning: Missing columns in yellow_tripdata_2019-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-10.parquet
Processing file: yellow_tripdata_2019-11.parquet
Warning: Missing columns in yellow_tripdata_2019-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-11.parquet
Processing file: yellow_tripdata_2019-12.parquet
Warning: Missing columns in yellow_tripdata_2019-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2019-12.parquet
Processing file: yellow_tripdata_2020-01.parquet
Warning: Missing columns in yellow_tripdata_2020-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-01.parquet
Processing file: yellow_tripdata_2020-02.parquet
Warning: Missing columns in yellow_tripdata_2020-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-02.parquet
Processing file: yellow_tripdata_2020-03.parquet
Warning: Missing columns in yellow_tripdata_2020-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-03.parquet
Processing file: yellow_tripdata_2020-04.parquet
Warning: Missing columns in yellow_tripdata_2020-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-04.parquet
Processing file: yellow_tripdata_2020-05.parquet
Warning: Missing columns in yellow_tripdata_2020-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-05.parquet
Processing file: yellow_tripdata_2020-06.parquet
Warning: Missing columns in yellow_tripdata_2020-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-06.parquet
Processing file: yellow_tripdata_2020-07.parquet
Warning: Missing columns in yellow_tripdata_2020-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-07.parquet
Processing file: yellow_tripdata_2020-08.parquet
Warning: Missing columns in yellow_tripdata_2020-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-08.parquet
Processing file: yellow_tripdata_2020-09.parquet
Warning: Missing columns in yellow_tripdata_2020-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-09.parquet
Processing file: yellow_tripdata_2020-10.parquet
Warning: Missing columns in yellow_tripdata_2020-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-10.parquet
Processing file: yellow_tripdata_2020-11.parquet
Warning: Missing columns in yellow_tripdata_2020-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-11.parquet
Processing file: yellow_tripdata_2020-12.parquet
Warning: Missing columns in yellow_tripdata_2020-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2020-12.parquet
Processing file: yellow_tripdata_2021-01.parquet
Warning: Missing columns in yellow_tripdata_2021-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-01.parquet
Processing file: yellow_tripdata_2021-02.parquet
Warning: Missing columns in yellow_tripdata_2021-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-02.parquet
Processing file: yellow_tripdata_2021-03.parquet
Warning: Missing columns in yellow_tripdata_2021-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-03.parquet
Processing file: yellow_tripdata_2021-04.parquet
Warning: Missing columns in yellow_tripdata_2021-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-04.parquet
Processing file: yellow_tripdata_2021-05.parquet
Warning: Missing columns in yellow_tripdata_2021-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-05.parquet
Processing file: yellow_tripdata_2021-06.parquet
Warning: Missing columns in yellow_tripdata_2021-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-06.parquet
Processing file: yellow_tripdata_2021-07.parquet
Warning: Missing columns in yellow_tripdata_2021-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-07.parquet
Processing file: yellow_tripdata_2021-08.parquet
Warning: Missing columns in yellow_tripdata_2021-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-08.parquet
Processing file: yellow_tripdata_2021-09.parquet
Warning: Missing columns in yellow_tripdata_2021-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-09.parquet
Processing file: yellow_tripdata_2021-10.parquet
Warning: Missing columns in yellow_tripdata_2021-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-10.parquet
Processing file: yellow_tripdata_2021-11.parquet
Warning: Missing columns in yellow_tripdata_2021-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-11.parquet
Processing file: yellow_tripdata_2021-12.parquet
Warning: Missing columns in yellow_tripdata_2021-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2021-12.parquet
Processing file: yellow_tripdata_2022-01.parquet
Warning: Missing columns in yellow_tripdata_2022-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-01.parquet
Processing file: yellow_tripdata_2022-02.parquet
Warning: Missing columns in yellow_tripdata_2022-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-02.parquet
Processing file: yellow_tripdata_2022-03.parquet
Warning: Missing columns in yellow_tripdata_2022-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-03.parquet
Processing file: yellow_tripdata_2022-04.parquet
Warning: Missing columns in yellow_tripdata_2022-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-04.parquet
Processing file: yellow_tripdata_2022-05.parquet
Warning: Missing columns in yellow_tripdata_2022-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-05.parquet
Processing file: yellow_tripdata_2022-06.parquet
Warning: Missing columns in yellow_tripdata_2022-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-06.parquet
Processing file: yellow_tripdata_2022-07.parquet
Warning: Missing columns in yellow_tripdata_2022-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-07.parquet
Processing file: yellow_tripdata_2022-08.parquet
Warning: Missing columns in yellow_tripdata_2022-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-08.parquet
Processing file: yellow_tripdata_2022-09.parquet
Warning: Missing columns in yellow_tripdata_2022-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-09.parquet
Processing file: yellow_tripdata_2022-10.parquet
Warning: Missing columns in yellow_tripdata_2022-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-10.parquet
Processing file: yellow_tripdata_2022-11.parquet
Warning: Missing columns in yellow_tripdata_2022-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-11.parquet
Processing file: yellow_tripdata_2022-12.parquet
Warning: Missing columns in yellow_tripdata_2022-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2022-12.parquet
Processing file: yellow_tripdata_2023-01.parquet
Warning: Missing columns in yellow_tripdata_2023-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-01.parquet
Processing file: yellow_tripdata_2023-02.parquet
Warning: Missing columns in yellow_tripdata_2023-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-02.parquet
Processing file: yellow_tripdata_2023-03.parquet
Warning: Missing columns in yellow_tripdata_2023-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-03.parquet
Processing file: yellow_tripdata_2023-04.parquet
Warning: Missing columns in yellow_tripdata_2023-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-04.parquet
Processing file: yellow_tripdata_2023-05.parquet
Warning: Missing columns in yellow_tripdata_2023-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-05.parquet
Processing file: yellow_tripdata_2023-06.parquet
Warning: Missing columns in yellow_tripdata_2023-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-06.parquet
Processing file: yellow_tripdata_2023-07.parquet
Warning: Missing columns in yellow_tripdata_2023-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-07.parquet
Processing file: yellow_tripdata_2023-08.parquet
Warning: Missing columns in yellow_tripdata_2023-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-08.parquet
Processing file: yellow_tripdata_2023-09.parquet
Warning: Missing columns in yellow_tripdata_2023-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-09.parquet
Processing file: yellow_tripdata_2023-10.parquet
Warning: Missing columns in yellow_tripdata_2023-10.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-10.parquet
Processing file: yellow_tripdata_2023-11.parquet
Warning: Missing columns in yellow_tripdata_2023-11.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-11.parquet
Processing file: yellow_tripdata_2023-12.parquet
Warning: Missing columns in yellow_tripdata_2023-12.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2023-12.parquet
Processing file: yellow_tripdata_2024-01.parquet
Warning: Missing columns in yellow_tripdata_2024-01.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2024-01.parquet
Processing file: yellow_tripdata_2024-02.parquet
Warning: Missing columns in yellow_tripdata_2024-02.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2024-02.parquet
Processing file: yellow_tripdata_2024-03.parquet
Warning: Missing columns in yellow_tripdata_2024-03.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2024-03.parquet
Processing file: yellow_tripdata_2024-04.parquet
Warning: Missing columns in yellow_tripdata_2024-04.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2024-04.parquet
Processing file: yellow_tripdata_2024-05.parquet
Warning: Missing columns in yellow_tripdata_2024-05.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2024-05.parquet
Processing file: yellow_tripdata_2024-06.parquet
Warning: Missing columns in yellow_tripdata_2024-06.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2024-06.parquet
Processing file: yellow_tripdata_2024-07.parquet
Warning: Missing columns in yellow_tripdata_2024-07.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2024-07.parquet
Processing file: yellow_tripdata_2024-08.parquet
Warning: Missing columns in yellow_tripdata_2024-08.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2024-08.parquet
Processing file: yellow_tripdata_2024-09.parquet
Warning: Missing columns in yellow_tripdata_2024-09.parquet: ['Trip_Pickup_DateTime', 'Trip_Dropoff_DateTime', 'Passenger_Count', 'Trip_Distance', 'Start_Lon', 'Start_Lat', 'End_Lon', 'End_Lat', 'Fare_Amt', 'Total_Amt']
Transformed file saved: C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2024-09.parquet

"""

# Split the log into lines and process
log_lines = log_text.strip().split("\n")

# Initialize lists to capture files that are fine and those needing further inspection
files_ready = []
files_inspect = []

# Iterate through each line in the log
for line in log_lines:
    if "Transformed file saved:" in line:
        # Extract the file path
        file_path = line.split(":")[-1].strip()
        files_ready.append(file_path)
    elif "Warning: Missing columns" in line:
        # Extract the file name from the warning line
        file_name = line.split("Warning: Missing columns in ")[-1].split(":")[0].strip()
        files_inspect.append(file_name)

# Save the "Files Ready to Go" list to a CSV file
with open("files_ready.csv", mode="w", newline="", encoding="utf-8") as ready_file:
    writer = csv.writer(ready_file)
    writer.writerow(["Files Ready to Go"])
    for file in files_ready:
        writer.writerow([file])

# Save the "Files Requiring Further Inspection" list to a CSV file
with open("files_inspect.csv", mode="w", newline="", encoding="utf-8") as inspect_file:
    writer = csv.writer(inspect_file)
    writer.writerow(["Files Requiring Further Inspection"])
    for file in files_inspect:
        writer.writerow([file])

# Print confirmation
print("Files processed and saved as 'files_ready.csv' and 'files_inspect.csv'.")


Files processed and saved as 'files_ready.csv' and 'files_inspect.csv'.


In [8]:
# Checking what are the differences to proceed with a different structure and logic to correct all the files. 

import os
import pyarrow.parquet as pq

# Define folder paths
raw_data_folder = "C:/Users/Usuario/Documents/ML_Projects/taxi_demand_predictor/notebooks/data/raw"

# File paths for inspection
file_to_inspect = os.path.join(raw_data_folder, "yellow_tripdata_2010-01.parquet")
sample_success_file = os.path.join(raw_data_folder, "yellow_tripdata_2009-01.parquet")

def inspect_parquet(file_path):
    """
    Function to inspect the structure of a parquet file.
    It will return column names and the first few rows.
    """
    try:
        # Load the parquet file
        table = pq.read_table(file_path)
        df = table.to_pandas()
        
        # Print basic details
        print(f"Structure of file: {file_path}")
        print(f"Number of rows: {df.shape[0]}, Number of columns: {df.shape[1]}")
        print("Column names:")
        print(df.columns.tolist())
        print("\nSample rows:")
        print(df.head())
        
        return set(df.columns)
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return None

# Inspect the file with issues
columns_inspect = inspect_parquet(file_to_inspect)

# Inspect a successfully processed file
columns_success = inspect_parquet(sample_success_file)

# Compare columns
if columns_inspect and columns_success:
    print("\nColumns in successful file but missing in inspected file:")
    print(columns_success - columns_inspect)
    print("\nColumns in inspected file but not in successful file:")
    print(columns_inspect - columns_success)


Structure of file: C:/Users/Usuario/Documents/ML_Projects/taxi_demand_predictor/notebooks/data/raw\yellow_tripdata_2010-01.parquet
Number of rows: 14863778, Number of columns: 18
Column names:
['vendor_id', 'pickup_datetime', 'dropoff_datetime', 'passenger_count', 'trip_distance', 'pickup_longitude', 'pickup_latitude', 'rate_code', 'store_and_fwd_flag', 'dropoff_longitude', 'dropoff_latitude', 'payment_type', 'fare_amount', 'surcharge', 'mta_tax', 'tip_amount', 'tolls_amount', 'total_amount']

Sample rows:
  vendor_id      pickup_datetime     dropoff_datetime  passenger_count  \
0       VTS  2010-01-26 07:41:00  2010-01-26 07:45:00                1   
1       DDS  2010-01-30 23:31:00  2010-01-30 23:46:12                1   
2       DDS  2010-01-18 20:22:20  2010-01-18 20:38:12                1   
3       VTS  2010-01-09 01:18:00  2010-01-09 01:35:00                2   
4       CMT  2010-01-18 19:10:14  2010-01-18 19:17:07                1   

   trip_distance  pickup_longitude  pickup_

In [9]:
# checking if those files that we couldnt process share the same structure or vary.

import os
import pandas as pd
import pyarrow.parquet as pq

# Paths
files_inspect_path = r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\files_inspect.csv"
raw_folder_path = r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\raw"
output_log_path = r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\structure_analysis_log.csv"

# Read files_inspect.csv
files_to_check = pd.read_csv(files_inspect_path)["Files Requiring Further Inspection"].tolist()

# Initialize the structure log
structure_log = []

# Iterate through files in the raw folder based on files_inspect.csv
for file_name in files_to_check:
    file_path = os.path.join(raw_folder_path, file_name)
    try:
        # Check if the file exists
        if not os.path.exists(file_path):
            structure_log.append({"File": file_name, "Status": "File not found", "Columns": None})
            continue

        # Read the Parquet file and log its structure
        parquet_file = pq.ParquetFile(file_path)
        columns = parquet_file.schema.names
        structure_log.append({"File": file_name, "Status": "Success", "Columns": columns})
    except Exception as e:
        # Log any errors during the process
        structure_log.append({"File": file_name, "Status": f"Error: {str(e)}", "Columns": None})

# Save the structure log to a CSV file
pd.DataFrame(structure_log).to_csv(output_log_path, index=False)

print(f"Structure analysis complete. Results saved to {output_log_path}.")


Structure analysis complete. Results saved to C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\structure_analysis_log.csv.


In [10]:
# Having the structure clear, we are going to implement the changes in two batches (2010 first and then 2011 and forward later) and check if the rest of the files were corrected as expected.

import pandas as pd
from pathlib import Path

# Define paths
base_raw_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\raw")
base_transformed_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed")
files_inspect_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\files_inspect.csv")

# Read files_inspect.csv
files_to_check = pd.read_csv(files_inspect_path)["Files Requiring Further Inspection"].tolist()

# Process files for the year 2010
for file_name in files_to_check:
    if "2010" in file_name:  # Filter only for 2010 files
        raw_file_path = base_raw_path / file_name
        transformed_file_path = base_transformed_path / file_name

        try:
            # Load the raw data
            df = pd.read_parquet(raw_file_path)

            # Select relevant columns (2010 structure)
            columns_to_keep = [
                "pickup_datetime",
                "dropoff_datetime",
                "passenger_count",
                "trip_distance",
                "pickup_longitude",
                "pickup_latitude",
                "dropoff_longitude",
                "dropoff_latitude",
                "fare_amount",
                "total_amount"
            ]
            df = df[columns_to_keep]

            # Convert datetime columns
            df["pickup_datetime"] = pd.to_datetime(df["pickup_datetime"], errors="coerce")
            df["dropoff_datetime"] = pd.to_datetime(df["dropoff_datetime"], errors="coerce")

            # Filter rows based on the correct year (2010) and month from filename
            year, month = 2010, int(file_name.split("-")[1].split(".")[0])
            df = df[
                (df["pickup_datetime"].dt.year == year) & 
                (df["pickup_datetime"].dt.month == month)
            ]

            # Add derived metrics
            df["trip_duration_minutes"] = (
                (df["dropoff_datetime"] - df["pickup_datetime"]).dt.total_seconds() / 60
            )
            df["average_speed_kmph"] = (df["trip_distance"] / df["trip_duration_minutes"]) * 60

            # Validate and clean the data
            df = df[
                (df["passenger_count"] > 0) & 
                (df["trip_distance"] > 0) & 
                (df["trip_duration_minutes"] > 0) & 
                (df["pickup_longitude"].between(-180, 180)) & 
                (df["pickup_latitude"].between(-90, 90)) & 
                (df["dropoff_longitude"].between(-180, 180)) & 
                (df["dropoff_latitude"].between(-90, 90)) & 
                (df["fare_amount"] >= 0) & 
                (df["total_amount"] >= 0)
            ]

            # Save the transformed data
            df.to_parquet(transformed_file_path, index=False)

            print(f"Successfully processed: {file_name}")

        except Exception as e:
            print(f"Error processing {file_name}: {e}")



Successfully processed: yellow_tripdata_2010-01.parquet
Successfully processed: yellow_tripdata_2010-02.parquet
Successfully processed: yellow_tripdata_2010-03.parquet
Successfully processed: yellow_tripdata_2010-04.parquet
Successfully processed: yellow_tripdata_2010-05.parquet
Successfully processed: yellow_tripdata_2010-06.parquet
Successfully processed: yellow_tripdata_2010-07.parquet
Successfully processed: yellow_tripdata_2010-08.parquet
Successfully processed: yellow_tripdata_2010-09.parquet
Successfully processed: yellow_tripdata_2010-10.parquet
Successfully processed: yellow_tripdata_2010-11.parquet
Successfully processed: yellow_tripdata_2010-12.parquet


In [11]:
# 2011 and further here:

import pandas as pd
from pathlib import Path

# Define paths
base_raw_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\raw")
base_transformed_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed")
files_inspect_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\files_inspect.csv")

# Read files_inspect.csv
files_to_check = pd.read_csv(files_inspect_path)["Files Requiring Further Inspection"].tolist()

# Process files for 2011 and beyond
for file_name in files_to_check:
    if "2011" in file_name or "2012" in file_name or "2013" in file_name or "2014" in file_name or "2015" in file_name:
        raw_file_path = base_raw_path / file_name
        transformed_file_path = base_transformed_path / file_name

        try:
            # Load the raw data
            df = pd.read_parquet(raw_file_path)

            # Select relevant columns (2011 and beyond structure)
            columns_to_keep = [
                "tpep_pickup_datetime",
                "tpep_dropoff_datetime",
                "passenger_count",
                "trip_distance",
                "PULocationID",
                "DOLocationID",
                "fare_amount",
                "total_amount"
            ]
            df = df[columns_to_keep]

            # Convert datetime columns
            df["tpep_pickup_datetime"] = pd.to_datetime(df["tpep_pickup_datetime"], errors="coerce")
            df["tpep_dropoff_datetime"] = pd.to_datetime(df["tpep_dropoff_datetime"], errors="coerce")

            # Filter rows based on the correct year and month from the filename
            year, month = int(file_name.split("-")[0].split("_")[-1]), int(file_name.split("-")[1].split(".")[0])
            df = df[
                (df["tpep_pickup_datetime"].dt.year == year) & 
                (df["tpep_pickup_datetime"].dt.month == month)
            ]

            # Add derived metrics
            df["trip_duration_minutes"] = (
                (df["tpep_dropoff_datetime"] - df["tpep_pickup_datetime"]).dt.total_seconds() / 60
            )
            df["average_speed_kmph"] = (df["trip_distance"] / df["trip_duration_minutes"]) * 60

            # Validate and clean the data
            df = df[
                (df["passenger_count"] > 0) & 
                (df["trip_distance"] > 0) & 
                (df["trip_duration_minutes"] > 0) & 
                (df["fare_amount"] >= 0) & 
                (df["total_amount"] >= 0)
            ]

            # Save the transformed data
            df.to_parquet(transformed_file_path, index=False)

            print(f"Successfully processed: {file_name}")

        except Exception as e:
            print(f"Error processing {file_name}: {e}")


Successfully processed: yellow_tripdata_2011-01.parquet
Successfully processed: yellow_tripdata_2011-02.parquet
Successfully processed: yellow_tripdata_2011-03.parquet
Successfully processed: yellow_tripdata_2011-04.parquet
Successfully processed: yellow_tripdata_2011-05.parquet
Successfully processed: yellow_tripdata_2011-06.parquet
Successfully processed: yellow_tripdata_2011-07.parquet
Successfully processed: yellow_tripdata_2011-08.parquet
Successfully processed: yellow_tripdata_2011-09.parquet
Successfully processed: yellow_tripdata_2011-10.parquet
Successfully processed: yellow_tripdata_2011-11.parquet
Successfully processed: yellow_tripdata_2011-12.parquet
Successfully processed: yellow_tripdata_2012-01.parquet
Successfully processed: yellow_tripdata_2012-02.parquet
Successfully processed: yellow_tripdata_2012-03.parquet
Successfully processed: yellow_tripdata_2012-04.parquet
Successfully processed: yellow_tripdata_2012-05.parquet
Successfully processed: yellow_tripdata_2012-06.

In [12]:
# Script for standardize again the columns, this time the whole transformed folder file by file. 

import pandas as pd
from pathlib import Path

# Define paths
transformed_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed")
output_path = transformed_path  # Overwriting the transformed files

# Unified column mapping
column_mapping = {
    "Trip_Pickup_DateTime": "pickup_datetime",
    "Trip_Dropoff_DateTime": "dropoff_datetime",
    "pickup_datetime": "pickup_datetime",
    "dropoff_datetime": "dropoff_datetime",
    "Start_Lon": "pickup_longitude",
    "Start_Lat": "pickup_latitude",
    "End_Lon": "dropoff_longitude",
    "End_Lat": "dropoff_latitude",
    "pickup_longitude": "pickup_longitude",
    "pickup_latitude": "pickup_latitude",
    "dropoff_longitude": "dropoff_longitude",
    "dropoff_latitude": "dropoff_latitude",
    "Fare_Amt": "fare_amount",
    "Total_Amt": "total_amount",
    "Trip_Distance": "trip_distance",
    "Passenger_Count": "passenger_count"
}

# List all transformed files
transformed_files = list(transformed_path.glob("*.parquet"))

for file in transformed_files:
    try:
        # Load each file
        df = pd.read_parquet(file)
        
        # Rename columns based on the mapping
        df = df.rename(columns=column_mapping)
        
        # Ensure unified data types
        df["pickup_datetime"] = pd.to_datetime(df["pickup_datetime"], errors="coerce")
        df["dropoff_datetime"] = pd.to_datetime(df["dropoff_datetime"], errors="coerce")
        df["trip_distance"] = pd.to_numeric(df["trip_distance"], errors="coerce")
        df["fare_amount"] = pd.to_numeric(df["fare_amount"], errors="coerce")
        df["total_amount"] = pd.to_numeric(df["total_amount"], errors="coerce")
        df["passenger_count"] = pd.to_numeric(df["passenger_count"], errors="coerce")
        
        # Add derived metrics if missing
        if "trip_duration_minutes" not in df.columns:
            df["trip_duration_minutes"] = (
                (df["dropoff_datetime"] - df["pickup_datetime"]).dt.total_seconds() / 60
            )
        if "average_speed_kmph" not in df.columns:
            df["average_speed_kmph"] = (df["trip_distance"] / df["trip_duration_minutes"]) * 60
        
        # Validate data ranges
        df = df[
            (df["passenger_count"] > 0) &
            (df["trip_distance"] > 0) &
            (df["trip_duration_minutes"] > 0) &
            (df["pickup_longitude"].between(-180, 180)) &
            (df["pickup_latitude"].between(-90, 90)) &
            (df["dropoff_longitude"].between(-180, 180)) &
            (df["dropoff_latitude"].between(-90, 90)) &
            (df["fare_amount"] >= 0) &
            (df["total_amount"] >= 0)
        ]
        
        # Save the cleaned file
        df.to_parquet(output_path / file.name, index=False)
        print(f"Successfully standardized: {file.name}")
    
    except Exception as e:
        print(f"Error processing {file.name}: {e}")


Successfully standardized: yellow_tripdata_2009-01.parquet
Successfully standardized: yellow_tripdata_2009-02.parquet
Successfully standardized: yellow_tripdata_2009-03.parquet
Successfully standardized: yellow_tripdata_2009-04.parquet
Successfully standardized: yellow_tripdata_2009-05.parquet
Successfully standardized: yellow_tripdata_2009-06.parquet
Successfully standardized: yellow_tripdata_2009-07.parquet
Successfully standardized: yellow_tripdata_2009-08.parquet
Successfully standardized: yellow_tripdata_2009-09.parquet
Successfully standardized: yellow_tripdata_2009-10.parquet
Successfully standardized: yellow_tripdata_2009-11.parquet
Successfully standardized: yellow_tripdata_2009-12.parquet
Successfully standardized: yellow_tripdata_2010-01.parquet
Successfully standardized: yellow_tripdata_2010-02.parquet
Successfully standardized: yellow_tripdata_2010-03.parquet
Successfully standardized: yellow_tripdata_2010-04.parquet
Successfully standardized: yellow_tripdata_2010-05.parqu

In [13]:
# Checking for 2011 - 2015 structure to then use it for the rest of te files 

import pandas as pd
from pathlib import Path

# Define the absolute paths
base_transformed_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed")

# Define the range of years to inspect
years_to_check = range(2011, 2016)

# Iterate through the years and inspect one file per year
for year in years_to_check:
    file_name = f"yellow_tripdata_{year}-01.parquet"  # Using January files for inspection
    file_path = base_transformed_path / file_name
    try:
        # Load the data
        df = pd.read_parquet(file_path)
        
        # Print basic information
        print(f"Year: {year}")
        print(f"Columns in {file_name}:")
        print(df.columns.tolist())
        print("\nSample Data:")
        print(df.head())
        print("-" * 50)
    except Exception as e:
        print(f"Error loading {file_name}: {e}")


Year: 2011
Columns in yellow_tripdata_2011-01.parquet:
['tpep_pickup_datetime', 'tpep_dropoff_datetime', 'passenger_count', 'trip_distance', 'PULocationID', 'DOLocationID', 'fare_amount', 'total_amount', 'trip_duration_minutes', 'average_speed_kmph']

Sample Data:
  tpep_pickup_datetime tpep_dropoff_datetime  passenger_count  trip_distance  \
0  2011-01-01 00:58:10   2011-01-01 01:15:35                1            8.0   
1  2011-01-01 00:23:27   2011-01-01 00:39:39                1            1.6   
2  2011-01-01 00:42:08   2011-01-01 00:51:50                4            2.5   
3  2011-01-01 00:53:36   2011-01-01 01:17:43                2            3.9   
4  2011-01-01 00:37:47   2011-01-01 00:41:20                2            0.6   

   PULocationID  DOLocationID  fare_amount  total_amount  \
0           138           256         20.1         21.10   
1           170           237          9.3         10.30   
2           237           170          8.1          9.10   
3           17

In [14]:
#Code to finish standardazing the 2011 - 2015 files.

import pandas as pd
from pathlib import Path

# Define paths
base_transformed_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed")

# Define range of years to standardize (2011-2015)
years_to_standardize = range(2011, 2016)

# Iterate through the years and apply standardization
for year in years_to_standardize:
    for month in range(1, 13):
        file_name = f"yellow_tripdata_{year}-{month:02d}.parquet"
        file_path = base_transformed_path / file_name
        
        try:
            # Load data
            df = pd.read_parquet(file_path)
            
            # Rename columns to match the 2009-2010 schema
            df.rename(columns={
                'tpep_pickup_datetime': 'pickup_datetime',
                'tpep_dropoff_datetime': 'dropoff_datetime',
                'PULocationID': 'pickup_location_id',
                'DOLocationID': 'dropoff_location_id'
            }, inplace=True)
            
            # Standardize column order to match 2009-2010 files
            columns_order = [
                'pickup_datetime', 'dropoff_datetime', 'passenger_count', 'trip_distance',
                'pickup_location_id', 'dropoff_location_id', 'fare_amount', 'total_amount',
                'trip_duration_minutes', 'average_speed_kmph'
            ]
            df = df[columns_order]
            
            # Save the standardized data back to the transformed folder
            df.to_parquet(file_path, index=False)
            
            print(f"Successfully standardized: {file_name}")
        
        except Exception as e:
            print(f"Error processing {file_name}: {e}")
 

Successfully standardized: yellow_tripdata_2011-01.parquet
Successfully standardized: yellow_tripdata_2011-02.parquet
Successfully standardized: yellow_tripdata_2011-03.parquet
Successfully standardized: yellow_tripdata_2011-04.parquet
Successfully standardized: yellow_tripdata_2011-05.parquet
Successfully standardized: yellow_tripdata_2011-06.parquet
Successfully standardized: yellow_tripdata_2011-07.parquet
Successfully standardized: yellow_tripdata_2011-08.parquet
Successfully standardized: yellow_tripdata_2011-09.parquet
Successfully standardized: yellow_tripdata_2011-10.parquet
Successfully standardized: yellow_tripdata_2011-11.parquet
Successfully standardized: yellow_tripdata_2011-12.parquet
Successfully standardized: yellow_tripdata_2012-01.parquet
Successfully standardized: yellow_tripdata_2012-02.parquet
Successfully standardized: yellow_tripdata_2012-03.parquet
Successfully standardized: yellow_tripdata_2012-04.parquet
Successfully standardized: yellow_tripdata_2012-05.parqu

In [5]:
# We will need to convert those columns from longitude and latitude to locationIDs' to keep the format consistent
# To do that we need to check the 'taxi_zone.cs' file first and make sure we understand it to later on use it as a mapping tool

import pandas as pd

# Path to the taxi_zones.csv file
file_path = r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\taxi_zones.csv"

# Load the CSV file
taxi_zones_df = pd.read_csv(file_path)

# Display the column names
print("Columns in taxi_zones.csv:")
print(taxi_zones_df.columns)

# Display a few rows to understand the structure
print("\nSample rows from taxi_zones.csv:")
print(taxi_zones_df.head())

Columns in taxi_zones.csv:
Index(['OBJECTID', 'Shape_Leng', 'the_geom', 'Shape_Area', 'zone',
       'LocationID', 'borough'],
      dtype='object')

Sample rows from taxi_zones.csv:
   OBJECTID  Shape_Leng                                           the_geom  \
0         1    0.116357  MULTIPOLYGON (((-74.18445299999996 40.69499599...   
1         2    0.433470  MULTIPOLYGON (((-73.82337597260663 40.63898704...   
2         3    0.084341  MULTIPOLYGON (((-73.84792614099985 40.87134223...   
3         4    0.043567  MULTIPOLYGON (((-73.97177410965318 40.72582128...   
4         5    0.092146  MULTIPOLYGON (((-74.17421738099989 40.56256808...   

   Shape_Area                     zone  LocationID        borough  
0    0.000782           Newark Airport           1            EWR  
1    0.004866              Jamaica Bay           2         Queens  
2    0.000314  Allerton/Pelham Gardens           3          Bronx  
3    0.000112            Alphabet City           4      Manhattan  
4    0.0

In [2]:
# MAÑANA VAS A CONTINUAR CON EL ÚLTIMO CÓDIGO DADO POR CHATGPT
import pandas as pd
from shapely.wkt import loads as wkt_loads
from shapely.geometry import Point
import geopandas as gpd
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor
import os

# Paths
transformed_folder = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed")
taxi_zones_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\taxi_zones.csv")

# Load taxi zones data
print("Loading and optimizing taxi zones...")
taxi_zones = pd.read_csv(taxi_zones_path)

# Parse geometry using shapely's WKT loader
print("Parsing geometry...")
taxi_zones["geometry"] = taxi_zones["the_geom"].apply(
    lambda geom: wkt_loads(geom) if isinstance(geom, str) and geom.startswith("MULTIPOLYGON") else None
)

# Convert to GeoDataFrame
taxi_zones = gpd.GeoDataFrame(taxi_zones, geometry="geometry", crs="EPSG:4326")

# Filter only valid geometries
taxi_zones = taxi_zones[taxi_zones.geometry.notnull()]

# Create spatial index
print("Building spatial index...")
spatial_index = taxi_zones.sindex

# Function to map coordinates to LocationID using spatial indexing
def map_coordinates_to_location_id(longitude, latitude):
    point = Point(longitude, latitude)
    possible_matches_index = list(spatial_index.intersection(point.bounds))
    possible_matches = taxi_zones.iloc[possible_matches_index]

    for _, zone in possible_matches.iterrows():
        if zone.geometry.contains(point):
            return zone["LocationID"]
    return None

# Function to process a single file
def process_file(file_path):
    try:
        print(f"Processing file: {file_path.name}")
        df = pd.read_parquet(file_path)

        # Check for necessary columns
        if {"pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude"}.issubset(df.columns):
            # Map coordinates to LocationID using parallel apply
            df["pickup_location_id"] = df.apply(
                lambda row: map_coordinates_to_location_id(row["pickup_longitude"], row["pickup_latitude"]), axis=1
            )
            df["dropoff_location_id"] = df.apply(
                lambda row: map_coordinates_to_location_id(row["dropoff_longitude"], row["dropoff_latitude"]), axis=1
            )

            # Drop old longitude/latitude columns
            df.drop(["pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude"], axis=1, inplace=True)

            # Save updated file with Snappy compression
            df.to_parquet(file_path, index=False, compression="snappy")
            print(f"File updated successfully: {file_path.name}")
        else:
            print(f"File skipped (missing necessary columns): {file_path.name}")
    except Exception as e:
        print(f"Error processing file {file_path.name}: {e}")

# Process files in parallel
def process_files_in_parallel():
    print("Processing transformed files in parallel...")
    file_paths = list(transformed_folder.glob("*.parquet"))

    with ProcessPoolExecutor(max_workers=os.cpu_count()) as executor:
        executor.map(process_file, file_paths)

    print("Processing completed.")

# Execute the processing
process_files_in_parallel()

Loading and optimizing taxi zones...
Parsing geometry...
Building spatial index...
Processing transformed files in parallel...
Processing completed.


In [3]:
import pandas as pd
from pathlib import Path

# Load a sample file
file_path = Path(r"C:\Users\Usuario\Documents\ML_Projects\taxi_demand_predictor\notebooks\data\transformed\yellow_tripdata_2009-01.parquet")
df = pd.read_parquet(file_path)

print(df.head())


      pickup_datetime    dropoff_datetime  passenger_count  trip_distance  \
0 2009-01-04 02:52:00 2009-01-04 03:02:00                1           2.63   
1 2009-01-04 03:31:00 2009-01-04 03:38:00                3           4.55   
2 2009-01-03 15:43:00 2009-01-03 15:57:00                5          10.35   
3 2009-01-01 20:52:58 2009-01-01 21:14:00                1           5.00   
4 2009-01-24 16:18:23 2009-01-24 16:24:56                1           0.40   

   pickup_longitude  pickup_latitude  dropoff_longitude  dropoff_latitude  \
0        -73.991957        40.721567         -73.993803         40.695922   
1        -73.982102        40.736290         -73.955850         40.768030   
2        -74.002587        40.739748         -73.869983         40.770225   
3        -73.974267        40.790955         -73.996558         40.731849   
4        -74.001580        40.719382         -74.008378         40.720350   

   fare_amount  total_amount  trip_duration_minutes  average_speed_kmph  
