In [1]:
import json, os, requests
from dateutil.relativedelta import relativedelta 
from datetime import datetime
import pandas as pd
pd.set_option('display.max_columns', 30)

In [2]:
current_datetime = datetime.now() - relativedelta(months=2)

formatted_datetime = current_datetime.strftime('%Y-%m-%d')


url = f"https://data.cityofchicago.org/resource/ajtu-isnz.json?$where=trip_start_timestamp >= '{formatted_datetime}T00:00:00.000' AND trip_start_timestamp <= '{formatted_datetime}T23:59:59.000'&$limit=30000"
headers = {'X-App-Token': os.environ.get('CHICAGO_API_TOKEN')}

response = requests.get(url, headers=headers, verify=False)

data = response.json()




In [3]:
taxi_trips = pd.DataFrame(data)

taxi_trips.head(5)

Unnamed: 0,trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area,dropoff_community_area,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,pickup_centroid_location,dropoff_centroid_latitude,dropoff_centroid_longitude,dropoff_centroid_location,pickup_census_tract,dropoff_census_tract
0,71d174437061ee12d24a938a0ae385f26ae015c5,9796a534d63f7be805edd46db7bd062bbdb30786df4dcd...,2024-02-29T23:45:00.000,2024-03-01T00:00:00.000,1356,17.73,76,28.0,43.5,9.6,0,4,57.6,Credit Card,Taxicab Insurance Agency Llc,41.980264315,-87.913624596,"{'type': 'Point', 'coordinates': [-87.91362459...",41.874005383,-87.66351755,"{'type': 'Point', 'coordinates': [-87.66351754...",,
1,f00469441c938315e966fa798b2e77d32a1f89db,63d6f8cde6f0de8a4cf504adc40f8c49c00358506766a6...,2024-02-29T23:45:00.000,2024-03-01T00:00:00.000,1080,8.1,32,41.0,22.75,0.0,0,0,22.75,Unknown,Taxi Affiliation Services,41.878865584,-87.625192142,"{'type': 'Point', 'coordinates': [-87.62519214...",41.794090253,-87.592310855,"{'type': 'Point', 'coordinates': [-87.59231085...",,
2,edcebeb7f3d5d54568463f5d71f5c6cd1a2d1cd9,c8549ac4a2842a038f2a5b91ecf894ca7cb04d60558be3...,2024-02-29T23:45:00.000,2024-03-01T00:15:00.000,1443,9.51,76,,26.25,0.0,0,4,30.25,Cash,Sun Taxi,41.980264315,-87.913624596,"{'type': 'Point', 'coordinates': [-87.91362459...",,,,,
3,edcc84164819a8b9e80253ae884080aac508c477,a3bc50ed4e7b5de8e74d56919ae6e14b91e33c63c6c7d9...,2024-02-29T23:45:00.000,2024-02-29T23:45:00.000,6,0.0,8,8.0,8.0,2.0,0,0,10.5,Credit Card,Sun Taxi,41.899602111,-87.633308037,"{'type': 'Point', 'coordinates': [-87.63330803...",41.899602111,-87.633308037,"{'type': 'Point', 'coordinates': [-87.63330803...",,
4,ed4ffc7c91d92591f2950759b56b47ae69a5302d,6f004629887a8286501ba6043f9130a2aaaaa3b560fbad...,2024-02-29T23:45:00.000,2024-02-29T23:45:00.000,0,0.0,56,,3.25,0.0,0,2,5.25,Cash,Chicago City Taxi Association,41.785998518,-87.750934289,"{'type': 'Point', 'coordinates': [-87.75093428...",,,,17031980100.0,


In [None]:
taxi_trips.info()

In [None]:
# lássuk miből mennyi van
taxi_trips.describe()

In [None]:
# 5 példa ahol a viteldíj hiányzik
taxi_trips[taxi_trips['fare'].isna()].sample(5)

### Transformation: deal with NaN values

In [4]:
# oszlopok eltűntetése
taxi_trips.drop(['pickup_census_tract','dropoff_census_tract', 'pickup_centroid_location', 'dropoff_centroid_location'], axis=1, inplace=True) 

### Transformation: renaming


In [5]:
taxi_trips.rename(columns={
    'pickup_community_area': 'pickup_community_area_id',
    'dropoff_community_area': 'dropoff_community_area_id'},
    inplace=True
)

### Transformation: create helper column for weather

In [6]:
taxi_trips['trip_start_timestamp'] = pd.to_datetime(taxi_trips['trip_start_timestamp'])

In [9]:
# azért hogy a weather table-t joinolni tudjuk fel kell kerekítenünk csak órára
taxi_trips["datetime_for_weather"] = taxi_trips['trip_start_timestamp'].dt.floor('h')

#### check joining the trips and the weather data

In [10]:
# Get weather data amd transform

current_datetime = datetime.now() - relativedelta(months=2)

formatted_datetime = current_datetime.strftime('%Y-%m-%d')

url = "https://archive-api.open-meteo.com/v1/era5"
date = '2024-04-01'
params={
    'latitude': 41.85,
    'longitude': -87.65,
    'start_date': formatted_datetime,
    'end_date': formatted_datetime,
    'hourly': 'temperature_2m,wind_speed_10m,precipitation,rain'
}

response = requests.get(url, params=params)

weather_data = response.json()

weather_data_filtered = {
'datetime': weather_data['hourly']['time'],
'temperature': weather_data['hourly']['temperature_2m'],
'wind_speed': weather_data['hourly']['wind_speed_10m'],
'precipitation': weather_data['hourly']['precipitation'],
'rain': weather_data['hourly']['rain']
}


weather_df = pd.DataFrame(weather_data_filtered)

weather_df['datetime'] = pd.to_datetime(weather_df['datetime'])

In [11]:
taxi_trips_with_weather = taxi_trips.merge(weather_df, left_on='datetime_for_weather', right_on='datetime')

In [12]:
taxi_trips_with_weather.head()

Unnamed: 0,trip_id,taxi_id,trip_start_timestamp,trip_end_timestamp,trip_seconds,trip_miles,pickup_community_area_id,dropoff_community_area_id,fare,tips,tolls,extras,trip_total,payment_type,company,pickup_centroid_latitude,pickup_centroid_longitude,dropoff_centroid_latitude,dropoff_centroid_longitude,datetime_for_weather,datetime,temperature,wind_speed,precipitation,rain
0,71d174437061ee12d24a938a0ae385f26ae015c5,9796a534d63f7be805edd46db7bd062bbdb30786df4dcd...,2024-02-29 23:45:00,2024-03-01T00:00:00.000,1356,17.73,76,28.0,43.5,9.6,0,4,57.6,Credit Card,Taxicab Insurance Agency Llc,41.980264315,-87.913624596,41.874005383,-87.66351755,2024-02-29 23:00:00,2024-02-29 23:00:00,4.7,20.5,0.0,0.0
1,f00469441c938315e966fa798b2e77d32a1f89db,63d6f8cde6f0de8a4cf504adc40f8c49c00358506766a6...,2024-02-29 23:45:00,2024-03-01T00:00:00.000,1080,8.1,32,41.0,22.75,0.0,0,0,22.75,Unknown,Taxi Affiliation Services,41.878865584,-87.625192142,41.794090253,-87.592310855,2024-02-29 23:00:00,2024-02-29 23:00:00,4.7,20.5,0.0,0.0
2,edcebeb7f3d5d54568463f5d71f5c6cd1a2d1cd9,c8549ac4a2842a038f2a5b91ecf894ca7cb04d60558be3...,2024-02-29 23:45:00,2024-03-01T00:15:00.000,1443,9.51,76,,26.25,0.0,0,4,30.25,Cash,Sun Taxi,41.980264315,-87.913624596,,,2024-02-29 23:00:00,2024-02-29 23:00:00,4.7,20.5,0.0,0.0
3,edcc84164819a8b9e80253ae884080aac508c477,a3bc50ed4e7b5de8e74d56919ae6e14b91e33c63c6c7d9...,2024-02-29 23:45:00,2024-02-29T23:45:00.000,6,0.0,8,8.0,8.0,2.0,0,0,10.5,Credit Card,Sun Taxi,41.899602111,-87.633308037,41.899602111,-87.633308037,2024-02-29 23:00:00,2024-02-29 23:00:00,4.7,20.5,0.0,0.0
4,ed4ffc7c91d92591f2950759b56b47ae69a5302d,6f004629887a8286501ba6043f9130a2aaaaa3b560fbad...,2024-02-29 23:45:00,2024-02-29T23:45:00.000,0,0.0,56,,3.25,0.0,0,2,5.25,Cash,Chicago City Taxi Association,41.785998518,-87.750934289,,,2024-02-29 23:00:00,2024-02-29 23:00:00,4.7,20.5,0.0,0.0
