In [1]:
import pandas as pd

In [2]:
# Location of the traffic dataset CSV. NOTE(review): the /content prefix looks
# like a Colab upload directory - adjust when running elsewhere.
traffic_csv_path = '/content/Dataset_Uber Traffic (1).csv'
traffic = pd.read_csv(filepath_or_buffer=traffic_csv_path)

In [3]:
# Preview the first five rows to check which columns were loaded.
traffic.head(n=5)

Unnamed: 0,DateTime,Junction,Vehicles,ID
0,01/11/15 0:00,1,15,20151101001
1,01/11/15 1:00,1,13,20151101011
2,01/11/15 2:00,1,10,20151101021
3,01/11/15 3:00,1,7,20151101031
4,01/11/15 4:00,1,9,20151101041


In [4]:
# Parse the day/month/two-digit-year timestamps into real datetimes.
parsed_timestamps = pd.to_datetime(traffic['DateTime'], format="%d/%m/%y %H:%M")
traffic['DateTime'] = parsed_timestamps

# Show the covered time span (earliest first, then latest); it defines the
# date range to request from the weather API.
for boundary in (parsed_timestamps.min(), parsed_timestamps.max()):
    print(boundary)

2015-11-01 00:00:00
2017-06-30 23:00:00


Hyderabad Coordinates (needed for weather APIs):

Latitude: 17.3850

Longitude: 78.4867

In [5]:
# !pip install openmeteo-requests requests-cache retry-requests numpy pandas
!pip install openmeteo-requests
!pip install requests-cache retry-requests numpy pandas



In [6]:
import openmeteo_requests

import pandas as pd
import requests_cache
from retry_requests import retry

def _as_text(value):
    """Return ``value`` as ``str``, decoding it first when it is ``bytes``."""
    return value.decode("utf-8") if isinstance(value, bytes) else str(value)


# Setup the Open-Meteo API client with cache and retry on error.
# expire_after = -1 means cached responses never expire, so re-running this
# cell reuses the stored response instead of hitting the API again.
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here.
# The order of variables in "hourly" is important: the values are read back
# by position (Variables(0), Variables(1), ...) further down.
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": 17.385,
    "longitude": 78.4867,
    # Same span as the traffic data (2015-11-01 .. 2017-06-30).
    "start_date": "2015-11-01",
    "end_date": "2017-06-30",
    "hourly": ["temperature_2m", "relative_humidity_2m", "precipitation", "wind_speed_10m"],
    "timezone": "auto",
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models.
response = responses[0]
print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation: {response.Elevation()} m asl")
# Timezone() / TimezoneAbbreviation() come back as bytes; decode them so the
# message reads "Asia/Kolkata GMT+5:30" rather than a pair of b'...' reprs.
print(f"Timezone: {_as_text(response.Timezone())} {_as_text(response.TimezoneAbbreviation())}")
print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(3).ValuesAsNumpy()

# Build the timestamp axis from the response's start/end epoch seconds and
# step size. The timestamps are interpreted as UTC here; inclusive = "left"
# leaves out the end instant itself.
hourly_data = {"date": pd.date_range(
    start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
    end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
    freq = pd.Timedelta(seconds = hourly.Interval()),
    inclusive = "left"
)}

hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["precipitation"] = hourly_precipitation
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m

hourly_dataframe = pd.DataFrame(data = hourly_data)
print("\nHourly data\n", hourly_dataframe)


Coordinates: 17.398944854736328°N 78.45708465576172°E
Elevation: 505.0 m asl
Timezone: b'Asia/Kolkata'b'GMT+5:30'
Timezone difference to GMT+0: 19800s

Hourly data
                            date  temperature_2m  relative_humidity_2m  \
0     2015-10-31 18:30:00+00:00       23.099501             73.478798   
1     2015-10-31 19:30:00+00:00       22.399500             77.633232   
2     2015-10-31 20:30:00+00:00       21.599501             81.514389   
3     2015-10-31 21:30:00+00:00       21.199499             83.013901   
4     2015-10-31 22:30:00+00:00       20.949499             83.507774   
...                         ...             ...                   ...   
14587 2017-06-30 13:30:00+00:00       27.471001             67.507095   
14588 2017-06-30 14:30:00+00:00       27.071001             68.263649   
14589 2017-06-30 15:30:00+00:00       27.121000             69.118393   
14590 2017-06-30 16:30:00+00:00       26.821001             68.425446   
14591 2017-06-30 17:30:00+00:00 

In [7]:
# Rebuild the frame from the collected hourly arrays so this cell always
# starts from the UTC timestamps, even when it is re-run.
hourly_dataframe = pd.DataFrame(data = hourly_data)

# Shift the UTC timestamps to IST (Asia/Kolkata), then render them as
# DD-MM-YYYY HH:MM text (Excel-like layout). After this step the 'date'
# column holds strings, not datetimes.
ist_timestamps = hourly_dataframe['date'].dt.tz_convert('Asia/Kolkata')
hourly_dataframe['date'] = ist_timestamps.dt.strftime('%d-%m-%Y %H:%M')

print("\nHourly data\n", hourly_dataframe)



Hourly data
                    date  temperature_2m  relative_humidity_2m  precipitation  \
0      01-11-2015 00:00       23.099501             73.478798            0.0   
1      01-11-2015 01:00       22.399500             77.633232            0.0   
2      01-11-2015 02:00       21.599501             81.514389            0.0   
3      01-11-2015 03:00       21.199499             83.013901            0.0   
4      01-11-2015 04:00       20.949499             83.507774            0.0   
...                 ...             ...                   ...            ...   
14587  30-06-2017 19:00       27.471001             67.507095            0.0   
14588  30-06-2017 20:00       27.071001             68.263649            0.0   
14589  30-06-2017 21:00       27.121000             69.118393            0.0   
14590  30-06-2017 22:00       26.821001             68.425446            0.0   
14591  30-06-2017 23:00       26.521000             70.291824            0.0   

       wind_speed_10m  
0

In [8]:
# Write the weather table to disk, leaving out the positional index column.
weather_csv_path = "hyderabad_weather.csv"
hourly_dataframe.to_csv(weather_csv_path, index=False)