## Preliminaries

In [1]:
# downloads
import pandas as pd
from datetime import datetime, timedelta

### Coordinates cleaning


Note: with the current code, the highest # of iterations/ (weather records for each day) is **100**, therefore the df should contain 100 rows.

In [2]:
# import file
df_all = pd.read_csv("yelp_coordinates.csv")
# calculate the number of rows per dataframe
rows_per_df = 100
# split the dataframe into a list of smaller dataframes
df_list = [df_all[i:i+rows_per_df] for i in range(0, len(df_all), rows_per_df)]
# set your df
df = df_list[0]

FileNotFoundError: [Errno 2] No such file or directory: 'yelp_coordinates.csv'

In [95]:
# show file
df.head()

Unnamed: 0,latitude,longitude,date
3000,34.414,-119.691,2021-12-26 17:50:34
3001,27.714,-82.354,2021-12-26 17:31:55
3002,39.95,-75.153,2021-12-26 17:30:19
3003,28.152,-82.757,2021-12-26 17:20:36
3004,40.308,-75.13,2021-12-26 17:12:56


### Scraping weather


We are following the sample from https://github.com/m0rp43us/openmeteopy/blob/main/Readme/HISTORICAL_WEATHER.md

In [69]:
# downloads
from openmeteo_py import OWmanager
from openmeteo_py.Hourly.HourlyHistorical import HourlyHistorical
from openmeteo_py.Options.HistoricalOptions import HistoricalOptions
from openmeteo_py.Utils.constants import *

In [79]:
# Define a function to make an API call for a given latitude, longitude, and date (Y-M-D!)
def get_weather_data(latitude, longitude, date):
    # convert day
    dt_obj = datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    day = dt_obj.strftime("%Y-%m-%d")
    hourly = HourlyHistorical()
    options = HistoricalOptions(latitude,longitude,nan,True,celsius,kmh,mm,iso8601,utc,day, day)

    #notice that we had to give the value "None" for the hourly parameter,otherwise you'll be filling the hourly parameter instead of the daily one.
    mgr_T = OWmanager(options, OWmanager.historical, hourly.temperature_2m())
    mgr_W = OWmanager(options, OWmanager.historical, hourly.windspeed_10m())
    mgr_P = OWmanager(options, OWmanager.historical, hourly.precipitation())
    mgr_WC = OWmanager(options, OWmanager.historical, hourly.weathercode())

    # Download data,here we want it as a key value json where the keys are dates and values the corresponding values of that date (technically timestamp)
    temp = mgr_T.get_data(1)
    wind = mgr_W.get_data(1)
    prec = mgr_P.get_data(1)
    wcode = mgr_WC.get_data(1)
    
    # convert date
    dt = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
    # round to the nearest hour
    dt_rounded = dt.replace(minute=0, second=0, microsecond=0)
    # format the datetime object to the desired string format
    date = dt_rounded.strftime('%Y-%m-%dT%H:%M')
    
    # get data for the specific hour
    if date in temp['hourly']['temperature_2m']:
        temperature = temp['hourly']['temperature_2m'][date]
    else:
        temperature = None
        
    if date in wind['hourly']['windspeed_10m']:
        windspeed = wind['hourly']['windspeed_10m'][date]
    else:
        windspeed = None
        
    if date in prec['hourly']['precipitation']:
        precipitation = prec['hourly']['precipitation'][date]
    else:
        precipitation = None
        
    if date in wcode['hourly']['weathercode']:
        weathercode = wcode['hourly']['weathercode'][date]
    else:
        weathercode = None


    
    return temperature, windspeed, precipitation, weathercode

In [80]:
# check if the function works properly
timestamp = '2022-01-19 19:48:13'
latitude = 27.564
longitude = -82.565
print(get_weather_data(latitude, longitude, timestamp))

(22.3, 12.0, 0.0, 0)


## Joining weather to our coordinates

In [108]:
# iterate over the rows of the dataframe and get the weather data for each timestamp
for index, row in df.iterrows():
    # get the latitude, longitude, and timestamp from the current row
    latitude = row[latitude_column]
    longitude = row[longitude_column]
    date = row[date_column]
    temperature, windspeed, precipitation, weathercode = get_weather_data(latitude, longitude, date)
        
    # update the dataframe with the weather data
    df.loc[index, temperature_column] = temperature
    df.loc[index, precipitation_column] = precipitation
    df.loc[index, weathercode_column] = weathercode
    df.loc[index, windspeed_column] = windspeed

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[index, temperature_column] = temperature
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[index, precipitation_column] = precipitation
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[index, weathercode_column] = weathercode
A value is trying to be set on a copy of a slice from a D

KeyboardInterrupt: 

In [105]:
# check the data
print(df.head())

   latitude  longitude                 date  temperature  precipitation  \
0    30.026    -90.238  2022-01-19 19:48:13         20.8            0.2   
1    34.423   -119.703  2022-01-19 19:39:46         14.6            0.0   
2    27.841    -82.685  2022-01-19 19:37:15         19.8            0.0   
3    27.960    -82.463  2022-01-19 19:29:13         20.8            0.0   
4    40.181    -75.106  2022-01-19 19:27:53          8.0            0.0   

   weathercode  windspeed  
0         51.0       13.2  
1          0.0        3.5  
2          0.0        5.5  
3          0.0       10.7  
4          0.0       17.4  


### Saving the data

In [106]:
# load into CSV file
csv_name = "weather_coordinates.csv"
df.to_csv(csv_name, index=False)