In [1]:
import pandas as pd

In [2]:
# Sample S&P 500 index values, one row per date.
sp500_data = dict(
    Date=pd.to_datetime([
        '2023-01-03',
        '2023-01-04',
        '2023-01-05',
        '2023-01-06',
        '2023-01-09',
    ]),
    SP500=[3750, 3780, 3795, 3800, 3820],
)
sp500_df = pd.DataFrame(data=sp500_data)

# Sample NYC weather observations, one row per date.
# The dates only partly overlap with the S&P 500 dates above, which is what
# makes an as-of (nearest-key) merge interesting for these two frames.
weather_data = dict(
    Date=pd.to_datetime([
        '2023-01-02',
        '2023-01-03',
        '2023-01-04',
        '2023-01-05',
        '2023-01-08',
    ]),
    Weather=['Rainy', 'Sunny', 'Cloudy', 'Snow', 'Windy'],
)
weather_df = pd.DataFrame(data=weather_data)

In [3]:
# Show the S&P 500 frame (left side of the merges below) as the cell output.
sp500_df

Unnamed: 0,Date,SP500
0,2023-01-03,3750
1,2023-01-04,3780
2,2023-01-05,3795
3,2023-01-06,3800
4,2023-01-09,3820


In [4]:
# Show the weather frame (right side of the merges below) as the cell output.
weather_df

Unnamed: 0,Date,Weather
0,2023-01-02,Rainy
1,2023-01-03,Sunny
2,2023-01-04,Cloudy
3,2023-01-05,Snow
4,2023-01-08,Windy


In [5]:
# Merging data frames using merge_asof.
# For every S&P 500 row, attach the weather row whose 'Date' is closest;
# direction='nearest' looks at both earlier and later dates.
# merge_asof requires both inputs to be sorted on the key, so sort explicitly
# here (as the other merge cells do) instead of relying on construction order.
merged_df = pd.merge_asof(
    sp500_df.sort_values('Date'),
    weather_df.sort_values('Date'),
    on='Date',
    direction='nearest',
)
merged_df

Unnamed: 0,Date,SP500,Weather
0,2023-01-03,3750,Sunny
1,2023-01-04,3780,Cloudy
2,2023-01-05,3795,Snow
3,2023-01-06,3800,Snow
4,2023-01-09,3820,Windy


In [6]:
# Tolerance: cap how far apart two matched dates may be.
# With a one-day tolerance, a weather row is attached only when its 'Date'
# lies within one day of the S&P 500 row's 'Date'.
merged_df = pd.merge_asof(
    left=sp500_df.sort_values(by='Date'),
    right=weather_df.sort_values(by='Date'),
    on='Date',
    direction='nearest',
    tolerance=pd.Timedelta('1 day'),
)
merged_df

Unnamed: 0,Date,SP500,Weather
0,2023-01-03,3750,Sunny
1,2023-01-04,3780,Cloudy
2,2023-01-05,3795,Snow
3,2023-01-06,3800,Snow
4,2023-01-09,3820,Windy


In [7]:
# Direction: 'backward' only considers weather rows dated on or before the
# S&P 500 row; the one-day tolerance from the previous cell is kept.
merged_df = pd.merge_asof(
    left=sp500_df.sort_values(by='Date'),
    right=weather_df.sort_values(by='Date'),
    on='Date',
    direction='backward',
    tolerance=pd.Timedelta('1 day'),
)
merged_df

Unnamed: 0,Date,SP500,Weather
0,2023-01-03,3750,Sunny
1,2023-01-04,3780,Cloudy
2,2023-01-05,3795,Snow
3,2023-01-06,3800,Snow
4,2023-01-09,3820,Windy


In [8]:
# Excluding exact matches.
# Uses the frames as originally built. With allow_exact_matches=False and a
# backward search, a weather row on the same date is skipped, so each
# S&P 500 row picks up the most recent strictly earlier weather row.
merged_df = pd.merge_asof(
    left=sp500_df.sort_values(by='Date'),
    right=weather_df.sort_values(by='Date'),
    on='Date',
    direction='backward',
    allow_exact_matches=False,
)
merged_df

Unnamed: 0,Date,SP500,Weather
0,2023-01-03,3750,Rainy
1,2023-01-04,3780,Sunny
2,2023-01-05,3795,Cloudy
3,2023-01-06,3800,Snow
4,2023-01-09,3820,Windy


In [9]:
# Matching on an additional column.
# NOTE: this cell adds a 'Location' column to both existing frames and rebinds
# them, so cells run after it see the extra column.

# Tag every S&P 500 row with the same city (scalar is broadcast to all rows).
sp500_df['Location'] = 'New York City'  # Assuming all entries relate to New York City

# Tag each weather row with its own city, listed in row order.
weather_df['Location'] = [
    'New York City',
    'New York City',
    'Los Angeles',
    'New York City',
    'Los Angeles',
]

# merge_asof needs both inputs ordered by the 'Date' key.
sp500_df, weather_df = (
    sp500_df.sort_values(by='Date'),
    weather_df.sort_values(by='Date'),
)

# by='Location' restricts candidates to rows with an equal 'Location' before
# the backward date search, so Los Angeles weather rows are never attached to
# the New York City S&P 500 rows.
merged_df = pd.merge_asof(
    left=sp500_df,
    right=weather_df,
    on='Date',
    by='Location',
    direction='backward',
)
merged_df

Unnamed: 0,Date,SP500,Location,Weather
0,2023-01-03,3750,New York City,Sunny
1,2023-01-04,3780,New York City,Sunny
2,2023-01-05,3795,New York City,Snow
3,2023-01-06,3800,New York City,Snow
4,2023-01-09,3820,New York City,Snow
