In [1]:
import pandas as pd

# Two snapshots of the same eight devices, written one row per device.
# df1 differs from df only in the temperature/status of D475, D200 and A150.
device_columns = ["device_id", "device_temperature", "device_status"]

df = pd.DataFrame(
    [
        ('D475', 35.4, "Inactive"),
        ('D175', 45.2, "Active"),
        ('D200', 59.3, "Active"),
        ('D375', 49.3, "Active"),
        ('M475', 32.2, "Active"),
        ('M400', 35.7, "Inactive"),
        ('M250', 36.8, "Active"),
        ('A150', 34.9, "Active"),
    ],
    columns=device_columns,
)

df1 = pd.DataFrame(
    [
        ('D475', 39.4, "Active"),
        ('D175', 45.2, "Active"),
        ('D200', 29.3, "Inactive"),
        ('D375', 49.3, "Active"),
        ('M475', 32.2, "Active"),
        ('M400', 35.7, "Inactive"),
        ('M250', 36.8, "Active"),
        ('A150', 24.9, "Inactive"),
    ],
    columns=device_columns,
)

In [2]:
# Case 1: both Series have the same length and hold the same value at every
# position, so equals() reports True.
series1, series2 = pd.Series(data=[1, 2, 3, 4]), pd.Series(data=[1, 2, 3, 4])
series1.equals(other=series2)

True

In [3]:
# Case 2: the Series differ in length (six elements vs. four), so equals()
# reports False.
longer_values = [1, 2, 3, 4, 5, 6]
shorter_values = [1, 2, 4, 4]
series1 = pd.Series(data=longer_values)
series2 = pd.Series(data=shorter_values)
series1.equals(other=series2)

False

In [4]:
# Case 3: same length and the same set of values, but the first two values
# are swapped -- equals() compares position by position, so it reports False.
series1, series2 = (pd.Series(data=values) for values in ([1, 2, 3, 4], [2, 1, 3, 4]))
series1.equals(other=series2)

False

In [5]:
# Case 4: same length, but the value at position 2 differs (3 vs. 4), so
# equals() reports False.
series1 = pd.Series(data=[1, 2, 3, 4])
series2 = pd.Series(data=[1, 2, 4, 4])
series1.equals(other=series2)

False

In [6]:
# Whole-frame check: a single boolean telling whether df and df1 hold the
# same elements; it does not say which cells differ.
df.equals(other=df1)

False

In [7]:
# Column-level check: compare just the "device_id" column of the two frames.
ids_in_df, ids_in_df1 = df["device_id"], df1["device_id"]
ids_in_df.equals(ids_in_df1)

True

In [8]:
# Print the name of every column of df whose contents are not identical to
# the same-named column of df1.
print("List of the columns having different values in the DataFrames df1 and df \n")
for column in df.columns:
    # A column that df1 does not have at all is reported as differing; without
    # this guard the df1[column] lookup would raise KeyError.
    if column not in df1.columns or not df[column].equals(df1[column]):
        print(column)

List of the columns having different values in the DataFrames df1 and df 

device_temperature
device_status


In [9]:
# Stack df on top of df1 (row-wise concat). The original index labels are
# kept, so each label appears once per source frame.
df2 = pd.concat(objs=[df, df1], axis=0)
df2

Unnamed: 0,device_id,device_temperature,device_status
0,D475,35.4,Inactive
1,D175,45.2,Active
2,D200,59.3,Active
3,D375,49.3,Active
4,M475,32.2,Active
5,M400,35.7,Inactive
6,M250,36.8,Active
7,A150,34.9,Active
0,D475,39.4,Active
1,D175,45.2,Active


In [10]:
# Keep only the rows of the stacked frame that occur exactly once: with
# keep=False every member of a duplicate group is flagged, so rows that are
# identical in both source frames disappear and only the differing rows remain.
appears_more_than_once = df2.duplicated(keep=False)
df3 = df2[~appears_more_than_once]
df3

Unnamed: 0,device_id,device_temperature,device_status
0,D475,35.4,Inactive
2,D200,59.3,Active
7,A150,34.9,Active
0,D475,39.4,Active
2,D200,29.3,Inactive
7,A150,24.9,Inactive


In [11]:
# Cell-by-cell diff of the two frames: only differing rows/columns are kept,
# with df's value under "self" and df1's value under "other", side by side.
df4 = df.compare(other=df1, align_axis=1)
df4


Unnamed: 0_level_0,device_temperature,device_temperature,device_status,device_status
Unnamed: 0_level_1,self,other,self,other
0,35.4,39.4,Inactive,Active
2,59.3,29.3,Active,Inactive
7,34.9,24.9,Active,Inactive


In [12]:
# Sample data for merging two frames on the closest matching date.

# S&P 500 index values, one row per listed date
sp500_data = dict(
    Date=pd.to_datetime(['2023-01-03', '2023-01-04', '2023-01-05', '2023-01-06', '2023-01-09']),
    SP500=[3750, 3780, 3795, 3800, 3820],
)
sp500_df = pd.DataFrame(data=sp500_data)

# NYC weather observations; the dates only partly overlap the S&P 500 dates
weather_data = dict(
    Date=pd.to_datetime(['2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05', '2023-01-08']),
    Weather=['Rainy', 'Sunny', 'Cloudy', 'Snow', 'Windy'],
)
weather_df = pd.DataFrame(data=weather_data)

In [13]:
# Nearest-date join: each S&P 500 row is paired with the weather row whose
# Date is closest, whether earlier or later.
# merge_asof requires both inputs to be sorted by the key, so sort explicitly
# here (as the other merge cells do) instead of relying on how the sample
# data happened to be typed in.
merged_df = pd.merge_asof(
    sp500_df.sort_values('Date'),
    weather_df.sort_values('Date'),
    on='Date',
    direction='nearest',
)
print(merged_df)

        Date  SP500 Weather
0 2023-01-03   3750   Sunny
1 2023-01-04   3780  Cloudy
2 2023-01-05   3795    Snow
3 2023-01-06   3800    Snow
4 2023-01-09   3820   Windy


In [15]:
# Nearest-date join again, but a weather row only counts as a match when it
# lies within one day of the S&P 500 date.
one_day = pd.Timedelta(days=1)
merged_df = pd.merge_asof(
    left=sp500_df.sort_values(by='Date'),
    right=weather_df.sort_values(by='Date'),
    on='Date',
    direction='nearest',
    tolerance=one_day,
)

In [16]:
# Show the frame currently bound to merged_df (assigned by the preceding
# merge_asof cell) as plain text.
print(merged_df)

        Date  SP500 Weather
0 2023-01-03   3750   Sunny
1 2023-01-04   3780  Cloudy
2 2023-01-05   3795    Snow
3 2023-01-06   3800    Snow
4 2023-01-09   3820   Windy


In [17]:
# Backward join: only weather rows dated on or before the S&P 500 date are
# candidates, and a candidate more than one day older is rejected.
sp500_by_date = sp500_df.sort_values(by='Date')
weather_by_date = weather_df.sort_values(by='Date')
merged_df = pd.merge_asof(
    sp500_by_date,
    weather_by_date,
    on='Date',
    direction='backward',
    tolerance=pd.Timedelta(days=1),
)
print(merged_df)

        Date  SP500 Weather
0 2023-01-03   3750   Sunny
1 2023-01-04   3780  Cloudy
2 2023-01-05   3795    Snow
3 2023-01-06   3800    Snow
4 2023-01-09   3820   Windy


In [18]:
# Using the frames as originally prepared, join backward while explicitly
# excluding same-date matches: each S&P 500 row receives the most recent
# weather row dated strictly before it.
merged_df = pd.merge_asof(
    left=sp500_df.sort_values(by='Date'),
    right=weather_df.sort_values(by='Date'),
    on='Date',
    direction='backward',
    allow_exact_matches=False,
)
print(merged_df)

        Date  SP500 Weather
0 2023-01-03   3750   Rainy
1 2023-01-04   3780   Sunny
2 2023-01-05   3795  Cloudy
3 2023-01-06   3800    Snow
4 2023-01-09   3820   Windy


In [19]:
# Add a 'Location' column to both frames so that later merges can be grouped by it.
nyc, la = 'New York City', 'Los Angeles'

# Every S&P 500 row is treated as belonging to New York City.
sp500_df['Location'] = nyc

# Weather rows get one location each, in row order.
weather_df['Location'] = [nyc, nyc, la, nyc, la]

In [20]:
# merge_asof needs both inputs ordered by the key, so re-sort each frame by 'Date'.
sp500_df, weather_df = (frame.sort_values(by='Date') for frame in (sp500_df, weather_df))

# Backward join restricted by 'Location': an S&P 500 row can only pick up a
# weather row with the same Location dated on or before it.
merged_df = pd.merge_asof(
    left=sp500_df,
    right=weather_df,
    on='Date',
    by='Location',
    direction='backward',
)
print(merged_df)

        Date  SP500       Location Weather
0 2023-01-03   3750  New York City   Sunny
1 2023-01-04   3780  New York City   Sunny
2 2023-01-05   3795  New York City    Snow
3 2023-01-06   3800  New York City    Snow
4 2023-01-09   3820  New York City    Snow
