In [17]:
import numpy as np
import pandas as pd

In [35]:
# Load both inputs with paths relative to the directory that contains
# `team_project/`, so the notebook is not tied to one user's home directory.
# The minimum wage read below and the final CSV export already rely on that
# same working directory.
unemployment_rates = pd.read_csv('team_project/data/processed/cleaned_unemployment_rates.csv')
minimum_wage_data = pd.read_csv('team_project/data/raw/minimum_wage_data.csv')

# Keep small previews under their own names so the full frames stay untouched.
unemployment_rates_head = unemployment_rates.head()
minimum_wage_data_head = minimum_wage_data.head()

# Last expression of the cell: show both previews side by side as a tuple.
unemployment_rates_head, minimum_wage_data_head

(  Province Month-Year  Unemployment Rate
 0   Canada     Jan-76                7.1
 1   Canada     Feb-76                7.0
 2   Canada     Mar-76                6.7
 3   Canada     Apr-76                6.8
 4   Canada     May-76                6.9,
   Jurisdiction Effective Date Minimum Wage  \
 0           FJ      01-Apr-24       $17.30   
 1           FJ      01-Apr-23       $16.65   
 2           FJ      01-Apr-22       $15.55   
 3           FJ      29-Dec-21       $15.00   
 4           FJ      17-Jul-96          NaN   
 
                                                 Note  
 0  An employee should be paid at least the federa...  
 1  An employee should be paid at least the federa...  
 2  An employee should be paid at least the federa...  
 3  An employee should be paid at least the federa...  
 4  Then-current provincial/territorial rates adop...  )

In [36]:
# Convert date columns to datetime.
#
# Both files store two-digit years.  `%y` applies the POSIX pivot (00-68 ->
# 20xx, 69-99 -> 19xx), so a date from 1965-1968 would be read as 2065-2068.
# Assumption: neither file legitimately contains dates more than five years
# ahead of today, so anything beyond that cutoff is a wrapped 19xx date and is
# moved back one century.
latest_plausible_date = pd.Timestamp.now().normalize() + pd.DateOffset(years=5)


def _parse_two_digit_year(values, fmt):
    """Parse `values` with `fmt`, shifting implausibly far-future dates back 100 years."""
    parsed = pd.to_datetime(values, format=fmt)
    # Missing values compare False against the cutoff and are left as they are.
    return parsed.mask(parsed > latest_plausible_date, parsed - pd.DateOffset(years=100))


unemployment_rates['Month-Year'] = _parse_two_digit_year(unemployment_rates['Month-Year'], '%b-%y')
minimum_wage_data['Effective Date'] = _parse_two_digit_year(minimum_wage_data['Effective Date'], '%d-%b-%y')

In [37]:
# The unemployment file spells jurisdictions out in full while the minimum
# wage file uses short codes; this lookup bridges the two.  The national
# 'Canada' series is paired with the 'FJ' jurisdiction code.
province_mapping = {
    'Newfoundland and Labrador': 'NL',
    'Prince Edward Island': 'PEI',
    'Nova Scotia': 'NS',
    'New Brunswick': 'NB',
    'Quebec': 'QC',
    'Ontario': 'ON',
    'Manitoba': 'MB',
    'Saskatchewan': 'SK',
    'Alberta': 'AB',
    'British Columbia': 'BC',
    'Canada': 'FJ'
}

# Map the province names (names missing from the lookup become NaN).
unemployment_rates['Mapped Province'] = unemployment_rates['Province'].map(province_mapping)


# Build one row per (jurisdiction, month) carrying the wage in force that month.
# Only month and year matter, so the grid is one month-start timestamp for every
# month between the first and last unemployment observation.
date_range = pd.date_range(start=unemployment_rates['Month-Year'].min(), end=unemployment_rates['Month-Year'].max(), freq='MS')
month_values = date_range.to_numpy()

expanded_rows = []

for province in minimum_wage_data['Jurisdiction'].unique():
    # Stable sort: rows sharing an effective date keep their file order, so the
    # row chosen for a month is deterministic.  Rows without a date can never be
    # "in force" and are dropped before the lookup.
    province_data = (
        minimum_wage_data[minimum_wage_data['Jurisdiction'] == province]
        .dropna(subset=['Effective Date'])
        .sort_values('Effective Date', kind='stable')
    )
    effective_dates = province_data['Effective Date'].to_numpy()
    wages = province_data['Minimum Wage'].to_numpy(dtype=object)

    # One binary search per jurisdiction instead of re-filtering the frame for
    # every month: position of the last change on or before each month start,
    # or -1 when no change has taken effect yet.
    last_change = np.searchsorted(effective_dates, month_values, side='right') - 1

    expanded_rows.extend(
        [province, month, wages[pos] if pos >= 0 else None]
        for month, pos in zip(date_range, last_change)
    )

# Create the expanded DataFrame
expanded_min_wage = pd.DataFrame(expanded_rows, columns=['Province', 'Month-Year', 'Minimum Wage'])


# Render 'Month-Year' as text (e.g. 'Jan-1976') in both frames so they can be
# joined on it.  Note: this overwrites the datetime column in
# unemployment_rates, so this cell cannot be re-run without re-running the
# date-conversion cell first.
unemployment_rates['Month-Year'] = unemployment_rates['Month-Year'].dt.strftime('%b-%Y')
expanded_min_wage['Month-Year'] = expanded_min_wage['Month-Year'].dt.strftime('%b-%Y')


In [38]:
# Left-join the monthly minimum wage onto the unemployment observations:
# every unemployment row is kept, and it picks up a wage only when the same
# jurisdiction code and month label exist on the wage side.
merged_data = unemployment_rates.merge(
    expanded_min_wage,
    how='left',
    left_on=['Mapped Province', 'Month-Year'],
    right_on=['Province', 'Month-Year'],
)

merged_data.head(200)

Unnamed: 0,Province_x,Month-Year,Unemployment Rate,Mapped Province,Province_y,Minimum Wage
0,Canada,Jan-1976,7.1,FJ,FJ,$2.60
1,Canada,Feb-1976,7.0,FJ,FJ,$2.60
2,Canada,Mar-1976,6.7,FJ,FJ,$2.60
3,Canada,Apr-1976,6.8,FJ,FJ,$2.90
4,Canada,May-1976,6.9,FJ,FJ,$2.90
...,...,...,...,...,...,...
195,Canada,Apr-1992,10.7,FJ,FJ,$4.00
196,Canada,May-1992,10.9,FJ,FJ,$4.00
197,Canada,Jun-1992,11.4,FJ,FJ,$4.00
198,Canada,Jul-1992,11.3,FJ,FJ,$4.00


In [39]:
# 'Mapped Province' and the right-hand 'Province_y' copy only existed to drive
# the join, so they are removed from the merged table.
helper_columns = ['Mapped Province', 'Province_y']
merged_data = merged_data.drop(helper_columns, axis='columns')
merged_data.head(200)

Unnamed: 0,Province_x,Month-Year,Unemployment Rate,Minimum Wage
0,Canada,Jan-1976,7.1,$2.60
1,Canada,Feb-1976,7.0,$2.60
2,Canada,Mar-1976,6.7,$2.60
3,Canada,Apr-1976,6.8,$2.90
4,Canada,May-1976,6.9,$2.90
...,...,...,...,...
195,Canada,Apr-1992,10.7,$4.00
196,Canada,May-1992,10.9,$4.00
197,Canada,Jun-1992,11.4,$4.00
198,Canada,Jul-1992,11.3,$4.00


In [40]:
# Write the merged table to disk without the positional index column.
# NOTE(review): this is derived data but it is saved under data/raw - confirm
# that is the intended location.
merged_csv_path = 'team_project/data/raw/merged_data.csv'
merged_data.to_csv(merged_csv_path, index=False)