In [15]:
import pandas as pd

In [16]:
# Source CSVs: the 2011-to-2021 MSOA lookup and the per-MSOA area tables for each year
file1_path = "Data/Revised/Geography/Lookup/11-21-MSOA.csv"
file2_path = "Data/Revised/Geography/MSOA/2011.csv"
file3_path = "Data/Revised/Geography/MSOA/2021.csv"

# Read all three files into dataframes (same order as the paths above)
file1, file2, file3 = (
    pd.read_csv(csv_path) for csv_path in (file1_path, file2_path, file3_path)
)

# Attach area_2011 to every lookup row through the 2011 code.
# The left join keeps every lookup row, matched or not.
merged_data_2011 = pd.merge(
    file1,
    file2[['MSOA11CD', 'area_2011']],
    how='left',
    on='MSOA11CD',
)

# Attach area_2021 through the 2021 code. The key is spelled 'MSOA21CD' on the
# left and 'msoa21cd' on the right, so both columns survive the join; the
# lower-case duplicate is dropped straight away.
merged_final_data = pd.merge(
    merged_data_2011,
    file3[['msoa21cd', 'area_2021']],
    how='left',
    left_on='MSOA21CD',
    right_on='msoa21cd',
).drop(columns=['msoa21cd'])

# 'Ratio' is area_2021 divided by area_2011 (the 2021 area relative to the
# 2011 area), rounded to two decimal places
merged_final_data['Ratio'] = (
    merged_final_data['area_2021'].div(merged_final_data['area_2011']).round(2)
)

# Show the resulting dataframe
print(merged_final_data)

       MSOA11CD                  MSOA11NM   MSOA21CD  \
0     E02000001        City of London 001  E02000001   
1     E02000002  Barking and Dagenham 001  E02000002   
2     E02000003  Barking and Dagenham 002  E02000003   
3     E02000004  Barking and Dagenham 003  E02000004   
4     E02000005  Barking and Dagenham 004  E02000005   
...         ...                       ...        ...   
997   E02000371               Hackney 027  E02007111   
998   E02000891         Tower Hamlets 028  E02007112   
999   E02000891         Tower Hamlets 028  E02007113   
1000  E02000891         Tower Hamlets 028  E02007114   
1001  E02000189                Camden 024  E02007115   

                      MSOA21NM    area_2011    area_2021  Ratio  
0           City of London 001  2906464.063  3150817.013   1.08  
1     Barking and Dagenham 001  2166190.858  2161596.596   1.00  
2     Barking and Dagenham 002  2144114.309  2141548.639   1.00  
3     Barking and Dagenham 003  2490806.735  2492938.224   1.00

In [21]:
# Load the IMD 2019 data (only its 'MSOA11CD' and 'Score' columns are used below)
imd_2019_path = "Data/Revised/Migration/IMD_2019.csv"
imd_2019_data = pd.read_csv(imd_2019_path)

# Attach the 2019 score to each row through its 2011 code and keep it under the
# name 'Score_2019', so that 'Score' is free for the adjusted value below.
# The left join keeps rows with no IMD match; their score stays NaN.
merged_with_score = merged_final_data.merge(
    imd_2019_data[['MSOA11CD', 'Score']], on='MSOA11CD', how='left'
).rename(columns={'Score': 'Score_2019'})

# 'Ratio' (area_2021 / area_2011, rounded to 2 d.p.) is already a column of
# merged_final_data and is carried through the merge, so it is reused here
# instead of being recomputed with the same formula.
# NOTE(review): because Ratio is already rounded, Score inherits that rounding
# before its own round(2) - confirm this precision is intended.
merged_with_score['Score'] = (
    merged_with_score['Ratio'] * merged_with_score['Score_2019']
).round(2)

# Decile ranking of 'Score', with 1 being the highest-scoring tenth and 10 the
# lowest. qcut(labels=False) returns bin codes 0 (lowest) .. 9 (highest), so
# 10 - code gives the inverted 10 .. 1 labelling.
# Rows whose Score is NaN (no area or IMD match in the left joins) get a NaN
# code; the nullable 'Int64' dtype stores that as <NA> instead of raising the
# way a plain int cast does on NaN.
merged_with_score['Decile'] = (
    10 - pd.qcut(merged_with_score['Score'], 10, labels=False)
).astype('Int64')

# Display the updated dataframe
print(merged_with_score)

       MSOA11CD                  MSOA11NM   MSOA21CD  \
0     E02000001        City of London 001  E02000001   
1     E02000002  Barking and Dagenham 001  E02000002   
2     E02000003  Barking and Dagenham 002  E02000003   
3     E02000004  Barking and Dagenham 003  E02000004   
4     E02000005  Barking and Dagenham 004  E02000005   
...         ...                       ...        ...   
997   E02000371               Hackney 027  E02007111   
998   E02000891         Tower Hamlets 028  E02007112   
999   E02000891         Tower Hamlets 028  E02007113   
1000  E02000891         Tower Hamlets 028  E02007114   
1001  E02000189                Camden 024  E02007115   

                      MSOA21NM    area_2011    area_2021  Ratio  Score_2019  \
0           City of London 001  2906464.063  3150817.013   1.08       13.71   
1     Barking and Dagenham 001  2166190.858  2161596.596   1.00       34.72   
2     Barking and Dagenham 002  2144114.309  2141548.639   1.00       25.10   
3     Barki

In [22]:
# Keep just the 2021 code and name together with the derived score and decile
final_data = merged_with_score.loc[:, ['MSOA21CD', 'MSOA21NM', 'Score', 'Decile']]

# Write the selection to a new CSV file, leaving out the dataframe index
final_data.to_csv('Data/Revised/Migration/IMD_2021.csv', index=False)