In [1]:
import matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import os
from matplotlib.animation import FuncAnimation
from matplotlib.colors import Normalize

# Select matplotlib's Tk-based "tkagg" backend for every figure created below.
# NOTE(review): with this backend plt.show() is expected to open a separate
# desktop window instead of rendering inline in the notebook - confirm that is
# the intended workflow (it also requires a display and Tk to be available).
matplotlib.use("tkagg")


In [2]:
# Cleaning: load the master dataset, parse dates, and derive postcode keys.
# The CSV carries a leftover index column ("Unnamed: 0") that is dropped here;
# chaining the drop avoids mutating a frame in place.
df = pd.read_csv("masterdata.csv").drop(columns="Unnamed: 0")
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values(["Year", "Month", "Organisation name"])

# Postcode district = the part of the postcode before the space
# (e.g. "L7" from "L7 8XP", "BD20" from "BD20 6TD").
df["PostDist"] = df["zipcode"].str.split(" ").str[0]

# Postcode area = the leading letters of the district ("BD20" -> "BD").
# Merely deleting digits is wrong for districts that end in a letter:
# "EC1A" would become "ECA" and "W1A" would become "WA" (a different, real
# postal area), so those rows would never match the shapefile's PostArea.
df["PostArea"] = df["PostDist"].str.extract(r"^([A-Za-z]+)", expand=False)
df

Unnamed: 0,Organisation code,Year,Month,Organisation name,Community-onset,"Community-onset, community associated","Community-onset, healthcare associated","Community-onset, indeterminate association",Hospital-onset,"Hospital-onset, healthcare associated",Total cases,Unknown,zipcode,Date,PostDist,PostArea
16016,REM,2018.0,1.0,Aintree University Hospital,14.0,0.0,0.0,0.0,10.0,0.0,24.0,0.0,L7 8XP,2018-01-01,L7,L
15108,RCF,2018.0,1.0,Airedale,13.0,0.0,0.0,0.0,4.0,0.0,17.0,0.0,BD20 6TD,2018-01-01,BD20,BD
14434,RBS,2018.0,1.0,Alder Hey Children's,3.0,0.0,0.0,0.0,1.0,0.0,4.0,0.0,L12 2AP,2018-01-01,L12,L
22594,RTK,2018.0,1.0,Ashford & St Peter's Hospitals,13.0,0.0,0.0,0.0,2.0,0.0,15.0,0.0,KT16 0PZ,2018-01-01,KT16,KT
16414,RF4,2018.0,1.0,"Barking, Havering & Redbridge University Hospi...",36.0,0.0,0.0,0.0,9.0,0.0,45.0,0.0,RM7 0AG,2018-01-01,RM7,RM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2873,02T,2024.0,5.0,NHS WEST YORKSHIRE ICB - 02T,7.0,7.0,0.0,0.0,0.0,0.0,7.0,0.0,HX1 1PW,2024-05-01,HX1,HX
3830,03R,2024.0,5.0,NHS WEST YORKSHIRE ICB - 03R,18.0,14.0,4.0,0.0,3.0,3.0,21.0,0.0,WF1 1LT,2024-05-01,WF1,WF
9562,15F,2024.0,5.0,NHS WEST YORKSHIRE ICB - 15F,39.0,35.0,4.0,0.0,14.0,14.0,53.0,0.0,LS16 6EB,2024-05-01,LS16,LS
10126,36J,2024.0,5.0,NHS WEST YORKSHIRE ICB - 36J,36.0,31.0,5.0,0.0,10.0,10.0,46.0,0.0,BD1 4AS,2024-05-01,BD1,BD


In [3]:
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(14, 9))

# Keep only the rows whose organisation code is "ENG".
df_eng = df.loc[df["Organisation code"] == "ENG"]

# One line per onset category; the Line2D handles are collected in `lines`.
lines = [
    ax.plot(df_eng["Date"], df_eng[column], label=legend_label, color=colour)[0]
    for column, legend_label, colour in (
        ("Community-onset", "Community-onset case count", "blue"),
        ("Hospital-onset", "Hospital-onset case count", "red"),
    )
]

ax.set(
    title="Trend in the cases of E. coli bacteraemia in England, by financial year: 2018-2024",
    xlabel="Year",
    ylabel="Count",
)
ax.legend()

fig.tight_layout()
plt.show()

In [4]:
# Postal-area boundaries for the whole UK. The path is assembled with
# os.path.join so it resolves on any operating system; the previous
# hard-coded "\\" separators only worked on Windows.
uk = gpd.read_file(os.path.join(os.getcwd(), "uk-shapefile", "PostalArea.shp"))
# NOTE(review): `crs` is not referenced by any cell visible here, and the
# {'init': ...} form is the legacy way of spelling a CRS - confirm whether it
# can be removed or replaced by "EPSG:4326".
crs = {'init':'epsg:4326'}

# Drop the "ENG" rows (the ones plotted as the England-wide trend) so the
# per-area sums below are built from the remaining organisations only.
df = df.loc[df["Organisation code"] != "ENG"]

# Postal areas excluded from the map.
# NOTE(review): these look like the Scottish, Welsh and Northern Irish areas,
# but a few of them (e.g. SY, HR) appear to cover English territory - confirm
# the list is intentional.
postal_areas_to_remove = [
    'ZE', 'KW', 'IV', 'HS', 'PH', 'AB', 'DD', 'PA', 'FK', 'G', 'KY', 'KA', 'DG', 'TD', 'EH', 'ML',
    'LL', 'SY', 'LD', 'HR', 'NP', 'CF', 'SA', 'BT'
]

# Keep only the shapes whose postal area is not in the exclusion list.
eng = uk[~uk['PostArea'].isin(postal_areas_to_remove)]

# Total community-onset cases per postal area over the whole dataset.
grouped = df.groupby('PostArea')['Community-onset'].sum().reset_index()

# Left merge keeps every remaining shape; areas without data get NaN and are
# drawn in light grey through missing_kwds.
eng_geo = eng.merge(grouped, on="PostArea", how="left")

fig, ax = plt.subplots(figsize=(20, 20))
eng_geo.plot(
    ax=ax,
    column="Community-onset",
    linewidth=0.01,
    edgecolor="black",
    cmap="Purples",
    legend=True,
    legend_kwds={'label': 'Community-onset Case Count', 'orientation': 'vertical'},
    missing_kwds={'color': 'lightgrey'},
)

# Style the map axes explicitly instead of relying on pyplot's "current axes".
ax.set_title('Community-onset Case Count', fontsize=16)
ax.axis("off")
ax.margins(0, 0)
ax.xaxis.set_major_locator(plt.NullLocator())
ax.yaxis.set_major_locator(plt.NullLocator())
# plt.savefig("test.png",bbox_inches='tight', dpi=750)
plt.show()

In [6]:
# Total cases per postal area over the whole dataset.
grouped = df.groupby('PostArea')['Total cases'].sum().reset_index()

# Left merge keeps every shape in `eng`; areas without data get NaN and are
# drawn in light grey through missing_kwds.
eng_geo = eng.merge(grouped, on="PostArea", how="left")

fig, ax = plt.subplots(figsize=(20, 20))
eng_geo.plot(
    ax=ax,
    column="Total cases",
    linewidth=0.01,
    edgecolor="black",
    cmap="Purples",
    # The legend was switched off while legend_kwds was still supplied, which
    # left the kwargs unused and the map without a colour scale. Enabled so the
    # shading can be read, matching the community-onset map.
    legend=True,
    legend_kwds={'label': 'Total Case Count', 'orientation': 'vertical'},
    missing_kwds={'color': 'lightgrey'},
)

# Style the map axes explicitly instead of relying on pyplot's "current axes".
ax.set_title('Total Case Count', fontsize=16)
ax.axis("off")
ax.margins(0, 0)
ax.xaxis.set_major_locator(plt.NullLocator())
ax.yaxis.set_major_locator(plt.NullLocator())
# plt.savefig("test2.png",bbox_inches='tight', dpi=750)
plt.show()

In [8]:
# Monthly totals per postal area, plus a running total within each area.
# groupby sorts by its keys, so rows are ordered by PostArea and then Date;
# both the cumsum and the "latest row per area" lookup in update() rely on it.
df_grouped = df.groupby(['PostArea', 'Date'])['Total cases'].sum().reset_index()
df_grouped['Cumulative_Cases'] = df_grouped.groupby('PostArea')['Total cases'].cumsum()

# Shapes to draw: every postal area that is not in the exclusion list.
gdf = uk[~uk['PostArea'].isin(postal_areas_to_remove)]

fig, ax = plt.subplots(figsize=(15, 20))
# Dedicated colourbar axes that is cleared and redrawn on every frame.
# Letting geopandas create its own colourbar per frame adds a new axes each
# time (ax.clear() does not remove them), so they pile up and shrink the map.
cax = fig.add_axes([0.9, 0.3, 0.02, 0.4])


def update(date):
    """Draw one animation frame: cumulative cases per postal area as of `date`.

    Parameters
    ----------
    date : datetime-like
        One of the values in ``dates``; anything ``pd.Timestamp`` accepts.

    Each postal area is drawn exactly once, using its most recent cumulative
    value on or before `date`. Areas with no data yet are drawn in light grey.
    The colour scale spans the min/max cumulative value seen up to `date`.
    """
    ax.clear()
    cax.clear()
    # Format the date explicitly; a raw numpy.datetime64 would render as
    # "2018-01-01T00:00:00.000000000" in the title.
    ax.set_title(f'Cumulative E. coli Cases in England as of {pd.Timestamp(date):%Y-%m-%d}')
    ax.axis('off')

    # All rows up to the current date, then the latest row of each area
    # (rows are Date-ordered within each PostArea, so keep='last' is the newest).
    history = df_grouped[df_grouped['Date'] <= date]
    latest = history.drop_duplicates('PostArea', keep='last')

    # Left merge onto the shapes so areas without data are still drawn (as NaN).
    frame = gdf.merge(latest[['PostArea', 'Cumulative_Cases']], on='PostArea', how='left')

    # Colour limits for this frame; `norm` alone is enough, no vmin/vmax needed.
    norm = Normalize(vmin=history['Cumulative_Cases'].min(),
                     vmax=history['Cumulative_Cases'].max())

    frame.plot(column='Cumulative_Cases', ax=ax, cax=cax, legend=True,
               cmap='YlOrRd', norm=norm,
               missing_kwds={'color': 'lightgrey'})


# One frame per distinct reporting date, in chronological order.
dates = sorted(df_grouped['Date'].unique())

# Build the animation and write it out as a GIF.
animation = FuncAnimation(fig, update, frames=dates, interval=200)
animation.save('ecoli_cases_animation.gif', writer='pillow', fps=5)

# The figure is only needed for the export; release it.
plt.close(fig)