In [25]:
import geopandas as gpd
import numpy as np
import pandas as pd
import os

In [4]:
# Input files read by this notebook: the DMA shapefile, the 2020 flood
# shapefile, and the CSV that maps DMA names to DMA ids.
# NOTE(review): these are absolute, machine-specific paths; the notebook will
# only run where this exact folder layout exists.
DMA_shp_path = r"D:\qing_research\1_flood_water\1_flood_water_network\data\Geo\DMA\DMA shape file\DMA.shp"
floods_shp_2020_path = r"D:\qing_research\1_flood_water\1_flood_water_network\data\Geo\flood\1_extract_flood_inDMA\flood_map\2020.shp"
DMA_mapping_path = r"D:\qing_research\1_flood_water\1_flood_water_network\data\Anglian_water\processed\DMA_Mapping.csv"

In [5]:
# Read both layers and reproject each one to EPSG:3857 in the same step, so the
# DMA and flood geometries share a single CRS.
DMA_shp = gpd.read_file(DMA_shp_path).to_crs(epsg=3857)
floods_shp_2020 = gpd.read_file(floods_shp_2020_path).to_crs(epsg=3857)

In [6]:
# Intersect the DMA polygons with the flood polygons, then remove exact
# duplicate rows and renumber the index from zero.
overlap_2020 = (
    gpd.overlay(DMA_shp, floods_shp_2020, how='intersection')
    .drop_duplicates()
    .reset_index(drop=True)
)

In [7]:
# Load the DMA mapping table; its 'DMA_Name' and 'DMA_ID' columns are used
# further down to translate DMA names into ids.
DMA_mapping = pd.read_csv(DMA_mapping_path)

In [9]:
# Count the overlay rows belonging to each flood_id.
DMA_counts = (
    overlap_2020
    .groupby('flood_id')
    .size()
    .reset_index(name='dma_count')
)
DMA_counts

Unnamed: 0,flood_id,dma_count
0,8,1
1,59,1
2,228,4
3,229,3
4,328,3
5,329,2
6,330,1
7,331,1
8,332,1
9,333,1


In [11]:
# Count the overlay rows recorded under each flood cause.
cause_num_by_floods = (
    overlap_2020
    .groupby('flood_caus')
    .size()
    .reset_index(name='num')
)
cause_num_by_floods

Unnamed: 0,flood_caus,num
0,channel capacity exceeded (no raised defences),12
1,local drainage/surface water,1
2,overtopping of defences,7
3,unknown,1


In [12]:
# Count the overlay rows belonging to each DMA short name.
floods_counts = (
    overlap_2020
    .groupby('DMASHORTNA')
    .size()
    .reset_index(name='floods_count')
)
floods_counts

Unnamed: 0,DMASHORTNA,floods_count
0,ALLINGMA,2
1,BRIGSVMA,2
2,CLAYPLMA,2
3,DEENEDMA,1
4,DISSS1MA,1
5,EVEMISMA,2
6,HOUMARMA,1
7,KENNI4MA,1
8,KENNI5MA,1
9,KPYTCHMA,1


In [13]:
def get_season(month):
    """Return the season name for a calendar month number.

    Parameters
    ----------
    month : int or float
        Calendar month in the range 1-12. Missing values (None, NaN, pd.NA)
        are accepted.

    Returns
    -------
    str or None
        'Winter' for 12, 1, 2; 'Spring' for 3-5; 'Summer' for 6-8;
        'Autumn' for 9-11. None when ``month`` is missing.

    Raises
    ------
    ValueError
        If ``month`` is present but is not one of 1-12.
    """
    # A missing month (e.g. taken from a missing date) must not be given a
    # season; previously it fell through to the final branch and became 'Autumn'.
    if pd.isna(month):
        return None
    if month in [12, 1, 2]:
        return 'Winter'
    elif month in [3, 4, 5]:
        return 'Spring'
    elif month in [6, 7, 8]:
        return 'Summer'
    elif month in [9, 10, 11]:
        return 'Autumn'
    # Anything left is not a calendar month; fail loudly instead of mislabelling it.
    raise ValueError(f"month must be in 1..12, got {month!r}")
# Parse the flood start dates so the month can be read from them.
overlap_2020['start_date'] = pd.to_datetime(overlap_2020['start_date'])

# Season of every overlay row (one row per DMA/flood intersection).
overlap_season = overlap_2020['start_date'].dt.month.apply(get_season)

# Rebuild floods_counts from the overlay as "floods per DMA and season".
# The season must be attached to the overlay rows it was computed from:
# assigning it straight onto the per-DMA table would align the two frames by
# their 0..n-1 row labels, so DMA i would receive the season of overlay row i,
# which belongs to an unrelated flood/DMA pair.
# A DMA flooded in more than one season now gets one row per season.
# Recomputing from overlap_2020 also makes this cell safe to re-run.
floods_counts = (
    pd.DataFrame({
        'DMASHORTNA': overlap_2020['DMASHORTNA'],
        'season': overlap_season,
    })
    .groupby(['DMASHORTNA', 'season'])
    .size()
    .reset_index(name='floods_count')
    .loc[:, ['DMASHORTNA', 'floods_count', 'season']]
)

# Group by 'floods_count' and 'season': how many DMAs saw that many floods in that season
floods_num_by_season = floods_counts.groupby(['floods_count', 'season']).size().reset_index(name='num_dma')

# Display the result
floods_num_by_season

Unnamed: 0,floods_count,season,num_dma
0,1,Winter,8
1,2,Winter,5
2,3,Winter,1


In [16]:
# Count the flood records that share the same (start_date, end_date) pair.
floods_num_by_date = (
    floods_shp_2020
    .groupby(['start_date', 'end_date'])
    .size()
    .reset_index(name='counts_date')
)

In [17]:
# Show the number of flood records per (start_date, end_date) pair.
floods_num_by_date

Unnamed: 0,start_date,end_date,counts_date
0,2020-02-08,2020-02-14,2
1,2020-02-16,2020-02-24,2
2,2020-12-23,2020-12-23,3
3,2020-12-23,2020-12-24,5
4,2020-12-23,2020-12-30,1


In [15]:
# Collect, for each flood_id, the list of DMA short names found in the overlay.
dma_per_flood = (
    overlap_2020
    .groupby('flood_id')['DMASHORTNA']
    .apply(list)
    .reset_index(name='DMA_ids')
)
dma_per_flood

Unnamed: 0,flood_id,DMA_ids
0,8,[EVEMISMA]
1,59,[EVEMISMA]
2,228,"[CLAYPLMA, ALLINGMA, HOUMARMA, LNGBENMA]"
3,229,"[CLAYPLMA, ALLINGMA, LNGBENMA]"
4,328,"[KENNI4MA, DISSS1MA, KENNI5MA]"
5,329,"[DEENEDMA, SOUTHWMA]"
6,330,[WDFRD1MA]
7,331,[BRIGSVMA]
8,332,[BRIGSVMA]
9,333,[WROWRDMA]


In [16]:
# Translate the DMA short names collected for each flood into DMA ids using the
# name -> id lookup built from DMA_mapping, then attach the id lists to the
# flood records.
dma_name_to_id = dict(zip(DMA_mapping['DMA_Name'], DMA_mapping['DMA_ID']))

# Names with no entry in the lookup are still left out of the id lists, but
# they are reported here so the loss is visible instead of silent.
unmapped_dma_names = sorted(
    {
        str(dma)
        for dma_list in dma_per_flood['DMA_ids']
        for dma in dma_list
        if dma not in dma_name_to_id
    }
)
if unmapped_dma_names:
    print(f"DMA names without an id in DMA_mapping (dropped): {unmapped_dma_names}")

# NOTE: this overwrites the name lists in place. Re-running the cell without
# first rebuilding dma_per_flood looks the ids up as if they were names, which
# will most likely drop them; the report above would then list those ids.
dma_per_flood['DMA_ids'] = dma_per_flood['DMA_ids'].apply(
    lambda x: [dma_name_to_id[dma] for dma in x if dma in dma_name_to_id]
)

# Left merge: every flood record is kept; a flood without a row in
# dma_per_flood gets a missing value in 'DMA_ids'.
flood_shp_2020 = floods_shp_2020.merge(dma_per_flood, on='flood_id', how='left')

In [22]:
# Drop the listed columns from the merged flood table.
# The result is rebound instead of using inplace=True, and errors='ignore'
# skips columns that are already gone, so running this cell a second time no
# longer raises KeyError.
flood_shp_2020 = flood_shp_2020.drop(
    columns=['rec_out_id', 'rec_grp_id', 'fm_status', 'hfm_status', 'data_prov', 'data_qual', 'year'],
    errors='ignore',
)

In [23]:
# Inspect the merged flood table after the column drop.
flood_shp_2020

Unnamed: 0,name,start_date,end_date,flood_src,flood_caus,data_src,fluvial_f,coastal_f,tidal_f,flood_id,geometry,DMA_ids
0,2020 February Flood Incident - Storm Ciara,2020-02-08,2020-02-14,main river,channel capacity exceeded (no raised defences),Satellite - Radar,True,False,False,8,"POLYGON Z ((-110913.298 7064453.303 0, -110929...",[579]
1,2020 February Flood Incident - Storm Ciara,2020-02-08,2020-02-14,main river,channel capacity exceeded (no raised defences),Satellite - Radar,True,False,False,59,"MULTIPOLYGON Z (((-112400.716 7063479.888 0, -...",[579]
2,LNA_2020_02_River Witham,2020-02-16,2020-02-24,main river,overtopping of defences,Visual,True,False,False,228,"MULTIPOLYGON Z (((-83865.128 6981726.578 0, -8...","[381, 14, 1046]"
3,LNA_2020_02_River Witham,2020-02-16,2020-02-24,main river,overtopping of defences,Visual,True,False,False,229,"MULTIPOLYGON Z (((-82616.734 6980900.966 0, -8...","[381, 14, 1046]"
4,River Waveney Flooding 2020,2020-12-23,2020-12-30,main river,channel capacity exceeded (no raised defences),Satellite - Optical,True,False,False,328,"MULTIPOLYGON Z (((112285.451 6868380.189 0, 11...","[940, 502, 941]"
5,LNA_2020_December_Southwick Brook,2020-12-23,2020-12-24,main river,channel capacity exceeded (no raised defences),Visual,True,False,False,329,"POLYGON Z ((-55793.647 6894125.628 0, -55781.3...","[473, 1586]"
6,LNA_2020_December_Harpers Brook_Sudborough,2020-12-23,2020-12-24,main river,channel capacity exceeded (no raised defences),Visual,True,False,False,330,"POLYGON Z ((-64960.158 6877934.908 0, -64950.4...",[1885]
7,LNA_2020_December_Harpers Brook_Brigstock,2020-12-23,2020-12-24,main river,channel capacity exceeded (no raised defences),Visual,True,False,False,331,"MULTIPOLYGON Z (((-67961.415 6883426.326 0, -6...",[266]
8,LNA_2020_December_Harpers Brook_Brigstock,2020-12-23,2020-12-24,drainage,local drainage/surface water,Other,False,False,False,332,"POLYGON Z ((-67655.038 6882969.348 0, -67675.8...",[266]
9,LNA_2020_December_Wootton Brook_Collingtree Park,2020-12-23,2020-12-23,main river,channel capacity exceeded (no raised defences),Visual,True,False,False,333,"POLYGON Z ((-100685.782 6836592.814 0, -100642...",[2004]


In [38]:
# output_dir: folder that receives the per-flood, per-DMA flow extracts.
# water_flow_path: folder holding the DMA flow files named "Flow_DMA_<id>.csv".
# NOTE(review): absolute, machine-specific paths.
output_dir = r"D:\qing_research\1_flood_water\1_flood_water_network\data\Geo\flood\2_extract_flood_date_waterflow\2020"
water_flow_path = r"D:\qing_research\1_flood_water\1_flood_water_network\data\Anglian_water\processed\DMA_flow"

In [34]:
# Convert both flood date columns to pandas datetimes.
for date_col in ('start_date', 'end_date'):
    flood_shp_2020[date_col] = pd.to_datetime(flood_shp_2020[date_col])

In [40]:
# For every flood and every DMA id attached to it, cut the DMA's flow series
# down to the flood period padded by 7 days on each side, and save the slice
# as its own CSV in output_dir.

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

window_padding = pd.Timedelta(days=7)

# Several floods share the same DMA, so each flow file is read and parsed once
# and reused from this cache afterwards.
dma_flow_cache = {}

for _, flood_row in flood_shp_2020.iterrows():
    dma_ids = flood_row['DMA_ids']
    # A flood with no matching DMA row carries a missing value here rather than
    # a list (result of the left merge); there is nothing to extract for it.
    if dma_ids is None or (isinstance(dma_ids, float) and pd.isna(dma_ids)):
        continue

    # The extraction window depends only on the flood, not on the DMA.
    expanded_startdate = flood_row['start_date'] - window_padding
    expanded_enddate = flood_row['end_date'] + window_padding

    for dma_id in dma_ids:
        if dma_id not in dma_flow_cache:
            dma_file = f"Flow_DMA_{dma_id}.csv"
            dma_file_path = os.path.join(water_flow_path, dma_file)
            dma_data = pd.read_csv(dma_file_path)
            dma_data['Date_Time'] = pd.to_datetime(dma_data['Date_Time'])
            dma_flow_cache[dma_id] = dma_data
        dma_data = dma_flow_cache[dma_id]

        # Both bounds are inclusive.
        # NOTE(review): if end_date carries no time of day, the upper bound is
        # midnight at the start of the last padded day, so later readings on
        # that day are excluded - confirm this is intended.
        extracted_data = dma_data[(dma_data['Date_Time'] >= expanded_startdate) & (dma_data['Date_Time'] <= expanded_enddate)]

        output_file_name = f"flood_{flood_row['flood_id']}_{flood_row['start_date'].strftime('%Y%m%d')}_{flood_row['end_date'].strftime('%Y%m%d')}_{dma_id}.csv"
        output_path = os.path.join(output_dir, output_file_name)
        extracted_data.to_csv(output_path, index=False)

In [42]:
# Reduce both datetime columns to plain date values.
for date_col in ('start_date', 'end_date'):
    flood_shp_2020[date_col] = flood_shp_2020[date_col].dt.date

In [44]:
# Save the flood table as an ESRI Shapefile.
flood_shp_2020_out_path = r"D:\qing_research\1_flood_water\1_flood_water_network\data\Geo\flood\2_extract_flood_date_waterflow\2020\flood_shp_2020.shp"
flood_shp_2020.to_file(flood_shp_2020_out_path, driver="ESRI Shapefile")