# Imports

In [62]:
import pandas as pd
import re
import datetime
import numpy as np
from tqdm import tqdm

from disaster_extr_helpers import *

In [63]:
data = 'data/emdat_processed.csv'
parse_dates = ['StartDate', 'EndDate']
df_emdat = pd.read_csv(data, index_col="Dis No", parse_dates = parse_dates)

In [64]:
australia_heat_wave = { 
    'Group': 'Natural',
    'Subgroup':'Meteorological', 
    'Type':'Extreme temperature ', 
    'Subtype':'Heat wave', 
    'Subsubtype': 'NaN',
    'Name':'NaN', 
    'Country':'Australia', 
    'ISO':'AUS', 
    'Region': 'Australia and New Zealand',
    'Continent': 'Oceania',
    'Origin':'NaN', 
    'Magnitude':48.2, 
    'Scale':'°C', 
    'Deaths': 0,
    'Injured':0, 
    'Affected':0,
    'Homeless':0,
    'TotalAffected':0, 
    'Damages':0, 
    'StartDate':'2017-01-30', 
    'EndDate': '2017-02-14',
    'Duration':15
}
# Not necessary, row already added to csv file
#row_series = pd.Series(data=australia_heat_wave, name='2017-9999-AUS')
#df_emdat = df_emdat.append(row_series, ignore_index=False)

In [65]:
df_emdat.loc['2017-9999-AUS']

Group                              Natural
Subgroup                    Meteorological
Type                  Extreme temperature 
Subtype                          Heat wave
Subsubtype                             NaN
Name                                   NaN
Country                          Australia
ISO                                    AUS
Region           Australia and New Zealand
Continent                          Oceania
Origin                                 NaN
Magnitude                             48.2
Scale                                   °C
Deaths                                 0.0
Injured                                0.0
Affected                               0.0
Homeless                               0.0
TotalAffected                          0.0
Damages                                0.0
StartDate              2017-01-30 00:00:00
EndDate                2017-02-14 00:00:00
Duration                                15
Name: 2017-9999-AUS, dtype: object

In [66]:
STORMS = {
    '2015': ['2015-0470-MEX'],
    '2016': ['2016-0041-FJI'],
    '2017': ['2017-0362-USA'],
    '2018': ['2018-0342-USA', '2018-0341-CHN', '2018-0341-PHL', '2018-0341-HKG'], # two separate storms over same time period
    '2019': ['2019-0492-JPN'],
    #'2020': ['2020-0211-LKA', '2020-0211-BGD', '2020-0211-IND'] # can't use because in May 2020
}

STORMS_val = {
    '2018-0341-CHN': {'Magnitude': 240},
    '2018-0342-USA': {'Magnitude': 240, 'Damages': 24000000},
    #'2020-0211-BGD': {'Magnitude': 151},
    #'2020-0211-LKA': {'Magnitude': 80},
    '2018-0341-PHL': {'Deaths': 127, 'Damages': 628000},
    '2018-0341-CHN': {'Deaths': 6, 'Damages': 1990000},
    '2018-0341-HKG': {'Damages': 930000},
    '2017-0362-USA': {'Deaths': 106, 'Damages': 125000000},
    '2016-0041-FJI': {'Damages': 1400000},
    '2015-0470-MEX': {'Magnitude': 345, 'Damages': 462000}
}

STORMS_2020 = {
    '2020': ['2019-0573-PHL']
}

STORMS_2020_val = {}

HEAT_WAVES = {
    '2015': ['2015-0189-IND'],
    '2016': ['2016-0133-IND'],
    '2017': ['2017-9999-AUS', '2017-0072-AUS'], # heat wave and associated fire
    '2018': ['2018-0226-JPN', '2018-0256-PRK'],
    '2019': ['2019-0366-BEL', '2019-0366-FRA', '2019-0366-NLD', '2019-0366-DEU'], #'2019-0366-AUT' (no value for temp), '2019-0650-GBR' (lasts too long)
  #  '2020': ['2019-0545-AUS'] # Forest fire (can't use '2020-0441-USA' because in august)
}

HEAT_WAVES_2020 = {
    '2020': ['2019-0545-AUS'] # Forest fire (can't use '2020-0441-USA' because in august)
}
HEAT_WAVES_2020_val = {
    #'2020-0441-USA': {'Magnitude': 4180},
    '2019-0545-AUS': {'Magnitude': 186360}
}

HEAT_WAVES_val = {
    #'2020-0441-USA': {'Magnitude': 4180},
    '2017-0072-AUS': {'Magnitude': 550},
    '2018-0256-PRK': {'Deaths': 42},
    #'2019-0545-AUS': {'Magnitude': 186360}
}

In [67]:
df_heat = get_df_disaster(df_emdat, HEAT_WAVES, HEAT_WAVES_val)

In [68]:
df_heat[['Magnitude', 'Scale', 'Name', 'StartDate', 'EndDate','Deaths','Damages']]

Unnamed: 0_level_0,Magnitude,Scale,Name,StartDate,EndDate,Deaths,Damages
Dis No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-0189-IND,48.0,°C,,2015-05-20,2015-05-31,2248.0,0.0
2016-0133-IND,51.0,°C,,2016-04-01,2016-05-20,300.0,0.0
2017-9999-AUS,48.2,°C,,2017-01-30,2017-02-14,0.0,0.0
2017-0072-AUS,550.0,Km2,Sir Ivan fire,2017-02-09,2017-02-13,0.0,20000.0
2018-0226-JPN,41.0,°C,,2018-07-01,2018-07-15,119.0,0.0
2018-0256-PRK,38.0,°C,,2018-07-11,2018-08-03,42.0,0.0
2019-0366-BEL,41.0,°C,,2019-07-19,2019-07-27,400.0,0.0
2019-0366-FRA,44.0,°C,,2019-07-21,2019-07-27,868.0,0.0
2019-0366-NLD,40.0,°C,,2019-07-22,2019-07-27,400.0,0.0
2019-0366-DEU,42.0,°C,,2019-07-24,2019-07-25,0.0,0.0


In [69]:
df_heat_bounds = retrieve_bounding_dates(df_heat)

In [70]:
df_heat_bounds

Unnamed: 0_level_0,MinStartDate,MaxEndDate
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,2015-05-20,2015-05-31
2016,2016-04-01,2016-05-20
2017,2017-01-30,2017-02-14
2018,2018-07-01,2018-08-03
2019,2019-07-19,2019-07-27


In [71]:
df_heat_2020 = get_df_disaster(df_emdat, HEAT_WAVES_2020, HEAT_WAVES_2020_val)

In [72]:
df_heat_2020_bounds = retrieve_bounding_dates(df_heat_2020)

In [73]:
df_heat_2020_bounds

Unnamed: 0_level_0,MinStartDate,MaxEndDate
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2019,2019-09-01,2020-02-01


In [74]:
df_storm = get_df_disaster(df_emdat, STORMS, STORMS_val)

In [75]:
df_storm[['Magnitude', 'Scale', 'Name', 'StartDate', 'EndDate','Deaths', 'Damages', 'Injured','TotalAffected']]

Unnamed: 0_level_0,Magnitude,Scale,Name,StartDate,EndDate,Deaths,Damages,Injured,TotalAffected
Dis No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2015-0470-MEX,345.0,Kph,Hurricane Patricia,2015-10-22,2015-10-28,14.0,462000.0,0.0,15000.0
2016-0041-FJI,325.0,Kph,Cyclone Winston,2016-02-20,2016-02-21,45.0,1400000.0,144.0,540558.0
2017-0362-USA,215.0,Kph,Hurricane Harvey,2017-08-25,2017-08-29,106.0,125000000.0,24.0,582024.0
2018-0341-CHN,0.0,Kph,Typhoon Mangkut (Ompong),2018-09-10,2018-09-18,6.0,1990000.0,0.0,0.0
2018-0342-USA,240.0,Kph,Hurricane Florence,2018-09-12,2018-09-18,53.0,24000000.0,0.0,1500000.0
2018-0341-PHL,240.0,Kph,Typhoon Mangkut (Ompong),2018-09-16,2018-09-16,127.0,628000.0,138.0,3800138.0
2018-0341-HKG,240.0,Kph,Typhoon Mangkut (Ompong),2018-09-17,2018-09-17,0.0,930000.0,300.0,300.0
2019-0492-JPN,160.0,Kph,Tropical cylone 'Hagibis',2019-10-12,2019-10-17,99.0,17000000.0,470.0,390470.0


In [76]:
df_storm_bounds = retrieve_bounding_dates(df_storm)

In [77]:
df_storm_bounds

Unnamed: 0_level_0,MinStartDate,MaxEndDate
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2015,2015-10-22,2015-10-28
2016,2016-02-20,2016-02-21
2017,2017-08-25,2017-08-29
2018,2018-09-10,2018-09-18
2019,2019-10-12,2019-10-17


In [78]:
df_storm_2020 = get_df_disaster(df_emdat, STORMS_2020, STORMS_2020_val)

In [79]:
df_storm_2020

Unnamed: 0_level_0,Group,Subgroup,Type,Subtype,Subsubtype,Name,Country,ISO,Region,Continent,...,Scale,Deaths,Injured,Affected,Homeless,TotalAffected,Damages,StartDate,EndDate,Duration
Dis No,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-0573-PHL,Natural,Meteorological,Storm,Tropical cyclone,,Tropical cyclone 'Kammuri' (Tisoy),Philippines (the),PHL,South-Eastern Asia,Asia,...,Kph,4.0,318.0,2305075.0,342165.0,2647558.0,109151.0,2019-12-02,2019-12-03,1


In [80]:
df_storm_2020_bounds = retrieve_bounding_dates(df_storm_2020)

In [81]:
df_storm_2020_bounds

Unnamed: 0_level_0,MinStartDate,MaxEndDate
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2019,2019-12-02,2019-12-03


In [82]:
# General tags for heat waves
heat_tags = pd.DataFrame(
    {'tags': [
        r'\b([dD]egrees)\b',
        r'\b([fF]ahrenheit)\b',
        r'\b(°[fF])\b',
        r'\b(°[cC])\b',
        r'\b([cC]elsius)\b',
        r'\b([mM]ercury (rose|hit))\b',
        #r'\b(waves?)\b', too broad
        r'\b([hH]eat(ing)?)\b',
        r'\b([tT]emperatures?)\b',
        r'\b([hH]ot(test|ter)?)\b',
        r'\b([wW]arm(er|est)?)\b',
        r'\b(([eE]xtreme|[vV]olatile) [wW]eather)\b',
        r'\b([hH]eatstrokes?)\b',
        r'\b([hH]eatwaves?)\b',
        r'\b([hH]eatstorms?)\b',
        r'\b([wW]orld [mM]eteorological [oO]rganisation)\b',
        r'\b(WMO)\b']
    }
)

# Indian Heat Wave 2015
heat_tags_2015 = pd.DataFrame(
    {'tags': [
        r'\b([aA]ndhra [pP]radesh)\b',
        r'\b([tT]elangana)\b',
        r'\b([pP]unjab)\b',
        r'\b([oO]disha)\b',
        r'\b([kK]hammam)\b',
        r'\b([jJ]harsuguda)\b',
        r'\b([hH]yderabad)\b',
        r'\b([iI]ndia [mM]eteorological [dD]epartment)\b']
    }
)

# Indian Heat Wave 2016
heat_tags_2016 = pd.DataFrame(
    {'tags': [
        r'\b([pP]halodi)\b',
        r'\b([iI]ndia [mM]eteorological [dD]epartment)\b']
    }
)

# Sir Ivan Fire (caused by heatwave) in Australia
heat_tags_2017 = pd.DataFrame(
    {'tags': [
        r'\b([bB]ureau [oF]f [mM]eteorology)\b',
        r'\b(New South Wales)\b',
        r'\b(Pilliga)\b',
        r'\b(Talleganda)\b',
        r'\b(Queensland Ambulance Service)\b',
        r'\b([wW]ildfires?)\b',
        r'\b([oO]range sk(y|ies))\b',
        r'\b([sS]moke clouds?)\b',
        r'\b(([bB]ush)?[fF]ires?)\b',
        r'\b([mM]egafires?)\b',
        r'\b([bB]urning forests?)\b',
        r'\b(Taree)\b',
        r'\b(Ivanhoe)\b']
    }
)

# Japan and Korea Heat Wave
heat_tags_2018 = pd.DataFrame(
    {'tags': [
        r'\b([kK]umagaya)\b',
        r'\b([jJ]apan [mM]eteorological [aA]gency)\b',
        r'\b([sS]henyang)\b',
        r'\b([tT]okyo [fF]ire [dD]epartment)\b',
        r'\b([gG]angneung)\b',
        r'\b([hH]ayang)\b']
    }
)

# Europe Heat Wave
heat_tags_2019 = pd.DataFrame(
    {'tags': [
        r'\b([aA]ngleur)\b',
        r'\b([bB]egijnendijk)\b',
        r'\b([dD]oksany)\b',
        r'\b([pP]orvoo)\b',
        r'\b([mM][ée]t[ée]o [fF]rance)\b',
        r'\b([gG]allargues-le-[mM]ontueux)\b',
        r'\b([bB]erlin[- ][tT]empelhof)\b',
        r'\b([bB]randenburg)\b',
        r'\b(Lingen)\b',
        r'\b([mM]eteolux)\b',
        r'\b([sS]teinsel)\b',
        r'\b(KNMI|knmi|[rR]oyal [dD]utch [mM]eteorological [iI]nstitute)\b',
        r'\b([gG]elderland)\b',
        r'\b([sS]altdal)\b',
        r'\b([nN]orwegian [mM]eteorological [iI]nstitute)\b',
        r'\b([zZ]aragoza)\b',
        r'\b([oO]skarshamn)\b',
        r'\b([sS]wedish [mM]eteorological and [hH]ydrological [iI]nstitute)\b',
        r'\b([mM]eteo[sW]wiss)\b',
        r'\b([cC]ambridge [uU]niversity [bB]otanic [gG]arden)\b']
    }
)

# 2019–20 Australian bushfire season
heat_tags_2020 = pd.DataFrame(
    {'tags': [
        r'\b([wW]ildfires?)\b',
        r'\b([oO]range sk(y|ies))\b',
        r'\b([sS]moke clouds?)\b',
        r'\b(([bB]ush)?[fF]ires?)\b',
        r'\b([mM]egafires?)\b',
        r'\b([bB]urning forests?)\b',
        r'\b(Black Summer)\b']
    }
)

# General tags for storms
storm_tags = pd.DataFrame(
    {'tags': [
        #r'\b([tT]ropical [sS]torms?)\b',
        #r'\b([cC]yclones?)\b',
        #r'\b([tT]yphoons?)\b',   # Maybe only include for typhoon?
        #r'\b([hH]urricanes?)\b', # Maybe only include for hurricane?
        r'\b([wW]inds?)\b',
        r'\b([gG]usts?)\b',
        r'\b((one|ten|[0-9]{1,2})-minute sustain(ed)?)\b',
        r'\b([mM]aximum sustained winds?)\b',
        r'\b([gG]ale[- ]force)\b',
        r'\b([wW]orld [mM]eteorological [oO]rganisation)\b',
        r'\b(WMO)\b']
    }
) 

# Hurricane Patricia Mexico
storm_tags_2015 = pd.DataFrame(
    {'tags': [
        r'\b([cC]yclones?)\b',
        r'\b([hH]urricanes?)\b',
        r'\b(Patricia)\b',
        r'\b([tT]ropical [sS]torms?)\b',
        r'\b([cC]ategory 5)\b',
        r'\b([fF]lood(waters?|s|ed|ing)?)\b',
        r'\b(rain(ed|s|fall)?)\b',
        r'\b(NOAA)\b',
        r'\b(Tehuantepec)\b',
        r'\b(Jalisco)\b',
        r'\b(Federal Emergency Management Agency|FEMA)\b',
        r'\b(National Hurricane Center|NHC)\b',
        r'\b(Mexican (Red Cross|Army|Navy|Federal Police))\b']
    }
) 

# Cyclone Winston Fiji
storm_tags_2016 = pd.DataFrame(
    {'tags': [
        r'\b([cC]yclones?)\b',
        r'\b(Winston)\b',
        r'\b([cC]ategory 5)\b',
        r'\b(Vanua Balavu)\b',
        r'\b(Viti Levu)\b',
        r'\b(Fiji)\b',
        r'\b([jJ]oint [tT]yphoon [wW]arning [cC]enter)\b',
        r'\b(Rakiraki District)\b',
        r'\b(FMS)\b',
        r'\b(Fijian Red Cross)\b']
    }
) 

# Hurricane Harvey USA
storm_tags_2017 = pd.DataFrame(
    {'tags': [
        r'\b([hH]urricanes?)\b',
        r'\b(Harvey)\b',
        r'\b([tT]ropical [sS]torms?)\b',
        r'\b([cC]ategory 4)\b',
        r'\b([fF]lood(waters?|s|ed|ing)?)\b',
        r'\b(rain(ed|s|fall)?)\b',
        r'\b(NOAA)\b',
        r'\b([sS]an [jJ]os[ée] [iI]sland)\b',
        r'\b(Holiday Beach)\b',
        r'\b(Federal Emergency Management Agency|FEMA)\b',
        r'\b(National Hurricane Center|NHC)\b',
        r'\b(H.R. ?601)\b']
    }
) 

# Pacific Asia Typhoon Mangkhut (Ompong) and Hurricane Florence in US 
storm_tags_2018 = pd.DataFrame(
    {'tags': [
        # Mangkhut tags
        r'\b([cC]yclones?)\b',
        r'\b([tT]yphoons?)\b',
        r'\b([mM]angkhut)\b',
        r'\b([oO]mpong)\b',
        r'\b([cC]agayan)\b',
        r'\b([cC]ategory 5)\b',
        r'\b([nN]orthern [mM]ariana [iI]slands)\b',
        r'\b([bB]aggao)\b',
        r'\b([cC]agayan)\b',
        r'\b([hH]ong [kK]ong [oO]bservatory)\b',
        r'\b([hH]urricane [sS]ignal)\b',
        r'\b([gG]uangdong)\b',
        r'\b([mM]eteorological [bB]ureau)\b',
        r'\b([gG]uangzhou)\b',
        ## Hurricane Florence tags
        r'\b([hH]urricanes?)\b',
        r'\b(Florence)\b',
        r'\b([tT]ropical [sS]torms?)\b',
        r'\b([cC]ategory 4)\b',
        r'\b([wW]rightsville [bB]each)\b',
        r'\b([fF]lood(waters?|s|ed|ing)?)\b',
        r'\b(rain(ed|s|fall)?)\b',
        r'\b(NOAA)\b',
        r'\b(SCEMD)\b',
        r'\b(Federal Emergency Management Agency|FEMA)\b',
        r'\b(National Hurricane Center|NHC)\b']
    }
)

# Japan Tropical Cyclone Hagibis
storm_tags_2019 = pd.DataFrame(
    {'tags': [
        r'\b([cC]yclones?)\b',
        r'\b([tT]yphoons?)\b',
        r'\b([rR]eiwa 1)\b',
        r'\b([hH]agibis)\b',
        r'\b([cC]ategory 5)\b',
        #r'\b([jJ]apan)\b', too broad
        r'\b([fF]lood(waters?|s|ed|ing)?)\b',
        r'\b([lL]andslides?)\b',
        r'\b(rain(ed|s|fall)?)\b',
        r'\b([cC]hikuma [rR]iver)\b',
        r'\b([uU]eda)\b',
        r'\b([hH]imawari)\b',
        r'\b([nN]agano)\b',
        r'\b([iI]chihara)\b',
        r'\b([sS]hinkansen)\b',
        r'\b([fF]ukushima)\b',
        r'\b([aA]kiyama [rR]iver)\b',
        r'\b([eE]vacuat(ion|ed?)( center)?)\b',
        r'\b([jJ]apan [mM]eteorological [aA]gency)\b',
        r'\b([iI]zu [pP]eninsula)\b',
        r'\b([sS]hizuoka)\b']
    }
)

# Tropical Cyclone Kammuri
storm_tags_2020 = pd.DataFrame(
    {'tags': [
        r'\b([cC]yclones?)\b',
        r'\b([tT]yphoons?)\b',
        r'\b([kK]ammuri)\b',
        r'\b([tT]isoy)\b',
        r'\b(Mariana Islands)\b',
        r'\b([cC]ategory 4)\b',
        r'\b(Philippine Area of Responsibility)\b',
        r'\b(Bicol Region)\b',
        r'\b(PAGASA)\b',
        r'\b([fF]lood(waters?|s|ed|ing)?)\b',
        r'\b(rain(ed|s|fall)?)\b']
    }
)

# Cyclone Amphan India Bangladesh
storm_tags_2020_unused = pd.DataFrame(
    {'tags': [
        r'\b([cC]yclones?)\b',
        r'\b(Amphan)\b',
        r'\b([cC]ategory 5)\b',
        r'\b(West Bengal)\b',
        r'\b(Kerala)\b',
        r'\b(Satkhira)\b',
        r'\b([jJ]oint [tT]yphoon [wW]arning [cC]enter)\b',
        r'\b(North Indian Ocean)\b',
        r'\b(Indian (Air Force|Navy))\b',
        r'\b(National Disaster Response Force|NDRF)\b',
        r'\b(Bangladesh (Air Force|Army|Armed Forces|Meteorological Department))\b',
        r'\b(Sri Lanka (Air Force|Navy))\b',
        r'\b([fF]lood(waters?|s|ed|ing)?)\b',
        r'\b([lL]andslides?)\b',
        r'\b(rain(ed|s|fall)?)\b']
    }
) 

climate_tags = pd.DataFrame(
    {'tags': [
        r'\b([cC]limate ([iI]mpact|[cC]hange|[cC]risis|[mM]odel|[eE]mergency))\b',
        r'\b([gG]lobal [wW]arming)\b',
        r'\b([gG]reenhouse)\b']
    }
) 

In [83]:
YEAR = 2019
data_path = 'data/quotes-'+str(YEAR)+'.json.bz2'
compression = 'bz2'
chunksize = 100000
disaster_df = df_storm
disaster_type = 'storm'
regex_pattern = generate_regex_from_year_and_type(YEAR, disaster_type)

In [84]:
disaster_df_bounds = retrieve_bounding_dates(disaster_df)

In [85]:
start_YEAR, end_YEAR = disaster_df_bounds.loc[YEAR].MinStartDate, disaster_df_bounds.loc[YEAR].MaxEndDate

In [86]:
lower_YEAR, upper_YEAR = compute_date_bounds(start_YEAR, end_YEAR)

In [87]:
lower_YEAR, upper_YEAR

('2019-09-21', '2019-11-07')

In [None]:
df_concat_result = process_quotes(data_path,lower_YEAR,upper_YEAR,YEAR,regex_pattern)

In [None]:
write_df_to_disk(df_concat_result, disaster_type, YEAR, compression=compression, file_type='both')

In [None]:
# Doing it like this also works

In [None]:
quotes = pd.read_json('data/quotes-'+str(YEAR)+'.json.bz2', lines=True, compression='bz2', chunksize=chunksize, nrows=nrows)

In [None]:
chunk_interval_list = []
for chunk in tqdm(quotes, total=nrows // chunksize):
    chunk_interval = chunk[(chunk['date'] >= lower_YEAR) & (chunk['date'] <= upper_YEAR)]
    chunk_interval_list.append(chunk_interval[chunk_interval['quotation'].str.contains(regex_pattern)])

In [None]:
len(chunk_interval_list)

In [None]:
df_interval_storm = pd.concat(chunk_interval_list)

In [None]:
len(df_interval_storm)

**To remove unwanted quotes based on regex pattern:**

In [None]:
## Option 1

In [None]:
quote='it makes no sense to keep on letting millions of illegal or legal immigrants flood into the United States, and to keep the tens of millions that are already here.'

In [None]:
quote='check out the huge flood migrant'

In [None]:
import re
word = quote
regex_pos = re.compile(r'flood')
regex_neg = re.compile(r'legal|migrant')
if regex_pos.search(word) and not regex_neg.search(word):
    print("match")


In [None]:
# Option 2 (more efficient)

In [None]:
df.drop(df.filter(regex='my_expression').columns,axis=1)