In [3]:
import pandas as pd
from data_preparation.spatio_temporal_filtering import read_bd_data_4_years

In [4]:
# Unpack the six objects returned by read_bd_data_4_years().
# NOTE(review): judging by the names these are metadata/series pairs at zone,
# region and country level — confirm against data_preparation.spatio_temporal_filtering.
metadata, series, metadata_region, region_series, metadata_country, country_series = read_bd_data_4_years()

In [16]:
# Show the metadata table; the output below lists Zone, Country, Region,
# Population, Latitude and Longitude per entry (keys look like "<Zone>_<Region>").
metadata

Unnamed: 0_level_0,Zone,Country,Region,Population,Latitude,Longitude
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Azimpur_Dhaka,Azimpur,Bangladesh,Dhaka,96641,23.7298,90.3854
Barisal_Barisal,Barisal,Bangladesh,Barisal,202242,22.705,90.3701
Bhairab Bazar_Dhaka,Bhairab Bazar,Bangladesh,Dhaka,105457,24.0524,90.9764
Bhola_Barisal,Bhola,Bangladesh,Barisal,99079,22.6876,90.644
Bogra_Rajshahi,Bogra,Bangladesh,Rajshahi,210000,24.851,89.3711
Chittagong_Chittagong,Chittagong,Bangladesh,Chittagong,3920222,22.3384,91.8317
Comilla_Chittagong,Comilla,Bangladesh,Chittagong,389411,23.4619,91.185
Dhaka_Dhaka,Dhaka,Bangladesh,Dhaka,10356500,23.7104,90.4074
Dinajpur_Rangpur,Dinajpur,Bangladesh,Rangpur,206234,25.6274,88.6378
Faridpur_Dhaka,Faridpur,Bangladesh,Dhaka,112187,23.6061,89.8406


In [7]:
# Select the entry keyed "Dhaka_Dhaka" from `series`; the statistics cells below
# are computed on this single time series.
Dhaka_Series = series["Dhaka_Dhaka"]
Dhaka_Series

time
2018-01-01 00:00:00    131.0
2018-01-01 01:00:00    134.5
2018-01-01 02:00:00    148.1
2018-01-01 03:00:00    175.2
2018-01-01 04:00:00    184.6
                       ...  
2021-12-31 19:00:00    129.1
2021-12-31 20:00:00    126.0
2021-12-31 21:00:00    125.8
2021-12-31 22:00:00    121.5
2021-12-31 23:00:00    124.9
Name: Dhaka_Dhaka, Length: 35064, dtype: float64

In [9]:
import numpy as np

def compute_seasonal_daynight_stats(series, value_name='PM2.5', fmt='{:.1f}±{:.1f}'):
    """
    Summarize a time series as "mean±std" per season, split into day and night.

    Parameters
    ----------
    series : pd.Series
        Values indexed by time. If the index is not a DatetimeIndex it is
        converted with ``pd.to_datetime``. NaN values are dropped first; the
        caller's series is not modified.
    value_name : str
        Kept for backward compatibility. It does not appear in the returned
        table and no longer influences the computation (previously a value
        such as ``'hour'`` collided with an internal helper column).
    fmt : str
        ``str.format`` template that receives ``(mean, std)``.

    Returns
    -------
    pd.DataFrame
        Index named ``'season'`` with rows ``winter, spring, summer, fall, all``
        (always in that order) and columns ``day`` / ``night``. Each cell is
        the formatted string, or NaN when that season/period has no data.
        The standard deviation is the pandas default (sample, ddof=1); when it
        is undefined (a single observation) it is reported as 0.0.

    Notes
    -----
    Day is 06:00-17:59, night is 18:00-05:59. Seasons are meteorological:
    winter = Dec-Feb, spring = Mar-May, summer = Jun-Aug, fall = the rest.
    """
    values = series.dropna().copy()
    if not isinstance(values.index, pd.DatetimeIndex):
        values.index = pd.to_datetime(values.index)

    # Classify timestamps with standalone boolean arrays instead of helper
    # columns, so nothing can clash with the value column's name.
    hours = values.index.hour
    months = values.index.month
    is_day = np.asarray((hours >= 6) & (hours <= 17), dtype=bool)

    winter = np.isin(months, [12, 1, 2])
    spring = np.isin(months, [3, 4, 5])
    summer = np.isin(months, [6, 7, 8])
    season_masks = {
        'winter': winter,
        'spring': spring,
        'summer': summer,
        # "fall" is whatever is left, matching the original fall-through branch.
        'fall': ~(winter | spring | summer),
        'all': np.ones(len(values), dtype=bool),
    }

    def _format_stats(subset):
        """Return fmt(mean, std) for ``subset`` or NaN when it has no mean."""
        mean = subset.mean()
        if pd.isna(mean):
            return np.nan
        std = subset.std()
        return fmt.format(mean, 0.0 if pd.isna(std) else std)

    records = [
        {
            'day': _format_stats(values[mask & is_day]),
            'night': _format_stats(values[mask & ~is_day]),
        }
        for mask in season_masks.values()
    ]

    # Explicit index/columns keep the row and column order deterministic.
    return pd.DataFrame(
        records,
        index=pd.Index(list(season_masks), name='season'),
        columns=['day', 'night'],
    )

# Seasonal day/night mean±std table for the Dhaka series.
stats_df = compute_seasonal_daynight_stats(Dhaka_Series, value_name='PM2.5')
print(stats_df)

               day       night
season                        
winter  123.0±51.5  157.5±61.4
spring   57.8±32.1   67.7±47.6
summer   27.7±13.9   28.3±16.9
fall     53.9±37.1   66.2±48.2
all      65.1±50.3   79.2±66.2


In [10]:
# python
import numpy as np
import pandas as pd

def seasonal_daynight_percentiles(series, value_name='PM2.5', fmt='{:.1f}±{:.1f}'):
    """
    Tabulate quartiles and mean±std of a time series per season and day/night.

    ``series`` is a pd.Series indexed by time (the index is passed through
    ``pd.to_datetime`` when it is not already a DatetimeIndex); NaN values are
    dropped. ``value_name`` names the internal value column and ``fmt`` is the
    ``str.format`` template applied to ``(mean, std)``.

    Returns a DataFrame indexed by ``season`` (winter, spring, summer, fall,
    annual) with, for each of ``day`` (06:00-17:59) and ``night``, the 25th,
    50th and 75th percentiles rounded to one decimal and a formatted
    ``mean±sd`` string. Cells are NaN when a season/period has no data; an
    undefined standard deviation is shown as 0.0.
    """
    clean = series.dropna().copy()
    if not isinstance(clean.index, pd.DatetimeIndex):
        clean.index = pd.to_datetime(clean.index)

    frame = clean.to_frame(name=value_name)
    frame['hour'] = frame.index.hour
    frame['month'] = frame.index.month
    # Hours 6..17 inclusive count as daytime, everything else as night.
    frame['daynight'] = np.where(frame['hour'].between(6, 17), 'day', 'night')

    # Months not listed here (Sep-Nov) fall through to 'fall'.
    season_by_month = {
        12: 'winter', 1: 'winter', 2: 'winter',
        3: 'spring', 4: 'spring', 5: 'spring',
        6: 'summer', 7: 'summer', 8: 'summer',
    }
    frame['season'] = frame['month'].map(lambda month: season_by_month.get(month, 'fall'))

    records = []
    for label in ('winter', 'spring', 'summer', 'fall', 'annual'):
        if label == 'annual':
            part = frame
        else:
            part = frame[frame['season'] == label]

        record = {'season': label}
        for period in ('day', 'night'):
            keys = (f'{period}_25', f'{period}_50', f'{period}_75', f'{period}_mean±sd')
            observed = part[part['daynight'] == period][value_name]

            if observed.empty:
                record.update(dict.fromkeys(keys, np.nan))
                continue

            quartiles = observed.quantile([0.25, 0.5, 0.75]).tolist()
            spread = observed.std()
            if pd.isna(spread):
                spread = 0.0
            # Round the quartiles to one decimal via string formatting.
            cells = [float(format(q, '.1f')) for q in quartiles]
            cells.append(fmt.format(observed.mean(), spread))
            record.update(zip(keys, cells))
        records.append(record)

    table = pd.DataFrame(records).set_index('season')
    # Fixed column order: day block first, then night block.
    ordered = [f'{period}_{suffix}'
               for period in ('day', 'night')
               for suffix in ('25', '50', '75', 'mean±sd')]
    return table[ordered]

# Seasonal day/night quartiles plus mean±sd for the Dhaka series.
# NOTE: this rebinds `stats_df`, replacing the mean±std table assigned in the earlier cell.
stats_df = seasonal_daynight_percentiles(Dhaka_Series, value_name='PM2.5')
print(stats_df)

        day_25  day_50  day_75 day_mean±sd  night_25  night_50  night_75  \
season                                                                     
winter    84.6   116.3   155.2  123.0±51.5     113.9     155.3     196.5   
spring    34.9    51.6    74.3   57.8±32.1      35.0      56.7      85.7   
summer    17.9    25.2    34.8   27.7±13.9      16.6      24.2      36.4   
fall      25.6    43.5    73.6   53.9±37.1      27.3      53.5      94.8   
annual    27.1    48.8    89.8   65.1±50.3      27.3      56.4     117.9   

       night_mean±sd  
season                
winter    157.5±61.4  
spring     67.7±47.6  
summer     28.3±16.9  
fall       66.2±48.2  
annual     79.2±66.2  


In [23]:
# List the distinct values of the `district` column in zone_district_map.csv, sorted.
pd.read_csv("zone_district_map.csv").district.value_counts().index.sort_values()

Index(['Barishal', 'Bhola', 'Bogura', 'Chattogram', 'Cumilla', 'Dhaka',
       'Dinajpur', 'Faridpur', 'Feni', 'Gopalganj', 'Habiganj', 'Jamalpur',
       'Jashore', 'Joypurhat', 'Khagrachhari', 'Khulna', 'Kishoreganj',
       'Kushtia', 'Lakshmipur', 'Lalmonirhat', 'Madaripur', 'Maulvibazar',
       'Mymensingh', 'Naogaon', 'Narail', 'Narayanganj', 'Narsingdi',
       'Nawabganj', 'Netrokona', 'Nilphamari', 'Pabna', 'Pirojpur', 'Rajshahi',
       'Rangpur', 'Satkhira', 'Shariatpur', 'Sherpur', 'Sirajganj', 'Sylhet',
       'Tangail', 'Thakurgaon'],
      dtype='object')

In [None]:
# Literal list of place names whose spellings differ from the district names
# printed by the previous cell (e.g. 'Barisal' vs 'Barishal', 'Bogra' vs 'Bogura').
# NOTE(review): looks like a pasted reference for reconciling spellings; its
# origin is not shown in this notebook — confirm before relying on it.
['Barisal', 'Bhola', 'Bogra', 'Chittagong', 'Comilla', 'Dhaka',
       'Dinajpur', 'Faridpur', 'Feni', 'Habiganj', 'Jamalpur', 'Jessore',
       'Joypur Hat', 'Khagrachhari', 'Khulna', 'Kishorganj', 'Kushtia',
       'Lakshmipur', 'Lalmanirhat', 'Madaripur', 'Maulavi Bazar', 'Mymensingh',
       'Narail', 'Narayanganj', 'Narsingdi', 'Nawabganj', 'Netrakona', 'Pabna',
       'Palang', 'Par Naogaon', 'Pirojpur', 'Rajshahi', 'Rangpur', 'Saidpur',
       'Satkhira', 'Shahzadpur', 'Sherpur', 'Sirajganj', 'Sylhet', 'Tangail',
       'Thakurgaon', 'Tungipara']