In [52]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML

In [53]:
# Directory prefix of the cleaned per-location CSV files (note the trailing slash:
# file paths are built by plain string concatenation).
PROCESSED_FILES_PATH = '../cleaned_data/'

# File stems of the CSVs to summarise; each is read as
# PROCESSED_FILES_PATH + <name> + '.csv'.
COUNTRY_NAMES = [
    'SonLa',
    'LangSon',
    'HaNoi',
    'NgheAn',
    'DaNang',
    'LamDong',
    'HoChiMinh',
    'BenTre',
]

# Columns that are left out when the summary statistics are computed.
NON_STATISTICS_STATS = ['DatetimeEpoch']

# Standard statistic names, applied BY POSITION to the columns of the
# statistics table, so the order here must match the order in which that
# table is produced.
STATISTICS_NAMES = [
    'Count',
    'Mean',
    'Standard Deviation',
    'Min',
    'Lower Quartile',
    'Median',
    'Upper Quartile',
    'Max',
    'Missing Ratio',
    'Range',
    'Variance',
]

# Column order used when the statistics table is displayed.
ORDERED_STATISTICS = [
    'Count',
    'Missing Ratio',
    'Min',
    'Max',
    'Mean',
    'Median',
    'Lower Quartile',
    'Upper Quartile',
    'Range',
    'Standard Deviation',
    'Variance',
]

In [54]:
def Missing_ratio(arr):
    """Return the fraction of missing entries in ``arr``.

    ``arr`` must expose ``.isna()`` (e.g. a pandas Series); the result is the
    mean of that boolean mask, i.e. a value between 0 and 1.
    """
    return arr.isna().mean()

In [55]:
def standardize_statistics(df) -> pd.DataFrame:
    """Return a copy of ``df`` with standard statistic names in display order.

    Columns are renamed strictly by position: the i-th column of ``df``
    becomes ``STATISTICS_NAMES[i]``. The renamed columns are then re-ordered
    to follow ``ORDERED_STATISTICS``.

    The input frame is not modified; callers must use the returned frame.

    Parameters
    ----------
    df : pd.DataFrame
        Statistics table with exactly ``len(STATISTICS_NAMES)`` columns, in
        the same order as ``STATISTICS_NAMES``.

    Returns
    -------
    pd.DataFrame
        New frame whose columns are ``ORDERED_STATISTICS``.

    Raises
    ------
    ValueError
        If ``df`` does not have exactly ``len(STATISTICS_NAMES)`` columns.
    """
    if len(df.columns) != len(STATISTICS_NAMES):
        raise ValueError(
            f"Expected {len(STATISTICS_NAMES)} statistic columns, "
            f"got {len(df.columns)}: {df.columns.tolist()}"
        )

    # Work on a copy so the caller's frame keeps its original column names.
    renamed = df.copy()
    # Assigning the whole column index is truly positional; a dict-based
    # rename would collapse duplicated old names into a single mapping entry.
    renamed.columns = list(STATISTICS_NAMES)

    return renamed[ORDERED_STATISTICS]

In [None]:
def Statistics(df) -> pd.DataFrame:
    """Build a per-column table of summary statistics for ``df``.

    Columns listed in ``NON_STATISTICS_STATS`` are skipped. The table starts
    from ``DataFrame.describe()`` rounded to 2 decimals and is extended with
    three extra rows:

    * ``'Missing ratio'`` - fraction of missing values (via ``Missing_ratio``),
    * ``'Range'``         - rounded max minus rounded min,
    * ``'Variance'``      - ``Series.var()`` of the raw column (not rounded).

    The result is returned transposed: one row per analysed column of ``df``
    and one column per statistic.
    """
    candidate_cols = [name for name in df.columns.to_list()
                      if name not in NON_STATISTICS_STATS]
    summary = df[candidate_cols].describe().round(2)

    # Use the columns that describe() actually reported; it may not keep
    # every candidate column.
    kept_cols = summary.columns.to_list()

    # Compute every extra row before touching `summary`, keyed by row label
    # in the order the rows must appear.
    extra_rows = {
        'Missing ratio': [Missing_ratio(df[name]) for name in kept_cols],
        'Range': [summary[name]['max'] - summary[name]['min'] for name in kept_cols],
        'Variance': [df[name].var() for name in kept_cols],
    }
    for label, values in extra_rows.items():
        summary.loc[label] = values

    return summary.T


In [61]:
# For every configured location: print its name, load its cleaned CSV and
# render the standardized summary-statistics table.
for country in COUNTRY_NAMES:
    print(country)
    raw_df = pd.read_csv(f'{PROCESSED_FILES_PATH}{country}.csv', index_col=0)
    stat_df = standardize_statistics(Statistics(raw_df))
    display(stat_df)

SonLa


Unnamed: 0,Count,Missing Ratio,Min,Max,Mean,Median,Lower Quartile,Upper Quartile,Range,Standard Deviation,Variance
Tempmax,1827.0,0.0,7.9,39.1,27.77,28.6,25.2,31.1,31.2,4.8,23.045023
Tempmin,1827.0,0.0,2.7,26.1,17.8,18.8,14.6,21.4,23.4,4.59,21.097164
Temp,1827.0,0.0,6.9,31.2,22.08,22.8,19.2,25.4,24.3,4.39,19.308399
Dew,1827.0,0.0,1.2,24.5,17.75,18.8,14.6,21.8,23.3,4.66,21.731361
Humidity,1827.0,0.0,31.9,98.3,78.65,80.0,74.7,84.5,66.4,9.43,89.0146
Precip,1827.0,0.0,0.0,146.0,3.84,0.3,0.0,2.8,146.0,10.11,102.173143
Precipprob,1827.0,0.0,0.0,100.0,63.11,100.0,0.0,100.0,100.0,48.26,2329.431174
Precipcover,1827.0,0.0,0.0,70.83,7.59,4.17,0.0,8.33,70.83,10.26,105.217356
Windgust,1827.0,0.0,4.3,79.2,24.98,24.8,19.1,30.8,74.9,9.19,84.436286
Windspeed,1827.0,0.0,4.0,28.8,10.61,10.8,7.9,11.9,24.8,3.32,11.025197


LangSon


Unnamed: 0,Count,Missing Ratio,Min,Max,Mean,Median,Lower Quartile,Upper Quartile,Range,Standard Deviation,Variance
Tempmax,1827.0,0.0,6.6,37.7,26.57,27.5,22.4,31.7,31.1,6.15,37.846779
Tempmin,1827.0,0.0,1.7,27.6,18.76,20.0,14.7,23.5,25.9,5.61,31.486547
Temp,1827.0,0.0,4.2,31.6,22.15,23.1,18.35,26.8,27.4,5.5,30.226926
Dew,1827.0,0.0,-3.9,27.0,18.38,20.1,14.3,23.7,30.9,6.22,38.710396
Humidity,1827.0,0.0,35.9,98.6,80.71,82.6,76.2,87.2,62.7,10.02,100.467105
Precip,1827.0,0.0,0.0,183.72,4.1,0.4,0.0,2.0,183.72,12.21,149.060608
Precipprob,1827.0,0.0,0.0,100.0,73.89,100.0,0.0,100.0,100.0,43.93,1930.246737
Precipcover,1827.0,0.0,0.0,66.67,9.37,8.33,0.0,12.5,66.67,10.13,102.551455
Windgust,1827.0,0.0,8.6,79.2,32.05,31.0,23.4,39.6,70.6,11.47,131.665026
Windspeed,1827.0,0.0,4.7,39.6,14.29,13.7,10.4,17.6,34.9,5.3,28.053243


HaNoi


Unnamed: 0,Count,Missing Ratio,Min,Max,Mean,Median,Lower Quartile,Upper Quartile,Range,Standard Deviation,Variance
Tempmax,1827.0,0.0,10.4,40.0,28.5,29.0,24.1,33.3,29.6,5.82,33.827393
Tempmin,1827.0,0.0,7.0,30.4,21.89,23.0,18.0,26.0,23.4,4.89,23.923673
Temp,1827.0,0.0,9.0,35.3,24.87,25.7,20.9,29.0,26.3,5.12,26.243309
Dew,1827.0,0.0,-2.1,28.1,20.23,22.1,16.55,24.9,30.2,5.8,33.589733
Humidity,1827.0,0.0,31.1,98.9,77.25,78.9,71.2,84.4,67.8,10.91,119.05167
Precip,1827.0,0.0,0.0,173.36,4.69,0.2,0.0,1.98,173.36,13.82,191.036809
Precipprob,1827.0,0.0,0.0,100.0,71.15,100.0,0.0,100.0,100.0,45.32,2053.594285
Precipcover,1827.0,0.0,0.0,75.0,7.27,4.17,0.0,8.33,75.0,8.74,76.456165
Windgust,1827.0,0.0,6.8,108.0,28.26,27.4,22.7,33.1,101.2,8.61,74.163625
Windspeed,1827.0,0.0,6.6,37.1,17.1,16.6,13.9,20.5,30.5,4.76,22.662552


NgheAn


Unnamed: 0,Count,Missing Ratio,Min,Max,Mean,Median,Lower Quartile,Upper Quartile,Range,Standard Deviation,Variance
Tempmax,1827.0,0.0,12.9,40.2,28.76,28.8,24.3,33.2,27.3,5.71,32.58384
Tempmin,1827.0,0.0,9.2,32.0,22.9,23.7,19.6,26.5,22.8,4.53,20.518801
Temp,1827.0,0.0,11.1,34.9,25.52,25.8,21.9,29.4,23.8,4.83,23.374235
Dew,1827.0,0.0,4.6,27.7,21.66,23.2,19.1,24.8,23.1,4.2,17.671353
Humidity,1827.0,0.0,51.1,95.9,80.47,82.6,74.6,88.1,44.8,9.23,85.14876
Precip,1827.0,0.0,0.0,363.1,7.46,0.2,0.0,2.6,363.1,26.54,704.287921
Precipprob,1827.0,0.0,0.0,100.0,65.19,100.0,0.0,100.0,100.0,47.65,2270.542088
Precipcover,1827.0,0.0,0.0,83.33,7.09,4.17,0.0,8.33,83.33,10.5,110.19045
Windgust,1827.0,0.0,9.7,84.6,29.06,27.7,22.7,33.5,74.9,9.68,93.697139
Windspeed,1827.0,0.0,6.8,72.0,17.83,16.6,13.7,20.5,65.2,6.34,40.258769


DaNang


Unnamed: 0,Count,Missing Ratio,Min,Max,Mean,Median,Lower Quartile,Upper Quartile,Range,Standard Deviation,Variance
Tempmax,1827.0,0.0,16.1,40.0,30.7,31.0,27.4,34.0,23.9,4.38,19.185579
Tempmin,1827.0,0.0,13.0,31.0,23.92,24.7,22.0,26.0,18.0,2.86,8.168728
Temp,1827.0,0.0,15.8,34.5,26.92,27.2,24.6,29.6,18.7,3.33,11.099799
Dew,1827.0,0.0,10.2,26.1,22.46,23.5,21.2,24.3,15.9,2.53,6.399437
Humidity,1827.0,0.0,45.8,97.5,77.92,78.5,73.4,83.0,51.7,7.8,60.80199
Precip,1827.0,0.0,0.0,299.19,7.17,0.0,0.0,1.26,299.19,25.89,670.247671
Precipprob,1827.0,0.0,0.0,100.0,52.65,100.0,0.0,100.0,100.0,49.94,2494.318219
Precipcover,1827.0,0.0,0.0,33.33,4.53,4.17,0.0,8.33,33.33,5.6,31.380969
Windgust,1827.0,0.0,7.2,110.5,27.4,25.9,21.2,31.15,103.3,9.82,96.499847
Windspeed,1827.0,0.0,5.4,75.2,17.81,16.6,14.7,20.5,69.8,5.76,33.124067


LamDong


Unnamed: 0,Count,Missing Ratio,Min,Max,Mean,Median,Lower Quartile,Upper Quartile,Range,Standard Deviation,Variance
Tempmax,1827.0,0.0,14.8,31.9,24.55,24.8,23.2,26.0,17.1,2.39,5.716938
Tempmin,1827.0,0.0,7.3,21.3,16.58,16.6,15.2,18.5,14.0,2.36,5.550132
Temp,1827.0,0.0,12.3,24.2,19.69,19.7,18.5,21.1,11.9,1.88,3.543251
Dew,1827.0,0.0,2.6,21.0,16.81,17.2,15.5,18.8,18.4,2.48,6.137572
Humidity,1827.0,0.0,42.7,99.1,85.24,86.5,81.6,90.3,56.4,7.26,52.687365
Precip,1827.0,0.0,0.0,106.5,6.03,2.8,0.3,9.05,106.5,8.78,77.055728
Precipprob,1827.0,0.0,0.0,100.0,84.02,100.0,100.0,100.0,100.0,36.65,1343.544052
Precipcover,1827.0,0.0,0.0,100.0,32.73,33.33,12.5,45.83,100.0,24.43,596.823948
Windgust,1827.0,0.0,2.9,88.2,29.1,28.1,16.2,38.5,85.3,15.48,239.562406
Windspeed,1827.0,0.0,2.5,28.4,9.96,9.0,6.5,12.4,25.9,4.57,20.878635


HoChiMinh


Unnamed: 0,Count,Missing Ratio,Min,Max,Mean,Median,Lower Quartile,Upper Quartile,Range,Standard Deviation,Variance
Tempmax,1827.0,0.0,24.6,38.0,33.04,33.0,32.0,34.0,13.4,1.75,3.075939
Tempmin,1827.0,0.0,18.0,29.9,25.1,25.0,24.0,26.0,11.9,1.56,2.429178
Temp,1827.0,0.0,23.3,32.4,28.41,28.4,27.5,29.3,9.1,1.34,1.7839
Dew,1827.0,0.0,15.0,27.5,23.62,24.4,22.4,25.3,12.5,2.33,5.423375
Humidity,1827.0,0.0,50.2,98.4,77.34,78.5,70.4,84.9,48.2,9.76,95.233305
Precip,1827.0,0.0,0.0,109.0,4.91,0.7,0.0,4.85,109.0,10.64,113.155856
Precipprob,1827.0,0.0,0.0,100.0,74.17,100.0,0.0,100.0,100.0,43.78,1917.087667
Precipcover,1827.0,0.0,0.0,70.83,8.89,4.17,0.0,12.5,70.83,11.11,123.406374
Windgust,1827.0,0.0,8.3,216.0,31.11,29.2,23.8,36.0,207.7,11.49,132.073206
Windspeed,1827.0,0.0,7.6,50.0,19.04,18.4,15.4,22.2,42.4,5.36,28.725759


BenTre


Unnamed: 0,Count,Missing Ratio,Min,Max,Mean,Median,Lower Quartile,Upper Quartile,Range,Standard Deviation,Variance
Tempmax,1827.0,0.0,25.0,37.0,32.19,32.3,31.0,33.5,12.0,1.75,3.061874
Tempmin,1827.0,0.0,19.0,28.6,24.79,25.0,24.0,25.5,9.6,1.36,1.858555
Temp,1827.0,0.0,22.9,31.5,27.86,27.8,27.0,28.7,8.6,1.29,1.658198
Dew,1827.0,0.0,17.1,26.7,23.85,24.6,22.8,25.2,9.6,1.91,3.661406
Humidity,1827.0,0.0,56.5,98.4,80.42,81.2,74.4,86.7,41.9,8.06,64.897461
Precip,1827.0,0.0,0.0,109.0,5.06,0.8,0.1,5.1,109.0,10.66,113.687569
Precipprob,1827.0,0.0,0.0,100.0,75.64,100.0,100.0,100.0,100.0,42.94,1843.43884
Precipcover,1827.0,0.0,0.0,83.33,9.18,4.17,4.17,12.5,83.33,11.24,126.42619
Windgust,1827.0,0.0,7.9,216.0,34.73,33.5,26.8,40.7,208.1,12.82,164.301026
Windspeed,1827.0,0.0,6.5,64.8,16.18,15.7,13.0,18.8,58.3,4.49,20.185794
