In [1]:
import os

import pandas as pd

def read_csv(filename):
    """Load the CSV at ``filename`` with pandas and return the resulting DataFrame."""
    return pd.read_csv(filename)

T2MDEW     MERRA-2 Dew/Frost Point at 2 Meters (C) 				

T2M        MERRA-2 Temperature at 2 Meters (C) 

RH2M       MERRA-2 Relative Humidity at 2 Meters (%) 
				
WS10M      MERRA-2 Wind Speed at 10 Meters (m/s) 				

In [2]:
# Load the raw Dhaka weather CSV; it is used below to prototype the DATE-column steps.
filename = '../../Data/Weather data of districts/Dhaka.csv'

data = read_csv(filename)

In [3]:
# Preview the first rows to see which columns the raw file provides.
data.head()

Unnamed: 0,YEAR,MO,DY,RH2M,WS10M,T2MDEW,T2MWET,T2M
0,2019,9,1,85.75,1.33,27.05,28.44,29.83
1,2019,9,2,88.69,3.2,26.58,27.64,28.7
2,2019,9,3,89.88,4.31,26.75,27.69,28.62
3,2019,9,4,92.31,3.69,26.9,27.59,28.28
4,2019,9,5,91.31,4.05,26.62,27.4,28.19


In [4]:
# Build a 'DATE' column holding text formatted as 'YYYY/MM/DD' from the numeric
# 'YEAR', 'MO' and 'DY' columns.
# pd.to_datetime assembles dates directly from year/month/day components, so single-digit
# months and days need no manual zero-padding; strftime emits the padded text.

date_parts = {'year': data['YEAR'], 'month': data['MO'], 'day': data['DY']}
data['DATE'] = pd.to_datetime(date_parts).dt.strftime('%Y/%m/%d')

data.head()

Unnamed: 0,YEAR,MO,DY,RH2M,WS10M,T2MDEW,T2MWET,T2M,DATE
0,2019,9,1,85.75,1.33,27.05,28.44,29.83,2019/09/01
1,2019,9,2,88.69,3.2,26.58,27.64,28.7,2019/09/02
2,2019,9,3,89.88,4.31,26.75,27.69,28.62,2019/09/03
3,2019,9,4,92.31,3.69,26.9,27.59,28.28,2019/09/04
4,2019,9,5,91.31,4.05,26.62,27.4,28.19,2019/09/05


In [5]:
# Remove the 'YEAR', 'MO' and 'DY' columns (their information now lives in 'DATE'),
# then reorder the frame so that 'DATE' is the first column.

data = data.drop(columns=['YEAR', 'MO', 'DY'])

other_columns = [name for name in data.columns if name != 'DATE']
data = data[['DATE', *other_columns]]
data.head()

Unnamed: 0,DATE,RH2M,WS10M,T2MDEW,T2MWET,T2M
0,2019/09/01,85.75,1.33,27.05,28.44,29.83
1,2019/09/02,88.69,3.2,26.58,27.64,28.7
2,2019/09/03,89.88,4.31,26.75,27.69,28.62
3,2019/09/04,92.31,3.69,26.9,27.59,28.28
4,2019/09/05,91.31,4.05,26.62,27.4,28.19


In [6]:
def prepare_weather_data(filename, output_filename):
    """Read a raw weather CSV, add a leading 'DATE' column, and save the result as CSV.

    The input must contain numeric 'YEAR', 'MO' and 'DY' columns. They are combined
    into a 'DATE' column holding text formatted as 'YYYY/MM/DD', the three component
    columns are removed, and 'DATE' is placed first. All remaining columns keep their
    original relative order.

    Parameters
    ----------
    filename : str or path-like
        CSV file to read (handed to ``pd.read_csv``).
    output_filename : str or path-like
        Destination of the processed CSV, written without the index. When this is a
        path, missing parent directories are created first.

    Returns
    -------
    pandas.DataFrame
        The processed frame that was written to ``output_filename``.

    Raises
    ------
    KeyError
        If 'YEAR', 'MO' or 'DY' is missing from the input.
    ValueError
        If the components cannot be assembled into valid calendar dates.
    """
    data = pd.read_csv(filename)

    # Assemble dates straight from the components; strftime supplies the zero padding,
    # so no per-row string manipulation is required.
    date_parts = {'year': data['YEAR'], 'month': data['MO'], 'day': data['DY']}
    data['DATE'] = pd.to_datetime(date_parts).dt.strftime('%Y/%m/%d')

    # Drop the component columns and move DATE to the front in a single selection.
    remaining = [col for col in data.columns if col not in ('YEAR', 'MO', 'DY', 'DATE')]
    data = data[['DATE'] + remaining]

    # to_csv does not create folders; make sure the destination folder exists.
    # Non-path targets (e.g. an open buffer) are passed through untouched.
    if isinstance(output_filename, (str, os.PathLike)):
        parent = os.path.dirname(os.fspath(output_filename))
        if parent:
            os.makedirs(parent, exist_ok=True)

    # write it to a new csv file
    data.to_csv(output_filename, index=False)
    print('Data is prepared and saved to', output_filename)
    return data

In [7]:
# Try prepare_weather_data on a single district (Mymensingh) before running it on every file.
filename = '../../Data/Weather data of districts/Mymensingh.csv'
output_filename = '../../Data/Processed Weather Data/Mymensingh.csv'

data = prepare_weather_data(filename, output_filename)

Data is prepared and saved to ../../Data/Processed Weather Data/Mymensingh.csv


In [10]:
# Run prepare_weather_data on every '.csv' file found in '../../Data/Weather data of districts/'
# and collect the processed frames, in listing order, in `dataframes`.

import os

directory = '../../Data/Weather data of districts/'
processed_directory = '../../Data/Processed Weather Data/'

dataframes = []

csv_names = [name for name in os.listdir(directory) if name.endswith('.csv')]
for filename in csv_names:
    # The processed copy keeps the same file name as its raw source.
    output_filename = processed_directory + filename
    data = prepare_weather_data(directory + filename, output_filename)
    dataframes.append(data)

Data is prepared and saved to ../../Data/Processed Weather Data/Bagerhat.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Banderban.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Barguna.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Barishal.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Bhola.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Bogra.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Brahmanbaria.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Chandpur.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Chapai_Nawabgonj.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Chittagong.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Chuadanga.csv
Data is prepared and saved to ../../Data/Processed Weather Data/Coxs Bazar.csv
Data is prepared and saved to ../../Data/Processed Weather Data/C

In [11]:
# Spot-check the first processed frame: 'DATE' should lead and 'YEAR'/'MO'/'DY' should be gone.
dataframes[0].head()

Unnamed: 0,DATE,T2MDEW,T2M,RH2M,WS10M
0,2019/09/01,26.94,29.12,88.44,1.85
1,2019/09/02,26.3,28.57,88.0,3.16
2,2019/09/03,26.85,28.37,91.62,4.07
3,2019/09/04,26.94,28.35,92.12,3.87
4,2019/09/05,26.65,28.06,92.06,3.91


In [12]:
# Load the table of dates to keep; its 'Date' column is used below to filter each district frame.
dates = pd.read_excel('../../Data/dates.xlsx')
dates.head()

Unnamed: 0,Date
0,2019/09/01
1,2019/09/02
2,2019/09/03
3,2019/09/04
4,2019/09/05


In [15]:
# Restrict every district frame to the rows whose 'DATE' value appears in dates['Date'].
# Each list slot is overwritten with its filtered frame, so the list order is unchanged.

wanted_dates = dates['Date']
for i, frame in enumerate(dataframes):
    keep_row = frame['DATE'].isin(wanted_dates)
    dataframes[i] = frame[keep_row]

(720, 5)

In [17]:
# Write each filtered dataframe to its own CSV file in '../../Data/Weather Data/'.
# Each output file keeps the name of the district file it came from (the same names are
# used in '../../Data/Processed Weather Data/').
#
# `dataframes` was built from the '.csv' entries of os.listdir(directory), in that order.
# The names are therefore rebuilt from that same listing, once, instead of indexing a fresh
# listing of another folder on every iteration: os.listdir gives no ordering guarantee, and
# one stray or hidden file would silently save data under the wrong district name.

input_directory = '../../Data/Processed Weather Data/'  # kept for reference; not used for naming
output_directory = '../../Data/Weather Data/'

district_files = [name for name in os.listdir(directory) if name.endswith('.csv')]

# Fail loudly instead of mislabelling files if the folder changed since the frames were built.
if len(district_files) != len(dataframes):
    raise ValueError(
        'Found %d CSV files in %r but there are %d dataframes; re-run the processing cell.'
        % (len(district_files), directory, len(dataframes))
    )

# to_csv does not create the destination folder.
os.makedirs(output_directory, exist_ok=True)

for name, frame in zip(district_files, dataframes):
    frame.to_csv(os.path.join(output_directory, name), index=False)