In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [2]:
# File path GMET station data (CSV format with columns:'Precipitation (mm)')
gmet_root_dir = r'C:\Users\c.kwa\Desktop\meteosat_retrieval\data_downloads\GMET\GMet_AWSdata\GMet_AWSdata'
tahmo_root_dir = r'C:\Users\c.kwa\Desktop\meteosat_retrieval\data_downloads\TAHMO\TAHMO_export_6704eaa37e81da18c0b7e245'

In [3]:
station_data_list_GMET = []
station_name_list_files = []
for station in os.listdir(gmet_root_dir):
    if station.endswith('.csv'):
        station_file = os.path.join(gmet_root_dir, station)
        station_data = pd.read_csv(station_file)
        station_data_list_GMET.append(station_data)
        station_name_list_files.append(station.split('.')[0])
        #display(station_data)

stations_meta_data_GMET = station_data_list_GMET.pop(7)
station_name_list = stations_meta_data_GMET['STN'].tolist()

station_data_gmet_dict = dict(zip(station_name_list, station_data_list_GMET))

for station_name in station_data_gmet_dict:
    # Convert 'timestamp' column to datetime format
    station_data_gmet_dict[station_name]['Timestamp'] = pd.to_datetime(
        station_data_gmet_dict[station_name]['Date'].astype(str) + ' ' + 
        station_data_gmet_dict[station_name]['Time'].astype(str),
        format='%m/%d/%Y %I:%M:%S %p'
    )
    # Set Timestamp as the index
    station_data_gmet_dict[station_name] = station_data_gmet_dict[station_name].set_index('Timestamp')
    # Drop the original Date and Time columns
    station_data_gmet_dict[station_name] = station_data_gmet_dict[station_name].drop(['Date', 'Time'], axis=1)

    # Ensure the column is numeric
    for col in station_data_gmet_dict[station_name].columns:
        station_data_gmet_dict[station_name][col] = pd.to_numeric(station_data_gmet_dict[station_name][col], errors='coerce')

        # Accumulate over 30 minutes
        station_data_gmet_dict[station_name] = station_data_gmet_dict[station_name].resample('30min', label = 'right', closed = 'right').sum(min_count = 2)


In [4]:
# Initialize an empty DataFrame to hold all stations' data
combined_station_data = pd.DataFrame()

# Loop through the station_data_gmet_dict
for station_name, station_data in station_data_gmet_dict.items():
    # Extract the precipitation data, rename the column to the station name
    station_precip = station_data[['Precipitation (mm)']].rename(columns={'Precipitation (mm)': station_name})
    # Merge into the combined DataFrame on the index (Timestamp)
    combined_station_data = pd.merge(
        combined_station_data, station_precip, left_index=True, right_index=True, how='outer'
    )

In [5]:
#Dropping stations with invalid data
drop_data = ['Ada_Foah', 'Tarkwa', 'Abetifi', 'Sefwi_Bekwai', 'Loagri', 'Kalbeo', 'Jirapa']
combined_station_data = combined_station_data.drop(drop_data, axis = 1)
stations_meta_data_GMET = stations_meta_data_GMET.drop(stations_meta_data_GMET[stations_meta_data_GMET.STN.isin(drop_data)].index, axis=0)

In [32]:
combined_station_data.to_csv('combined_station_data_GMET.csv')

In [7]:
station_name_list = []
station_data_list_TAHMO = []
for station in os.listdir(tahmo_root_dir): 
    if station.endswith('.csv'):
        station_file = os.path.join(tahmo_root_dir, station)
        station_data = pd.read_csv(station_file)
        station_name_list.append(station.split('.')[0])
        station_data_list_TAHMO.append(station_data)

sensors_meta_data_tahmo = station_data_list_TAHMO.pop(0)
stations_meta_data_tahmo = station_data_list_TAHMO.pop(0)
sensors_meta_data_tahmo_name = station_name_list.pop(0)
stations_meta_data_tahmo_name = station_name_list.pop(0)


station_data_tahmo_dict = dict(zip(station_name_list, station_data_list_TAHMO))

#Assuming df is the DataFrame containing the data
for station_name in station_data_tahmo_dict:
    if station_data_tahmo_dict[station_name].shape[1] == 3:
        station_data_tahmo_dict[station_name]['precipitation (mm)'] = station_data_tahmo_dict[station_name].iloc[:, 2].combine_first(station_data_tahmo_dict[station_name].iloc[:, 2])

        # Dropping the old columns (optional)
        station_data_tahmo_dict[station_name] = station_data_tahmo_dict[station_name].drop(station_data_tahmo_dict[station_name].columns[[1, 2]], axis = 1)

        # # Result
        # display(station_data_tahmo_dict[station_name].head())

    elif station_data_tahmo_dict[station_name].shape[1] == 4:
        station_data_tahmo_dict[station_name]['precipitation (mm)'] = station_data_tahmo_dict[station_name].iloc[:, 2].combine_first(station_data_tahmo_dict[station_name].iloc[:, 1]).combine_first(station_data_tahmo_dict[station_name].iloc[:, 3])
        # Dropping the old columns (optional)
        station_data_tahmo_dict[station_name] = station_data_tahmo_dict[station_name].drop(station_data_tahmo_dict[station_name].columns[[1, 2, 3]], axis = 1)


    if 'timestamp' in station_data_tahmo_dict[station_name].columns:
        # Set Timestamp as the index
        station_data_tahmo_dict[station_name]['timestamp'] = pd.to_datetime(station_data_tahmo_dict[station_name]['timestamp'])
        station_data_tahmo_dict[station_name] = station_data_tahmo_dict[station_name].set_index('timestamp')

    # Accumulate over 30 minutes
    station_data_tahmo_dict[station_name] = station_data_tahmo_dict[station_name].resample('30min').sum(min_count = 2)

Unnamed: 0,station code,name,country,installation height (m),latitude,longitude,elevation (m),timezone
0,TA00005,Asankragwa SHS,GH,2.0,5.807731,-2.426395,125.1,Africa/Accra
1,TA00007,Nana Yaa Kesse SHS Duayaw Nkwanta,GH,2.0,7.188273,-2.097477,341.1,Africa/Accra
2,TA00010,Chiraa SHS,GH,2.0,7.389595,-2.185991,337.1,Africa/Accra
3,TA00016,"Accra Academy School, Accra",GH,2.0,5.573104,-0.2445,32.4,Africa/Accra
4,TA00045,"Asesewaa Senior High School, Asesewaa",GH,2.0,6.400626,-0.146577,372.3,Africa/Accra
5,TA00113,Nkwanta SHS,GH,2.0,8.271124,0.515265,213.7,Africa/Accra
6,TA00116,Amedzofe Technical Institute,GH,2.0,6.845815,0.440698,731.8,Africa/Accra
7,TA00117,Keta SHS,GH,2.0,5.895083,0.989567,10.0,Africa/Accra
8,TA00118,Tema Secondary School,GH,2.0,5.641413,-0.01187,18.4,Africa/Accra
9,TA00120,Nkroful Agric SHS,GH,2.0,4.971861,-2.322676,28.0,Africa/Accra


In [8]:
# Initialize an empty DataFrame to hold all stations' data
combined_station_data_tahmo = pd.DataFrame()

# Loop through the station_data_gmet_dict
for station_name, station_data in station_data_tahmo_dict.items():
    # Extract the precipitation data, rename the column to the station name
    station_precip = station_data[['precipitation (mm)']].rename(columns={'precipitation (mm)': station_name})
    # Merge into the combined DataFrame on the index (Timestamp)
    combined_station_data_tahmo = pd.merge(
        combined_station_data_tahmo, station_precip, left_index=True, right_index=True, how='outer'
    )

In [9]:
#Dropping stations with invalid data
drop_data_tahmo = ['TA00117', 'TA00302', 'TA00007', 'TA00010', 'TA00266', 'TA00259', 'TA00690', 'TA00254', 'TA00647']
combined_station_data_tahmo = combined_station_data_tahmo.drop(drop_data_tahmo, axis = 1)
stations_meta_data_tahmo = stations_meta_data_tahmo.drop(stations_meta_data_GMET[stations_meta_data_tahmo['station code'].isin(drop_data_tahmo)].index, axis=0)

  stations_meta_data_tahmo = stations_meta_data_tahmo.drop(stations_meta_data_GMET[stations_meta_data_tahmo['station code'].isin(drop_data_tahmo)].index, axis=0)


In [10]:
combined_station_data_tahmo.to_csv('combined_station_data_tahmo.csv')

Unnamed: 0,station code,name,country,installation height (m),latitude,longitude,elevation (m),timezone
0,TA00005,Asankragwa SHS,GH,2.0,5.807731,-2.426395,125.1,Africa/Accra
1,TA00007,Nana Yaa Kesse SHS Duayaw Nkwanta,GH,2.0,7.188273,-2.097477,341.1,Africa/Accra
3,TA00016,"Accra Academy School, Accra",GH,2.0,5.573104,-0.2445,32.4,Africa/Accra
4,TA00045,"Asesewaa Senior High School, Asesewaa",GH,2.0,6.400626,-0.146577,372.3,Africa/Accra
5,TA00113,Nkwanta SHS,GH,2.0,8.271124,0.515265,213.7,Africa/Accra
6,TA00116,Amedzofe Technical Institute,GH,2.0,6.845815,0.440698,731.8,Africa/Accra
7,TA00117,Keta SHS,GH,2.0,5.895083,0.989567,10.0,Africa/Accra
8,TA00118,Tema Secondary School,GH,2.0,5.641413,-0.01187,18.4,Africa/Accra
9,TA00120,Nkroful Agric SHS,GH,2.0,4.971861,-2.322676,28.0,Africa/Accra
10,TA00121,Half Assini SHS,GH,2.0,5.053207,-2.877294,14.0,Africa/Accra
