## Brightness Temperature Data Preprocessing

Source: brightness temperature from the [NOAA/CDR/GRIDSAT-B1/V2](https://developers.google.com/earth-engine/datasets/catalog/NOAA_CDR_GRIDSAT-B1_V2#description) dataset on Google Earth Engine.

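The chosen band:

| Name | Units | Min | Max | Scale | Offset | Description |
|------|-------|-----|-----|-------|--------|-------------|
| irwin_cdr | K | -2093* | 13615* | 0.01 | 200 | Brightness temperature near 11µm, nadir-most observation |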

In [35]:
import pandas as pd
import os

def kelvin_to_celsius(kelvin):
    """
    Converts a temperature from Kelvin to Celsius.

    Parameters:
    kelvin (float, Series or DataFrame): Temperature value(s) in Kelvin. Anything that
        supports subtracting a float works, so a whole pandas object is converted
        element-wise in a single vectorized operation.

    Returns:
    Same type as the input: the temperature value(s) in degrees Celsius.
    """
    return kelvin - 273.15


def transform_brightness_temperature(data, output_directory):
    """
    Transforms brightness temperature data by pivoting month values to columns,
    converting temperatures from Kelvin to Celsius, and saving the result to a CSV file.

    The input must provide 'year', 'district', 'month', 'max_brightness_temp' and either
    'province' or 'region' ('province' wins when both are present). The output has one
    row per (year, province/region, district) and one 'smt_<MM>' column per month.
    Several input rows for the same year/area/month are averaged, because pivot_table
    aggregates with the mean by default.

    NOTE(review): 'max_brightness_temp' is assumed to already be in Kelvin, i.e. any
    band scale/offset was applied when the data was exported - TODO confirm.

    Parameters:
    data (DataFrame): A dataframe containing the data. It is not modified.
    output_directory (str): Directory where the transformed CSV file will be saved.
        It is created if it does not exist yet.

    Returns:
    str: The path to the saved transformed CSV file.
    """

    # Determine the geographical descriptor in the dataset
    geo_column = 'province' if 'province' in data.columns else 'region'

    # Ensure the year values are integers. assign() returns a new frame, so the
    # caller's DataFrame keeps its original 'year' column.
    data = data.assign(year=data['year'].astype(int))

    # Pivot the data: months become columns, one row per year/area/district
    data_pivot = data.pivot_table(index=['year', geo_column, 'district'], columns='month', values='max_brightness_temp')

    # Rename columns to smt_01, smt_02, ...
    data_pivot.columns = [f"smt_{str(month).zfill(2)}" for month in data_pivot.columns]

    # Convert temperatures from Kelvin to Celsius in one vectorized step
    # (DataFrame.applymap is deprecated in recent pandas releases).
    data_pivot = kelvin_to_celsius(data_pivot)

    # Reset index to turn multi-index into columns
    data_pivot = data_pivot.reset_index()

    # Determine the year range used in the output file name
    min_year = data['year'].min()
    max_year = data['year'].max()
    year_range = f"{min_year}_{max_year}"

    # Make sure the target directory exists; an empty string means the current directory
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    # Construct the output file path
    output_filename = f"{year_range}_smt_data.csv"
    output_file_path = os.path.join(output_directory, output_filename)

    # Save the transformed DataFrame to a CSV file
    data_pivot.to_csv(output_file_path, index=False)
    print(f"Transformed data saved to {output_file_path}")

    return output_file_path

In [32]:
import pandas as pd

def concatenate_csv_files(input_paths, output_path):
    """
    Concatenates multiple CSV files into a single CSV file and removes duplicate rows.

    The function is best-effort: problems are reported with print() instead of being
    raised. Input files that cannot be read are skipped; if none can be read, nothing
    is written.

    Parameters:
    input_paths (list of str): List of input file paths for the CSV files to be concatenated.
    output_path (str): Output file path where the concatenated data will be saved.
        Its parent directory is created if it does not exist yet.

    Returns:
    None
    """
    # Collect every file that can be read; report and skip the ones that cannot
    dataframes = []
    for path in input_paths:
        try:
            dataframes.append(pd.read_csv(path))
        except Exception as e:
            print(f"Error reading {path}: {e}")

    # pd.concat raises on an empty list, which used to surface as a misleading
    # "error saving" message; report the real cause and stop instead.
    if not dataframes:
        print(f"No input files could be read; nothing saved to {output_path}")
        return

    try:
        # Stack all frames with a fresh index and drop rows that are exact duplicates
        concatenated_df = pd.concat(dataframes, ignore_index=True).drop_duplicates()

        # Make sure the target directory exists (no parent means the current directory)
        parent_directory = os.path.dirname(output_path)
        if parent_directory:
            os.makedirs(parent_directory, exist_ok=True)

        # Save the concatenated dataframe to the output path
        concatenated_df.to_csv(output_path, index=False)
        print(f"Concatenated file saved to {output_path}")
    except Exception as e:
        print(f"Error saving concatenated file: {e}")

In [33]:
def batch_transform(files, output_directory):
    """
    Runs the brightness temperature transformation on each input CSV and then merges
    all transformed outputs into 'concatenated_smt_data.csv' in the output directory.

    A file that cannot be read or transformed is reported with print() and left out
    of the merge; the remaining files are still processed.

    Parameters:
    files (list of str): List of input file paths.
    output_directory (str): Directory where the transformed and concatenated CSV files will be saved.

    Returns:
    None
    """
    produced_paths = []
    for source_csv in files:
        try:
            frame = pd.read_csv(source_csv)
            produced_paths.append(transform_brightness_temperature(frame, output_directory))
        except Exception as err:
            print(f"Error processing {source_csv}: {err}")

    # Merge whatever was transformed successfully into a single file
    combined_path = os.path.join(output_directory, 'concatenated_smt_data.csv')
    concatenate_csv_files(produced_paths, combined_path)

## Tanzania

In [38]:
# Tanzania brightness temperature: input exports and the folder for processed output
output_directory = 'tanzania_data/brightness_temperature/processed'
dr_bt = 'tanzania_data/brightness_temperature/'
files = [
    dr_bt + name
    for name in (
        'tz_2010_2011_brightness_temp.csv',
        'tz_2012_2013_brightness_temp.csv',
        'tz_2014_2015_brightness_temp.csv',
        'tz_2016_2018_brightness_temp.csv',
        'tz_2019_2020_brightness_temp.csv',
        'tz_2020_2022_brightness_temp.csv',
        'tz_2023_brightness_temp.csv',
    )
]
# Keep the individual f1..f7 names available as before
f1, f2, f3, f4, f5, f6, f7 = files
# The call below is disabled; uncommenting it rewrites the files in output_directory
#batch_transform(files, output_directory)

In [10]:
# Folders holding the Tanzania meteorological CSVs (rainfall, max and min temperature).
# Each path ends with '/' because file names are appended by plain string concatenation.
dr_rain, dr_tmax, dr_tmin = (
    'tanzania_data/meteorological_data/final_data/rainfall_data/',
    'tanzania_data/meteorological_data/final_data/tmax_data/',
    'tanzania_data/meteorological_data/final_data/tmin_data/',
)

In [11]:
# Rainfall: per-period CSV files to merge into a single Tanzania rainfall file
files = [
    dr_rain + name
    for name in (
        '2010_2011_data_rainfall.csv',
        '2012_2013_data_rainfall.csv',
        '2014_2015_data_rainfall.csv',
        '2016_2018_data_rainfall.csv',
        '2019_2020_data_rainfall.csv',
        '2020_2022_data_rainfall.csv',
        '2023_2023_data_rainfall.csv',
    )
]
# Keep the individual f1..f7 names available as before
f1, f2, f3, f4, f5, f6, f7 = files
output_path = dr_rain + 'tz_rainfall_data.csv'

# The call below is disabled; uncommenting it rewrites output_path
#concatenate_csv_files(files,output_path )

Concatenated file saved to tanzania_data/meteorological_data/final_data/rainfall_data/tz_rainfall_data.csv


In [12]:
# Maximum temperature: per-period CSV files to merge into a single Tanzania tmax file
files = [
    dr_tmax + name
    for name in (
        '2010_2011_data_tmax.csv',
        '2012_2013_data_tmax.csv',
        '2014_2015_data_tmax.csv',
        '2016_2018_data_tmax.csv',
        '2019_2020_data_tmax.csv',
        '2020_2022_data_tmax.csv',
        '2023_2023_data_tmax.csv',
    )
]
# Keep the individual f1..f7 names available as before
f1, f2, f3, f4, f5, f6, f7 = files
output_path = dr_tmax + 'tz_tmax_data.csv'

# The call below is disabled; uncommenting it rewrites output_path
#concatenate_csv_files(files,output_path )

Concatenated file saved to tanzania_data/meteorological_data/final_data/tmax_data/tz_tmax_data.csv


In [14]:
# Minimum temperature: per-period CSV files to merge into a single Tanzania tmin file
files = [
    dr_tmin + name
    for name in (
        '2010_2011_data_tmin.csv',
        '2012_2013_data_tmin.csv',
        '2014_2015_data_tmin.csv',
        '2016_2018_data_tmin.csv',
        '2019_2020_data_tmin.csv',
        '2020_2022_data_tmin.csv',
        '2023_2023_data_tmin.csv',
    )
]
# Keep the individual f1..f7 names available as before
f1, f2, f3, f4, f5, f6, f7 = files
output_path = dr_tmin + 'tz_tmin_data.csv'

# The call below is disabled; uncommenting it rewrites output_path
#concatenate_csv_files(files,output_path )

## Rwanda

In [39]:
# Rwanda brightness temperature: transform each export, then merge the results
output_directory = 'rwanda_data/brightness_temperature/processed'
dr_bt = 'rwanda_data/brightness_temperature/'
files = [
    dr_bt + name
    for name in (
        'rw_2005_2006_brightness_temp.csv',
        'rw_2011_2012_brightness_temp.csv',
        'rw_2014_2015_brightness_temp.csv',
        'rw_2017_2018_brightness_temp.csv',
        'rw_2020_2021_brightness_temp.csv',
    )
]
# Keep the individual f1..f5 names available as before
f1, f2, f3, f4, f5 = files
batch_transform(files, output_directory)

Transformed data saved to rwanda_data/brightness_temperature/processed\2005_2006_smt_data.csv
Transformed data saved to rwanda_data/brightness_temperature/processed\2011_2012_smt_data.csv
Transformed data saved to rwanda_data/brightness_temperature/processed\2014_2015_smt_data.csv
Transformed data saved to rwanda_data/brightness_temperature/processed\2017_2018_smt_data.csv
Transformed data saved to rwanda_data/brightness_temperature/processed\2020_2021_smt_data.csv
Concatenated file saved to rwanda_data/brightness_temperature/processed\concatenated_tb_data.csv
