In [1]:
!pip3 install -q --upgrade pip
!pip3 install -q pandas numpy matplotlib seaborn openpyxl climateserv requests netCDF4 xarray pyproj geopandas

In [2]:
import pandas as pd
from scipy.spatial import KDTree
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import os
import netCDF4 as nc
import numpy as np
import xarray as xr
import pandas as pd
import sys
from pathlib import Path
# Resolve the directory two levels above the current working directory. This is
# the directory that has to contain the `scripts` package for the import below.
scripts_dir = Path("../../").resolve()

# Append that directory to sys.path once, so re-running the cell does not add duplicates.
if str(scripts_dir) not in sys.path:
    sys.path.append(str(scripts_dir))

# NOTE(review): the recorded output of this cell is a ModuleNotFoundError for
# 'scripts.preprocessing.jay_files'. `find_nearest` is defined again in a later
# cell of this notebook; `read_and_convert_to_geodataframe` is not used in the
# visible cells -- confirm whether this import is still needed.
from scripts.preprocessing.jay_files import read_and_convert_to_geodataframe, find_nearest

ModuleNotFoundError: No module named 'scripts.preprocessing.jay_files'

In [3]:
def analyze_nasadem_file(file_path: str) -> None:
    """
    Print a short summary of a NetCDF file: every variable with its dimensions
    and shape, plus the longitude/latitude extent when both a `lon` and a `lat`
    variable are present.

    The file is opened read-only inside a context manager, so the handle is
    released even if reading or printing one of the variables raises.

    Args:
    file_path (str): The path to the NetCDF file to inspect.
    """
    with nc.Dataset(file_path, 'r') as dataset:
        variables = dataset.variables

        # Print general information about the file
        print('##################################################')
        print(f"Analyzing file: {file_path}")
        print("Variables in this file:")
        for var_name, variable in variables.items():
            print(f" - {var_name}: {variable.dimensions}, {variable.shape}")

        # The extent is only reported when both coordinate variables exist.
        if 'lon' in variables and 'lat' in variables:
            lon = variables['lon'][:]
            lat = variables['lat'][:]
            print(f"Longitude range: {np.min(lon)} to {np.max(lon)}")
            print(f"Latitude range: {np.min(lat)} to {np.max(lat)}")

# NetCDF files to inspect: three topography products plus the hydrogeology grid.
file_paths = [
    '../../data/original_data/jay_files/Topography_Products/Curvature.nc',
    '../../data/original_data/jay_files/Topography_Products/Drainage_Density.nc',
    '../../data/original_data/jay_files/Topography_Products/Slope.nc',
    '../../data/original_data/jay_files/hydrogeology.nc'
]

# Print the variable listing and the lon/lat extent of each file in turn.
for path in file_paths:
    analyze_nasadem_file(file_path=path)

##################################################
Analyzing file: ../../data/original_data/jay_files/Topography_Products/Curvature.nc
Variables in this file:
 - lon: ('lon',), (11915,)
 - lat: ('lat',), (2872,)
 - Curvatu_tif2: ('lat', 'lon'), (2872, 11915)
Longitude range: -17.078611111124776 to -13.769166666697293
Latitude range: 13.077222222232688 to 13.874722222230263
##################################################
Analyzing file: ../../data/original_data/jay_files/Topography_Products/Drainage_Density.nc
Variables in this file:
 - lon: ('lon',), (10937,)
 - lat: ('lat',), (2873,)
 - Drainage_density: ('lat', 'lon'), (2873, 10937)
Longitude range: -16.806781756020996 to -13.769003978257615
Latitude range: 13.077083333343863 to 13.874861111119214
##################################################
Analyzing file: ../../data/original_data/jay_files/Topography_Products/Slope.nc
Variables in this file:
 - lon: ('lon',), (11915,)
 - lat: ('lat',), (2872,)
 - Slope_tif2: ('lat', 'lon')

In [4]:
import xarray as xr
import pandas as pd

def nc_to_dataframe(file_path: str, var_name: str, dataset_name: str) -> pd.DataFrame:
    """
    Convert one variable of a NetCDF file to a pandas DataFrame, print a preview
    and the percentage of NaN values, and return the DataFrame.

    As a side effect the DataFrame is also stored in this module's globals under
    the name `df_<dataset_name>`, which is how other cells of the notebook access it.

    Args:
    file_path (str): Path to the .nc file.
    var_name (str): Name of the variable in the .nc file to convert.
    dataset_name (str): Descriptive name for the dataset; used as the suffix of the global name.

    Returns:
    pd.DataFrame: The variable as a DataFrame, with the former index levels
    restored as ordinary columns by `reset_index()`.
    """
    # Materialise the variable while the file is open, then release the handle.
    with xr.open_dataset(file_path) as ds:
        df = ds[var_name].to_dataframe().reset_index()

    # Calculate and print the percentage of NaN values
    nan_percentage = df[var_name].isna().mean() * 100
    print(f"DataFrame from {file_path} - Variable: {var_name}")
    print(df.head())  # Display the first few rows of the DataFrame
    print(f"Percentage of NaN values in '{var_name}': {nan_percentage:.2f}%")
    print('-' * 50)  # Separator

    # Kept for backward compatibility: other cells read these globals directly.
    globals()[f'df_{dataset_name}'] = df

    # The signature promises a DataFrame; previously the function returned None.
    return df

# (file path, NetCDF variable name, dataset name) triples. The dataset name is
# the suffix of the global DataFrame that nc_to_dataframe creates: df_<dataset name>.
file_var_pairs = [
    ('../../data/original_data/jay_files/Topography_Products/Curvature.nc', 'Curvatu_tif2', 'curvature'),
    ('../../data/original_data/jay_files/Topography_Products/Drainage_Density.nc', 'Drainage_density', 'drainage_density'),
    ('../../data/original_data/jay_files/Topography_Products/Slope.nc', 'Slope_tif2', 'slope'),
    ('../../data/original_data/jay_files/hydrogeology.nc', 'Hydrogeo', 'Hydrogeology')
]

# Convert every file; each call prints a preview and the NaN share of its variable.
for file_path, var_name, dataset_name in file_var_pairs:
    nc_to_dataframe(file_path=file_path, var_name=var_name, dataset_name=dataset_name)

# After running this cell the DataFrames are available as globals:
# df_curvature, df_drainage_density, df_slope and df_Hydrogeology.

DataFrame from ../../data/original_data/jay_files/Topography_Products/Curvature.nc - Variable: Curvatu_tif2
         lat        lon  Curvatu_tif2
0  13.874722 -17.078611           NaN
1  13.874722 -17.078333           NaN
2  13.874722 -17.078056           NaN
3  13.874722 -17.077778           NaN
4  13.874722 -17.077500           NaN
Percentage of NaN values in 'Curvatu_tif2': 2.38%
--------------------------------------------------
DataFrame from ../../data/original_data/jay_files/Topography_Products/Drainage_Density.nc - Variable: Drainage_density
         lat        lon  Drainage_density
0  13.874861 -16.806782               0.0
1  13.874861 -16.806504               0.0
2  13.874861 -16.806226               0.0
3  13.874861 -16.805948               0.0
4  13.874861 -16.805671               0.0
Percentage of NaN values in 'Drainage_density': 0.00%
--------------------------------------------------
DataFrame from ../../data/original_data/jay_files/Topography_Products/Slope.nc - Variab

In [5]:
def find_nearest(df_wells: pd.DataFrame, df_curvature: pd.DataFrame) -> pd.DataFrame:
    """
    Find the nearest grid row in df_curvature for each well in df_wells and
    append that row's columns to the well's columns.

    Distances are plain Euclidean distances in (lat, lon) degrees, as computed
    by the KDTree.

    Args:
    df_wells (pd.DataFrame): Wells with 'Latitude' and 'Longitude' columns. The
        well name is read from 'Name ' or 'Name' for logging only.
    df_curvature (pd.DataFrame): Grid with 'lat' and 'lon' columns plus value column(s).

    Returns:
    pd.DataFrame: One row per well (index reset to 0..n-1) holding the well's
    columns followed by the columns of its nearest grid row.
    """
    # Build KDTree for efficient nearest neighbor search
    tree = KDTree(df_curvature[['lat', 'lon']])

    merged_rows = []
    total_wells = len(df_wells)

    # Count positions with enumerate: index labels need not be 0..n-1 integers.
    for position, (_, well) in enumerate(df_wells.iterrows(), start=1):
        # The raw table uses 'Name ' (trailing space); accept the cleaned label too.
        well_name = well.get('Name ', well.get('Name', 'Unknown'))
        print(f"Analyzing well {position}/{total_wells}: {well_name}")

        # Find the nearest neighbor for this well
        _, nearest_idx = tree.query([well['Latitude'], well['Longitude']], k=1)
        nearest_row = df_curvature.iloc[nearest_idx]

        # Log the curvature value when present; otherwise log every non-coordinate value.
        if 'Curvatu_tif2' in nearest_row.index:
            matched_value = nearest_row['Curvatu_tif2']
        else:
            matched_value = nearest_row.drop(labels=['lat', 'lon']).to_dict()
        print(f"Closest match found at index {nearest_idx}: {matched_value}")

        # Merge the data
        merged_row = pd.concat([well, nearest_row])
        print(f"Merged data: {merged_row}\n")

        merged_rows.append(merged_row)

    merged_df = pd.DataFrame(merged_rows).reset_index(drop=True)
    return merged_df

def save_data(merged_df: pd.DataFrame, save_path: str) -> None:
    """
    Write `merged_df` to `save_path` as a CSV file.

    The DataFrame index is not written to the file.
    """
    write_csv = merged_df.to_csv
    write_csv(save_path, index=False)


In [6]:
# Load the wells table. The same CSV path is also written by later cells of this
# notebook, so the file must already exist for a top-to-bottom run.
df_wells = pd.read_csv('../../data/processed_data/igrac/wells_gambia_final.csv')

In [17]:
df_Hydrogeology

Unnamed: 0,lat,lon,Hydrogeo
0,21.611112,-17.52524,
1,21.611112,-17.50524,
2,21.611112,-17.48524,
3,21.611112,-17.46524,
4,21.611112,-17.44524,
...,...,...,...
132140,10.931112,-12.68524,
132141,10.931112,-12.66524,
132142,10.931112,-12.64524,
132143,10.931112,-12.62524,


In [14]:
# The bare `df_wells` on the next line is not the last expression of the cell;
# `rows_with_nan` is the expression the cell ends with.
df_wells
# Keep only the wells that have a missing value in at least one column.
rows_with_nan = df_wells[df_wells.isnull().any(axis=1)]
rows_with_nan

Unnamed: 0,ID,Name,Latitude,Longitude,Ground surface elevation,DepthToGroundwater,Curvatu_tif2,Drainage_density,Slope_tif2,Hydrogeo,NASADEM_HGT
15,NWP20003,Mariama Kunda OB2,13.358333,-16.726389,15.47,0-7,3888000000.0,84.816689,0.46455,,19.0
32,NWP20002,Brufut OB7,13.395665,-16.743327,17.53,0-7,-0.0,41.551628,2.500146,,17.0


In [21]:
import pandas as pd
from scipy.spatial import KDTree
import numpy as np

# Assumes df_wells and df_Hydrogeology have already been defined by earlier cells.

def find_nearest_hydrogeo(well_lat: float, well_lon: float, kdtree, hydrogeo_coords, hydrogeo_values, max_neighbors=10) -> float:
    """
    Find the nearest non-NaN hydrogeo value for given well coordinates.

    The `max_neighbors` closest grid points are inspected from nearest to
    farthest and the first value that is not NaN is returned.

    Args:
        well_lat (float): Latitude of the well.
        well_lon (float): Longitude of the well.
        kdtree (KDTree): KDTree for nearest neighbor search, built on [lat, lon] pairs.
        hydrogeo_coords (np.array): Array of lat-lon coordinates for KDTree. Not read
            by this function; kept so existing callers keep working.
        hydrogeo_values (np.array): Array of hydrogeo values corresponding to the KDTree.
        max_neighbors (int): Maximum number of neighbors to consider.

    Returns:
        float: The nearest non-NaN hydrogeo value, or NaN if every inspected
        neighbor is NaN.
    """
    # Query a set of nearest neighbors
    _, indices = kdtree.query([well_lat, well_lon], k=max_neighbors)

    n_values = len(hydrogeo_values)

    # With k=1 the query returns a scalar; atleast_1d makes both cases iterable.
    for index in np.atleast_1d(indices):
        # When k exceeds the number of tree points, the missing neighbors are
        # reported with an index equal to the point count; skip those.
        if index >= n_values:
            continue
        value = hydrogeo_values[index]
        if not np.isnan(value):
            return value

    # Return NaN if all nearest neighbors have NaN values
    return np.nan

In [22]:
# Extract the grid as plain arrays: an (n, 2) array of [lat, lon] pairs and the
# matching Hydrogeo value for each row, in the same row order.
hydrogeo_coords = df_Hydrogeology[['lat', 'lon']].values
hydrogeo_values = df_Hydrogeology['Hydrogeo'].values

# Spatial index over the grid coordinates; positions returned by its queries
# index into hydrogeo_values.
kdtree = KDTree(hydrogeo_coords)


In [24]:
# For every well whose Hydrogeo value is missing, look up the nearest non-NaN
# grid value and write it back into df_wells in place (by index label).
for index, row in df_wells[df_wells['Hydrogeo'].isnull()].iterrows():
    nearest_hydrogeo = find_nearest_hydrogeo(row['Latitude'], row['Longitude'], kdtree, hydrogeo_coords, hydrogeo_values)
    df_wells.at[index, 'Hydrogeo'] = nearest_hydrogeo

In [26]:
df_wells

# Save df_wells as CSV (without the index). This overwrites the same file that
# df_wells is loaded from elsewhere in this notebook.
df_wells.to_csv('../../data/processed_data/igrac/wells_gambia_final.csv', index=False)

In [19]:
df_wells.columns

Index(['ID', 'Name ', 'Status', 'Latitude', 'Longitude',
       'Ground surface elevation', 'DepthToGroundwater',
       'GroundwaterProductivity', 'GroundwaterStorage'],
      dtype='object')

In [21]:
# Match every well to its nearest curvature grid row. `df_curvature` is the
# global that nc_to_dataframe creates for the 'curvature' dataset name.
merged_df = find_nearest(df_wells=df_wells, df_curvature=df_curvature)

# Write the merged table to its own CSV file.
save_data(merged_df=merged_df, save_path='../../data/processed_data/igrac/wells_gambia_updated_jay.csv')

Analyzing well 1/47: Yoro Beri Kunda
Closest match found at index 16141267: -1296000000.0
Merged data: ID                               0000058001
Name                        Yoro Beri Kunda
Status                               Active
Latitude                          13.498482
Longitude                        -14.757303
Ground surface elevation               17.4
DepthToGroundwater                      0-7
GroundwaterProductivity                 >20
GroundwaterStorage                  >50,000
lat                               13.498611
lon                              -14.757222
Curvatu_tif2                  -1296000000.0
dtype: object

Analyzing well 2/47: Pakaliba
Closest match found at index 15341216: 3888000000.0
Merged data: ID                            0000035001
Name                            Pakaliba
Status                            Active
Latitude                       13.517245
Longitude                     -15.242125
Ground surface elevation           13.34
DepthToGround

In [22]:
# Replace df_wells with the table written by the curvature merge cell.
df_wells = pd.read_csv('../../data/processed_data/igrac/wells_gambia_updated_jay.csv')

In [24]:
df_wells.head()

Unnamed: 0,ID,Name,Status,Latitude,Longitude,Ground surface elevation,DepthToGroundwater,GroundwaterProductivity,GroundwaterStorage,lat,lon,Curvatu_tif2
0,58001,Yoro Beri Kunda,Active,13.498482,-14.757303,17.4,0-7,>20,">50,000",13.498611,-14.757222,-1296000000.0
1,35001,Pakaliba,Active,13.517245,-15.242125,13.34,0-7,>20,">50,000",13.517222,-15.242222,3888000000.0
2,24001,Somita,Active,13.208963,-16.298932,27.31,0-7,>20,">50,000",13.208889,-16.298889,2592000000.0
3,23001,Taneneh,Active,13.233057,-16.504728,14.32,0-7,>20,">50,000",13.233056,-16.504722,7776000000.0
4,20007,Mandinari OB3,Active,13.362761,-16.637219,30.47,7-25,>20,">50,000",13.362778,-16.637222,-0.0


In [25]:
# Remove the matched grid row's coordinates (`lat`, `lon`) that the merge added;
# the wells' own Latitude/Longitude columns are kept.
df_wells = df_wells.drop(columns=['lat', 'lon'])

In [26]:
# Strip the trailing space from the 'Name ' column label.
df_wells = df_wells.rename(columns={'Name ': 'Name'})

In [38]:
df_wells.columns

Index(['ID', 'Name', 'Status', 'Latitude', 'Longitude',
       'Ground surface elevation', 'DepthToGroundwater',
       'GroundwaterProductivity', 'GroundwaterStorage', 'Curvatu_tif2'],
      dtype='object')

df_slope

In [35]:
df_drainage_density

Unnamed: 0,lat,lon,Drainage_density
0,13.874861,-16.806782,0.000000
1,13.874861,-16.806504,0.000000
2,13.874861,-16.806226,0.000000
3,13.874861,-16.805948,0.000000
4,13.874861,-16.805671,0.000000
...,...,...,...
31421996,13.077083,-13.770115,10.032833
31421997,13.077083,-13.769837,10.017525
31421998,13.077083,-13.769560,9.998237
31421999,13.077083,-13.769282,9.974904


In [37]:
from scipy.spatial import KDTree
import pandas as pd

def find_nearest_drainage_density(df_wells: pd.DataFrame, df_drainage_density: pd.DataFrame) -> pd.DataFrame:
    """
    Attach the closest drainage-density grid row to every well.

    For each row of df_wells the nearest (lat, lon) point of df_drainage_density
    is looked up with a KDTree, and the columns of that grid row are added to
    the well's columns. On a shared column name the grid value wins.
    Progress is printed for every well.
    """
    # Spatial index over the grid coordinates.
    grid_index = KDTree(df_drainage_density[['lat', 'lon']])

    records = []

    for label, well_row in df_wells.iterrows():
        display_name = well_row.get('Name', 'Unknown')  # 'Unknown' when there is no 'Name' column
        print(f"Analyzing well {label + 1}/{len(df_wells)}: {display_name}")

        # Only the position of the nearest grid row is needed, not the distance.
        query_point = [well_row['Latitude'], well_row['Longitude']]
        _, nearest_idx = grid_index.query(query_point, k=1)
        grid_row = df_drainage_density.iloc[nearest_idx]

        print(f"Closest match found at index {nearest_idx}: {grid_row['Drainage_density']}")

        # Well columns first, then the grid columns.
        record = {**well_row.to_dict(), **grid_row.to_dict()}
        records.append(record)

        print(f"Merged data: {record}\n")

    return pd.DataFrame(records)

def save_data(merged_df: pd.DataFrame, save_path: str) -> None:
    """
    Persist the merged table as a CSV file at `save_path`.

    The index is left out of the file. This repeats the `save_data` definition
    from an earlier cell of the notebook.
    """
    csv_options = {'index': False}
    merged_df.to_csv(save_path, **csv_options)


In [39]:
# Match every well to its nearest drainage-density grid row, then write the
# merged table to its own CSV file.
merged_df = find_nearest_drainage_density(df_wells=df_wells, df_drainage_density=df_drainage_density)
save_data(merged_df=merged_df, save_path='../../data/processed_data/igrac/wells_gambia_updated_jay_2.csv')

Analyzing well 1/47: Yoro Beri Kunda
Closest match found at index 14827013: 88.64712524414062
Merged data: {'ID': '0000058001', 'Name': 'Yoro Beri Kunda', 'Status': 'Active', 'Latitude': 13.4984817969726, 'Longitude': -14.7573025092349, 'Ground surface elevation': 17.4, 'DepthToGroundwater': '0-7', 'GroundwaterProductivity': '>20', 'GroundwaterStorage': '>50,000', 'Curvatu_tif2': -1296000000.0, 'lat': 13.49847222223147, 'lon': -14.757337311587943, 'Drainage_density': 88.64712524414062}

Analyzing well 2/47: Pakaliba
Closest match found at index 14081552: 99.22616577148438
Merged data: {'ID': '0000035001', 'Name': 'Pakaliba', 'Status': 'Active', 'Latitude': 13.5172450917462, 'Longitude': -15.242125038732, 'Ground surface elevation': 13.34, 'DepthToGroundwater': '0-7', 'GroundwaterProductivity': '>20', 'GroundwaterStorage': '>50,000', 'Curvatu_tif2': 3888000000.0, 'lat': 13.517361111120302, 'lon': -15.24205953380869, 'Drainage_density': 99.22616577148438}

Analyzing well 3/47: Somita
Clo

In [40]:
# Replace df_wells with the table written by the drainage-density merge cell.
df_wells = pd.read_csv('../../data/processed_data/igrac/wells_gambia_updated_jay_2.csv')

In [43]:
# Remove the matched drainage-density grid row's coordinates added by the merge.
df_wells = df_wells.drop(columns=['lat', 'lon'])

In [44]:
df_wells.head()

Unnamed: 0,ID,Name,Status,Latitude,Longitude,Ground surface elevation,DepthToGroundwater,GroundwaterProductivity,GroundwaterStorage,Curvatu_tif2,Drainage_density
0,58001,Yoro Beri Kunda,Active,13.498482,-14.757303,17.4,0-7,>20,">50,000",-1296000000.0,88.647125
1,35001,Pakaliba,Active,13.517245,-15.242125,13.34,0-7,>20,">50,000",3888000000.0,99.226166
2,24001,Somita,Active,13.208963,-16.298932,27.31,0-7,>20,">50,000",2592000000.0,91.167213
3,23001,Taneneh,Active,13.233057,-16.504728,14.32,0-7,>20,">50,000",7776000000.0,61.539433
4,20007,Mandinari OB3,Active,13.362761,-16.637219,30.47,7-25,>20,">50,000",-0.0,49.994442


In [48]:
df_slope

Unnamed: 0,lat,lon,Slope_tif2
0,13.874722,-17.078611,
1,13.874722,-17.078333,
2,13.874722,-17.078056,
3,13.874722,-17.077778,
4,13.874722,-17.077500,
...,...,...,...
34219875,13.077222,-13.770278,
34219876,13.077222,-13.770000,
34219877,13.077222,-13.769722,
34219878,13.077222,-13.769444,


In [50]:
from scipy.spatial import KDTree
import pandas as pd

def find_nearest_slope(df_wells: pd.DataFrame, df_slope: pd.DataFrame) -> pd.DataFrame:
    """
    Attach the closest slope grid row to every well.

    Each well's (Latitude, Longitude) is matched against the (lat, lon) points
    of df_slope with a KDTree. The returned frame has one row per well holding
    the well's columns plus the columns of its nearest grid row; on a shared
    column name the grid value wins. Progress is printed for every well.
    """
    # Spatial index over the grid coordinates.
    slope_tree = KDTree(df_slope[['lat', 'lon']])

    def _merge_with_nearest(index, well) -> dict:
        """Log one well and return its columns combined with the nearest grid row."""
        well_name = well.get('Name', 'Unknown')  # 'Unknown' when there is no 'Name' column
        print(f"Analyzing well {index + 1}/{len(df_wells)}: {well_name}")

        # query() returns (distance, position); only the position is used.
        coordinates = [well['Latitude'], well['Longitude']]
        nearest_idx = slope_tree.query(coordinates, k=1)[1]
        nearest_row = df_slope.iloc[nearest_idx]

        print(f"Closest match found at index {nearest_idx}: {nearest_row['Slope_tif2']}")

        merged_row = dict(well.to_dict())
        merged_row.update(nearest_row.to_dict())

        print(f"Merged data: {merged_row}\n")
        return merged_row

    merged_rows = [_merge_with_nearest(index, well) for index, well in df_wells.iterrows()]
    return pd.DataFrame(merged_rows)

In [51]:
# Match every well to its nearest slope grid row, then write the merged table
# to its own CSV file.
merged_df = find_nearest_slope(df_wells=df_wells, df_slope=df_slope)
save_data(merged_df=merged_df, save_path='../../data/processed_data/igrac/wells_gambia_updated_jay_3.csv')


Analyzing well 1/47: Yoro Beri Kunda
Closest match found at index 16141267: 3.281345844268799
Merged data: {'ID': '0000058001', 'Name': 'Yoro Beri Kunda', 'Status': 'Active', 'Latitude': 13.4984817969726, 'Longitude': -14.7573025092349, 'Ground surface elevation': 17.4, 'DepthToGroundwater': '0-7', 'GroundwaterProductivity': '>20', 'GroundwaterStorage': '>50,000', 'Curvatu_tif2': -1296000000.0, 'Drainage_density': 88.64712524414062, 'lat': 13.498611111120296, 'lon': -14.757222222249844, 'Slope_tif2': 3.281345844268799}

Analyzing well 2/47: Pakaliba
Closest match found at index 15341216: 3.667654514312744
Merged data: {'ID': '0000035001', 'Name': 'Pakaliba', 'Status': 'Active', 'Latitude': 13.5172450917462, 'Longitude': -15.242125038732, 'Ground surface elevation': 13.34, 'DepthToGroundwater': '0-7', 'GroundwaterProductivity': '>20', 'GroundwaterStorage': '>50,000', 'Curvatu_tif2': 3888000000.0, 'Drainage_density': 99.22616577148438, 'lat': 13.51722222223135, 'lon': -15.242222222248369

In [52]:
# Replace df_wells with the table written by the slope merge cell.
df_wells = pd.read_csv('../../data/processed_data/igrac/wells_gambia_updated_jay_3.csv')

In [54]:
# Remove the matched slope grid row's coordinates added by the merge.
df_wells = df_wells.drop(columns=['lat', 'lon'])

In [55]:
df_wells.head()

Unnamed: 0,ID,Name,Status,Latitude,Longitude,Ground surface elevation,DepthToGroundwater,GroundwaterProductivity,GroundwaterStorage,Curvatu_tif2,Drainage_density,Slope_tif2
0,58001,Yoro Beri Kunda,Active,13.498482,-14.757303,17.4,0-7,>20,">50,000",-1296000000.0,88.647125,3.281346
1,35001,Pakaliba,Active,13.517245,-15.242125,13.34,0-7,>20,">50,000",3888000000.0,99.226166,3.667655
2,24001,Somita,Active,13.208963,-16.298932,27.31,0-7,>20,">50,000",2592000000.0,91.167213,0.656959
3,23001,Taneneh,Active,13.233057,-16.504728,14.32,0-7,>20,">50,000",7776000000.0,61.539433,2.500146
4,20007,Mandinari OB3,Active,13.362761,-16.637219,30.47,7-25,>20,">50,000",-0.0,49.994442,1.768431


In [56]:
df_Hydrogeology

Unnamed: 0,lat,lon,Hydrogeo
0,21.611112,-17.52524,
1,21.611112,-17.50524,
2,21.611112,-17.48524,
3,21.611112,-17.46524,
4,21.611112,-17.44524,
...,...,...,...
132140,10.931112,-12.68524,
132141,10.931112,-12.66524,
132142,10.931112,-12.64524,
132143,10.931112,-12.62524,


In [57]:
from scipy.spatial import KDTree
import pandas as pd

def find_nearest_hydrogeology(df_wells: pd.DataFrame, df_hydrogeology: pd.DataFrame, max_neighbors: int = 1) -> pd.DataFrame:
    """
    Find the nearest grid row in df_hydrogeology for each well in df_wells
    and merge the data.

    By default (`max_neighbors=1`) the single nearest grid row is used, even if
    its 'Hydrogeo' value is NaN. With a larger `max_neighbors`, the closest of
    that many neighbors whose 'Hydrogeo' value is not NaN is used instead; if
    all of them are NaN the nearest row is kept.

    Args:
    df_wells (pd.DataFrame): Wells with 'Latitude' and 'Longitude' columns;
        'Name' is used for logging when present.
    df_hydrogeology (pd.DataFrame): Grid with 'lat', 'lon' and 'Hydrogeo' columns.
    max_neighbors (int): How many nearest grid rows to inspect for a non-NaN value.

    Returns:
    pd.DataFrame: One row per well with the well's columns plus the matched grid
    row's columns; on a shared column name the grid value wins.
    """
    # Build KDTree for efficient nearest neighbor search
    tree = KDTree(df_hydrogeology[['lat', 'lon']])
    hydrogeo_values = df_hydrogeology['Hydrogeo'].to_numpy()

    # Never ask for more neighbors than there are grid rows, and at least one.
    k = max(1, min(int(max_neighbors), len(df_hydrogeology)))

    merged_rows = []
    total_wells = len(df_wells)

    # Count positions with enumerate: index labels need not be 0..n-1 integers.
    for position, (_, well) in enumerate(df_wells.iterrows(), start=1):
        well_name = well.get('Name', 'Unknown')  # Fallback to 'Unknown' if 'Name' does not exist
        print(f"Analyzing well {position}/{total_wells}: {well_name}")

        # With k=1 the query returns a scalar position; otherwise an array
        # ordered from nearest to farthest.
        _, neighbours = tree.query([well['Latitude'], well['Longitude']], k=k)
        candidates = [neighbours] if k == 1 else list(neighbours)

        # Prefer the closest candidate that has a value; fall back to the nearest.
        nearest_idx = candidates[0]
        for candidate in candidates:
            if not pd.isna(hydrogeo_values[candidate]):
                nearest_idx = candidate
                break
        nearest_row = df_hydrogeology.iloc[nearest_idx]

        print(f"Closest match found at index {nearest_idx}: {nearest_row['Hydrogeo']}")

        # Merge the data
        merged_row = well.to_dict()
        merged_row.update(nearest_row.to_dict())
        merged_rows.append(merged_row)

        print(f"Merged data: {merged_row}\n")

    merged_df = pd.DataFrame(merged_rows)
    return merged_df

In [59]:
# Match every well to its nearest hydrogeology grid row, then write the merged
# table to its own CSV file.
merged_df = find_nearest_hydrogeology(df_wells=df_wells, df_hydrogeology=df_Hydrogeology)
save_data(merged_df=merged_df, save_path='../../data/processed_data/igrac/wells_gambia_updated_jay_4.csv')


Analyzing well 1/47: Yoro Beri Kunda
Closest match found at index 100420: 3.0
Merged data: {'ID': '0000058001', 'Name': 'Yoro Beri Kunda', 'Status': 'Active', 'Latitude': 13.4984817969726, 'Longitude': -14.7573025092349, 'Ground surface elevation': 17.4, 'DepthToGroundwater': '0-7', 'GroundwaterProductivity': '>20', 'GroundwaterStorage': '>50,000', 'Curvatu_tif2': -1296000000.0, 'Drainage_density': 88.64712524414062, 'Slope_tif2': 3.281345844268799, 'lat': 13.491112000000175, 'lon': -14.765240000000059, 'Hydrogeo': 3.0}

Analyzing well 2/47: Pakaliba
Closest match found at index 100149: 3.0
Merged data: {'ID': '0000035001', 'Name': 'Pakaliba', 'Status': 'Active', 'Latitude': 13.5172450917462, 'Longitude': -15.242125038732, 'Ground surface elevation': 13.34, 'DepthToGroundwater': '0-7', 'GroundwaterProductivity': '>20', 'GroundwaterStorage': '>50,000', 'Curvatu_tif2': 3888000000.0, 'Drainage_density': 99.22616577148438, 'Slope_tif2': 3.667654514312744, 'lat': 13.511112000000175, 'lon': 

In [60]:
# Replace df_wells with the table written by the hydrogeology merge cell.
df_wells = pd.read_csv('../../data/processed_data/igrac/wells_gambia_updated_jay_4.csv')

In [62]:
# Remove the matched grid row's coordinates together with the
# GroundwaterStorage, GroundwaterProductivity and Status columns.
df_wells = df_wells.drop(columns=['lat', 'lon','GroundwaterStorage','GroundwaterProductivity','Status'])

In [63]:
df_wells

Unnamed: 0,ID,Name,Latitude,Longitude,Ground surface elevation,DepthToGroundwater,Curvatu_tif2,Drainage_density,Slope_tif2,Hydrogeo
0,0000058001,Yoro Beri Kunda,13.498482,-14.757303,17.4,0-7,-1296000000.0,88.647125,3.281346,3.0
1,0000035001,Pakaliba,13.517245,-15.242125,13.34,0-7,3888000000.0,99.226166,3.667655,3.0
2,0000024001,Somita,13.208963,-16.298932,27.31,0-7,2592000000.0,91.167213,0.656959,5.0
3,0000023001,Taneneh,13.233057,-16.504728,14.32,0-7,7776000000.0,61.539433,2.500146,5.0
4,0000020007,Mandinari OB3,13.362761,-16.637219,30.47,7-25,-0.0,49.994442,1.768431,5.0
5,0000020006,Kerewan OB5,13.36081,-16.625421,23.47,7-25,-0.0,51.52953,1.184233,5.0
6,0000022001,Baffuloto OB4,13.329997,-16.658777,28.62,0-7,-5184000000.0,69.567108,3.281346,5.0
7,0000020005,Sinchu Sore OB1,13.384752,-16.679721,25.57,7-25,2592000000.0,58.679554,0.929039,5.0
8,0000020009,Wellingara EX1,13.394916,-16.664077,20.48,0-7,2592000000.0,92.571068,0.985384,5.0
9,0000000001,Bijilo OB6,13.420804,-16.716666,19.47,0-7,-6480000000.0,57.568844,2.785167,5.0


In [64]:
# Write the wells table with the merged grid values to the final CSV (index omitted).
df_wells.to_csv('../../data/processed_data/igrac/wells_gambia_final.csv', index=False)

In [65]:
# Read the final CSV back into df_wells.
df_wells = pd.read_csv('../../data/processed_data/igrac/wells_gambia_final.csv')

In [66]:
df_wells

Unnamed: 0,ID,Name,Latitude,Longitude,Ground surface elevation,DepthToGroundwater,Curvatu_tif2,Drainage_density,Slope_tif2,Hydrogeo
0,0000058001,Yoro Beri Kunda,13.498482,-14.757303,17.4,0-7,-1296000000.0,88.647125,3.281346,3.0
1,0000035001,Pakaliba,13.517245,-15.242125,13.34,0-7,3888000000.0,99.226166,3.667655,3.0
2,0000024001,Somita,13.208963,-16.298932,27.31,0-7,2592000000.0,91.167213,0.656959,5.0
3,0000023001,Taneneh,13.233057,-16.504728,14.32,0-7,7776000000.0,61.539433,2.500146,5.0
4,0000020007,Mandinari OB3,13.362761,-16.637219,30.47,7-25,-0.0,49.994442,1.768431,5.0
5,0000020006,Kerewan OB5,13.36081,-16.625421,23.47,7-25,-0.0,51.52953,1.184233,5.0
6,0000022001,Baffuloto OB4,13.329997,-16.658777,28.62,0-7,-5184000000.0,69.567108,3.281346,5.0
7,0000020005,Sinchu Sore OB1,13.384752,-16.679721,25.57,7-25,2592000000.0,58.679554,0.929039,5.0
8,0000020009,Wellingara EX1,13.394916,-16.664077,20.48,0-7,2592000000.0,92.571068,0.985384,5.0
9,0000000001,Bijilo OB6,13.420804,-16.716666,19.47,0-7,-6480000000.0,57.568844,2.785167,5.0
