# Reproject Coordinates

In [1]:
import rasterio
from rasterio.warp import reproject, Resampling
import numpy as np

In [2]:
import rioxarray
rds = rioxarray.open_rasterio("data/raw/NLCD_MA_2016.tif")
rds = rds.squeeze().drop_vars("spatial_ref").drop_vars("band")
rds.name = "data"
res = rds.to_dataframe().reset_index()
res.head(2)

  rds = rds.squeeze().drop("spatial_ref").drop("band")


Unnamed: 0,y,x,data
0,3154472.0,103887.5875,0
1,3154472.0,103986.0125,0


In [8]:
[(x,y) for (x, y) in zip(list(res["x"][0:2]), list(res["y"][0:2]))]

[(103887.58749999998, 3154472.0374999996),
 (103986.01249999998, 3154472.0374999996)]

In [11]:
from pyproj import Transformer
with rasterio.open("data/raw/NLCD_MA_2016.tif") as dataset:
    examples = [(x,y) for (x, y) in zip(list(res["x"][0:2]), list(res["y"][0:2]))]
    transformer = Transformer.from_crs(dataset.crs, "epsg:4326")
    coords = [transformer.transform(x, y) for x,y in examples]
coords

[(42.885686939656495, -73.56073276411749),
 (42.88569346255544, -73.5603656513047)]

In [2]:
def reproject_tif_to_latlon(input_tif_path, output_tif_path, target_crs='EPSG:4326'):
    """
    Reprojects a TIFF file to a target Coordinate Reference System (CRS),
    typically WGS84 (latitude and longitude).

    Args:
        input_tif_path (str): Path to the input TIFF file.
        output_tif_path (str): Path where the reprojected TIFF will be saved.
        target_crs (str): The target CRS in a format recognized by rasterio (e.g., 'EPSG:4326').
                          Default is WGS84 Lat/Lon.
    """
    try:
        with rasterio.open(input_tif_path) as src:
            # Check if source CRS is already the target CRS
            if src.crs == target_crs:
                print(f"Input TIFF '{input_tif_path}' is already in {target_crs}. Skipping re-projection.")
                return

            print(f"Reprojecting '{input_tif_path}' from {src.crs} to {target_crs}...")

            # Define the target resolution. This is important for reprojection.
            # You might want to define this based on your data or desired output.
            # For geographic CRS, resolution is in degrees. A common rough estimate might be:
            # (src.res[0] / meters_per_degree_at_equator_approx) or determine new_transform based on desired pixel size in degrees.
            # For simplicity, we'll let reproject calculate the new transform and resolution.
            # You can explicitly set new_transform and new_width/new_height if you need specific output grid.

            # Determine the new profile for the output TIFF
            # Start with a copy of the source profile
            profile = src.profile
            profile.update({
                'crs': target_crs,
                'transform': src.transform, # Will be updated by reproject
                'width': src.width,         # Will be updated by reproject
                'height': src.height,       # Will be updated by reproject
                'compress': 'lzw' # Optional: add compression for smaller file size
            })

            # Calculate the new transform and dimensions needed for the target CRS
            # This is critical for getting correct output extent and resolution
            destination, transform_out = reproject(
                source=rasterio.band(src, 1), # Reprojecting the first band
                destination=np.empty((src.height, src.width), dtype=src.dtype), # Placeholder array for output
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=src.transform, # This will be adjusted by reproject
                dst_crs=target_crs,
                resampling=Resampling.nearest # Or Resampling.bilinear, Resampling.cubic, etc.
            )
            
            # Update the profile with the calculated transform and dimensions
            profile['transform'] = transform_out
            profile['width'] = destination.shape[1] # New width
            profile['height'] = destination.shape[0] # New height

            # Write the reprojected data to the new TIFF file
            with rasterio.open(output_tif_path, 'w', **profile) as dst:
                reproject(
                    source=rasterio.band(src, 1),  # The source band (can be multiple bands)
                    destination=rasterio.band(dst, 1), # The destination band
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=profile['transform'], # Use the calculated transform
                    dst_crs=target_crs,
                    resampling=Resampling.nearest, # Resampling method
                    num_threads=2 # Optional: for multiprocessing if needed
                )
            
            print(f"Successfully reprojected and saved to '{output_tif_path}'.")

    except rasterio.errors.RasterioIOError as e:
        print(f"Error accessing TIFF file: {e}")
        print("Please ensure the input file path is correct and the file is a valid TIFF.")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

In [3]:
input_file = 'data/raw/NLCD_MA_2016.tif'
output_file = 'data/clean/NLCD_MA_2016_WGS84.tif'
reproject_tif_to_latlon(input_file, output_file)

Reprojecting 'data/raw/NLCD_MA_2016.tif' from EPSG:2249 to EPSG:4326...
An unexpected error occurred: 'DatasetReader' object has no attribute 'dtype'


In [None]:
# --- Example Usage ---

# 1. Create a dummy TIFF file with a Projected CRS (e.g., UTM Zone 18N, EPSG:32618)
#    This will be our input file for reprojection.
def create_dummy_utm_tif(filename, rows, cols):
    dummy_data = np.random.randint(0, 256, size=(rows, cols), dtype=np.uint8)
    utm_crs = "EPSG:32618" # WGS 84 / UTM zone 18N
    # Example transform: origin at (300000, 5000000) UTM, pixel resolution 10x10 meters
    utm_transform = rasterio.transform.from_origin(300000, 5000000, 10, 10)
    profile = {
        'driver': 'GTiff',
        'height': rows,
        'width': cols,
        'count': 1,
        'dtype': dummy_data.dtype,
        'crs': utm_crs,
        'transform': utm_transform,
    }
    with rasterio.open(filename, 'w', **profile) as dst:
        dst.write(dummy_data, 1)
    print(f"Dummy input TIFF '{filename}' created successfully with CRS {utm_crs}.")

try:
    input_file = 'input_utm.tif'
    output_file = 'output_latlon.tif'
    dummy_rows, dummy_cols = 500, 500 # A reasonable size for demonstration

    create_dummy_utm_tif(input_file, dummy_rows, dummy_cols)

    # Perform the reprojection
    reproject_tif_to_latlon(input_file, output_file)

    # Optional: Verify the output file's CRS
    with rasterio.open(output_file) as reprojected_src:
        print(f"\nVerification: Reprojected file CRS is {reprojected_src.crs}")
        print(f"Reprojected file dimensions: {reprojected_src.height} rows, {reprojected_src.width} columns")
        print(f"Reprojected file transform:\n{reprojected_src.transform}")

except ImportError:
    print("\nERROR: Please install 'rasterio' and 'numpy'.")
    print("You can install them using: pip install rasterio numpy")
except Exception as e:
    
    print(f"\nAn unexpected error occurred during reprojection process: {e}")

In [12]:
import pandas as pd

# 1. Define your sample DataFrame
data = {
    'X': [10, 20, 30, 40],
    'Y': [1, 2, 3, 4]
}
df = pd.DataFrame(data)

print("Original DataFrame:")
print(df)
print("-" * 30)

# 2. Define your function f that takes X and Y and outputs Z and W
def f(x_val, y_val):
    """
    Sample function that takes two inputs and returns two outputs.
    For demonstration, Z = X + Y and W = X * Y.
    """
    z_output = x_val + y_val
    w_output = x_val * y_val
    return z_output, w_output

# 3. Apply the function to the DataFrame rows and add results as new columns

# Method A: Using apply with a lambda function and unpacking the result
# This is often the most concise way when the function returns a tuple/list
df[['Z', 'W']] = df.apply(lambda row: f(row['X'], row['Y']), axis=1, result_type='expand')

# Explanation of parameters:
# - lambda row: f(row['X'], row['Y']): This is a small anonymous function
#   that takes a 'row' (which is a Series for each row of the DataFrame).
#   It accesses the 'X' and 'Y' values from that row and passes them to function f.
# - axis=1: Tells `apply` to iterate over DataFrame rows (i.e., apply the function row-wise).
# - result_type='expand': This is crucial when your function returns multiple values
#   (like a tuple or list). It expands these results into separate columns.
#   Without 'expand', it would try to put the (Z, W) tuple into a single column.
# - df[['Z', 'W']]: Assigns the two expanded results directly to new columns named 'Z' and 'W'.


print("\nDataFrame after adding Z and W columns:")
print(df)
print("-" * 30)

# Method B (Alternative): If you prefer using .loc or explicit list comprehension
# This method can be less performant for very large DataFrames than Method A.
# However, it might be more readable for some, or useful if you're not expanding.

# Recreate original DataFrame for demonstration of Method B
df_b = pd.DataFrame(data)
z_results = []
w_results = []

for index, row in df_b.iterrows():
    z_val, w_val = f(row['X'], row['Y'])
    z_results.append(z_val)
    w_results.append(w_val)

df_b['Z'] = z_results
df_b['W'] = w_results

print("\nDataFrame using Method B (iterrows and list append):")
print(df_b)
print("-" * 30)

Original DataFrame:
    X  Y
0  10  1
1  20  2
2  30  3
3  40  4
------------------------------

DataFrame after adding Z and W columns:
    X  Y   Z    W
0  10  1  11   10
1  20  2  22   40
2  30  3  33   90
3  40  4  44  160
------------------------------

DataFrame using Method B (iterrows and list append):
    X  Y   Z    W
0  10  1  11   10
1  20  2  22   40
2  30  3  33   90
3  40  4  44  160
------------------------------


In [16]:
row_ind = np.arange(6)
col_ind = np.arange(6)
row_ind, col_ind

(array([0, 1, 2, 3, 4, 5]), array([0, 1, 2, 3, 4, 5]))

In [20]:
import random
random.shuffle(col_ind)
col_ind

array([4, 0, 2, 1, 5, 3])