In [1]:
pip install --upgrade geopandas

Collecting pandas>=1.4.0 (from geopandas)
  Using cached pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (89 kB)
Using cached pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl (11.3 MB)
Installing collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 1.3.5
    Uninstalling pandas-1.3.5:
      Successfully uninstalled pandas-1.3.5
Successfully installed pandas-2.2.3
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install pandas==1.3.5

Collecting pandas==1.3.5
  Using cached pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl.metadata (12 kB)
Using cached pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl (10.3 MB)
Installing collected packages: pandas
  Attempting uninstall: pandas
    Found existing installation: pandas 2.2.3
    Uninstalling pandas-2.2.3:
      Successfully uninstalled pandas-2.2.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
geopandas 1.0.1 requires pandas>=1.4.0, but you have pandas 1.3.5 which is incompatible.
xarray 2023.6.0 requires pandas>=1.4, but you have pandas 1.3.5 which is incompatible.[0m[31m
[0mSuccessfully installed pandas-1.3.5
Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install rasterio

Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import glob
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import rasterio
from rasterio.transform import rowcol

# Root directory that is searched for the yearly imagery folders
# Edit 1: set the Sentinel root path
root_directory = './Seoul/2023/'

# CSV file describing the monitoring stations
# Edit 2: AirKorea monitoring station codes
csv_file_path = './Station_address/Seoul_address.csv'

# Load the station table with pandas
df = pd.read_csv(csv_file_path)

# Build point geometries from the Longitude/Latitude columns (assumed to be
# WGS84, EPSG:4326) and reproject the result to the target CRS (EPSG:32652).
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["Longitude"], df["Latitude"]),
    crs="EPSG:4326",
).to_crs("EPSG:32652")
print(gdf)

def extract_values_from_image(image_path, gdf, column_name):
    """Sample the first band of a raster at every point of a GeoDataFrame.

    Parameters
    ----------
    image_path : str
        Path of a raster that ``rasterio.open`` can read.
    gdf : geopandas.GeoDataFrame
        Point geometries to sample. It is reprojected to the raster CRS when
        the two differ. The frame passed in is never modified.
    column_name : str
        Name of the column that receives the sampled values.

    Returns
    -------
    geopandas.GeoDataFrame
        A copy of ``gdf`` in the raster CRS with ``column_name`` added. Values
        are stored as floats so that points falling outside the raster can be
        represented as NaN.
    """
    with rasterio.open(image_path) as dataset:
        print(f"Processing {image_path}")

        # Always work on a separate frame in the raster CRS so the caller's
        # object is left untouched regardless of which branch is taken.
        if gdf.crs != dataset.crs:
            gdf = gdf.to_crs(dataset.crs)
        else:
            gdf = gdf.copy()

        # Decode the band once instead of once per point.
        band = dataset.read(1)
        transform = dataset.transform

    n_rows, n_cols = band.shape

    sampled = []
    for point in gdf.geometry:
        row_idx, col_idx = rowcol(transform, point.x, point.y)
        row_idx, col_idx = int(row_idx), int(col_idx)

        # Explicit bounds check: negative indices would otherwise wrap around
        # in NumPy and silently return a pixel from the opposite edge.
        if 0 <= row_idx < n_rows and 0 <= col_idx < n_cols:
            sampled.append(float(band[row_idx, col_idx]))
        else:
            print(f"Point {point} is outside the bounds of the raster {image_path}")
            sampled.append(float("nan"))

    # Positional assignment: one value per row, independent of index labels.
    gdf[column_name] = sampled
    return gdf

# Output column names, in the order the columns are written
band_names = ["AOT", "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8A", "B11", "B12", "SCL", "TCI", "WVP"]


def _band_label(image_path):
    """Return the band label encoded in a band file name, or None.

    The label is taken from the second-to-last underscore-separated token of
    the file name and zero padding is removed, e.g. '..._B01_20m.jp2' -> 'B1'.

    NOTE(review): assumes the Sentinel-2 L2A naming
    '<tile>_<datetime>_<band>_<resolution>.jp2' - confirm against the files on disk.
    """
    stem = os.path.splitext(os.path.basename(image_path))[0]
    parts = stem.split('_')
    if len(parts) < 2:
        return None
    token = parts[-2]
    if token.startswith('B') and token[1:].isdigit():
        token = f"B{int(token[1:])}"
    return token


# One GeoDataFrame per product folder is collected here
output_data = []

# Walk the root directory and process every folder that contains a GRANULE directory
for subdir, _, _ in os.walk(root_directory):
    jp2_dir = os.path.join(subdir, 'GRANULE')
    if not os.path.isdir(jp2_dir):
        continue

    jp2_files = glob.glob(os.path.join(jp2_dir, '**/IMG_DATA/R20m/*.jp2'), recursive=True)
    if not jp2_files:
        continue

    # Extract the date from the .SAFE folder name (third '_'-separated field, first 8 characters)
    safe_folder = [folder for folder in subdir.split(os.sep) if folder.endswith(".SAFE")]
    name_parts = safe_folder[0].split('_') if safe_folder else []
    date = name_parts[2][:8] if len(name_parts) > 2 else "Unknown"

    # Match files to bands by name. glob order is arbitrary, and even sorted
    # file names (..., B07, B11, B12, B8A, ...) do not follow band_names order,
    # so positional matching would mislabel columns.
    band_files = {}
    for image_path in sorted(jp2_files):
        label = _band_label(image_path)
        if label in band_names:
            band_files.setdefault(label, image_path)
        else:
            print(f"Skipping unrecognised band file {image_path}")

    # Sample each band into a copy of the station table
    temp_gdf = gdf.copy()
    for band_name in band_names:
        if band_name in band_files:
            temp_gdf = extract_values_from_image(band_files[band_name], temp_gdf, band_name)
        else:
            print(f"No {band_name} file found under {jp2_dir}")

    # Append the date as a column
    temp_gdf["Date"] = date

    output_data.append(temp_gdf)

# Fail with a clear message instead of pd.concat's "No objects to concatenate"
if not output_data:
    raise FileNotFoundError(f"No R20m .jp2 band files found under {root_directory}")

# Combine all dataframes into a single dataframe
final_gdf = pd.concat(output_data, ignore_index=True)

# Sort by Date; a stable sort keeps the station order within each date deterministic
final_gdf = final_gdf.sort_values(by="Date", kind="stable")

# Save the GeoDataFrame to a CSV file
# Edit 3: set the path where the band values are saved
output_csv_path = './korea_bandvalue/Seoul/2023_band.csv'
# Create the output directory if needed so to_csv does not fail on a fresh checkout
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
final_gdf.to_csv(output_csv_path, index=False)

# Print the absolute path to confirm where the file was saved
absolute_path = os.path.abspath(output_csv_path)
print(f"CSV file saved at: {absolute_path}")


                              주소   Latitude   Longitude  \
0                  서울 중구 덕수궁길 15  37.564308  126.974441   
1                서울 용산구 한강대로 405  37.552395  126.971069   
2               서울 종로구 종로35가길 19  37.572051  127.005026   
3                 서울 중구 청계천로 184  37.568617  126.998189   
4                  서울 종로구 종로 169  37.570908  126.996558   
5                서울 용산구 한남대로 136  37.540169  127.004847   
6             서울특별시 광진구 광나루로 571  37.545165  127.095422   
7                서울 성동구 뚝섬로3길 18  37.541996  127.049653   
8                서울 성동구 강변북로 257  37.538879  127.041791   
9                서울 중랑구 용마산로 369  37.584901  127.094035   
10            서울 동대문구 천호대로13길 43  37.575911  127.028966   
11                 서울 동대문구 홍릉로 1  37.580436  127.044497   
12               서울 성북구 삼양로2길 70  37.606676  127.027289   
13            서울 성북구 돈암동 8-164번지  37.603501  127.025965   
14              서울 도봉구 시루봉로2길 34  37.654310  127.029005   
15                서울 은평구 진흥로 215  37.610183  126.933201 