In [None]:
import os
from getpass import getpass

import requests

# The OpenWeatherMap key is read from the OPENWEATHER_API_KEY environment
# variable, falling back to a hidden interactive prompt, so that it is never
# stored in the notebook.
# NOTE(review): a literal key used to be committed in this cell; treat it as
# exposed and revoke/regenerate it.
API_KEY = os.environ.get("OPENWEATHER_API_KEY") or getpass("OpenWeatherMap API key: ")
lat = 17.3850   # Hyderabad latitude
lon = 78.4867   # Hyderabad longitude

# HTTPS so the key in the query string is not sent in clear text.
url = f"https://api.openweathermap.org/data/2.5/air_pollution?lat={lat}&lon={lon}&appid={API_KEY}"

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=10)

if response.status_code == 200:
    data = response.json()
    # The first entry of 'list' carries the pollutant concentrations.
    air = data['list'][0]['components']
    print("Air Pollution Data:")
    for pollutant, value in air.items():
        print(f"{pollutant}: {value} µg/m³")
else:
    print(f"Error {response.status_code}: {response.text}")


Air Pollution Data:
co: 91.77 µg/m³
no: 0.02 µg/m³
no2: 3.63 µg/m³
o3: 54.99 µg/m³
so2: 4.66 µg/m³
pm2_5: 2.19 µg/m³
pm10: 2.94 µg/m³
nh3: 2.63 µg/m³


In [None]:
# Step 1: Install Earth Engine and authenticate
!pip install earthengine-api --quiet

import ee
ee.Authenticate()  # your GCP project
ee.Initialize(project='lumiere-research-project')

# Step 2: Define Hyderabad and 30 km region
latitude = 17.3850
longitude = 78.4867

point = ee.Geometry.Point([longitude, latitude])
region = point.buffer(30000).bounds()  # 30 km buffer

# Step 3: Load Harmonized Sentinel-2A collection
collection = ee.ImageCollection("COPERNICUS/S2_HARMONIZED") \
    .filterDate("2024-01-01", "2024-01-31") \
    .filterBounds(region) \
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)) \
    .filterMetadata('SPACECRAFT_NAME', 'equals', 'Sentinel-2A')

# Step 4: Select useful bands for machine learning
bands = ['B2', 'B3', 'B4', 'B8', 'B11', 'B12']  # Blue, Green, Red, NIR, SWIR1, SWIR2

# Get the first 3 images
images = collection.toList(3)

# Step 5: Export each image (cast bands to UInt16 to fix export error)
for i in range(3):
    image = ee.Image(images.get(i)).select(bands)

    # Cast all bands to UInt16
    def cast_band(band):
        return image.select(band).toUint16().rename(band)

    casted_bands = [cast_band(band) for band in bands]
    image_casted = ee.Image.cat(casted_bands)

    # Get formatted date
    date_str = image.date().format("YYYY-MM-dd").getInfo()

    # Export to Google Drive
    task = ee.batch.Export.image.toDrive(
        image=image_casted.clip(region),
        description=f'S2A_HARMONIZED_Hyderabad_{date_str}',
        folder='EarthEngineExports',
        fileNamePrefix=f's2a_harmonized_hyd_{date_str}',
        region=region.coordinates().getInfo(),
        scale=10,
        maxPixels=1e13,
        fileFormat='GeoTIFF'
    )

    task.start()
    print(f"✅ Started export for image on {date_str}...")

print("\n📁 Files will appear in your Google Drive folder: EarthEngineExports")
print("🕒 You can monitor task progress here: https://code.earthengine.google.com/tasks")


✅ Started export for image on 2024-01-08...
✅ Started export for image on 2024-01-08...
✅ Started export for image on 2024-01-18...

📁 Files will appear in your Google Drive folder: EarthEngineExports
🕒 You can monitor task progress here: https://code.earthengine.google.com/tasks


In [None]:
# Step 1: Install Earth Engine API
!pip install earthengine-api --quiet

import ee
ee.Authenticate()
ee.Initialize(project='lumiere-research-project')

# Step 2: Define 30 km region around Hyderabad
latitude = 17.3850
longitude = 78.4867

point = ee.Geometry.Point([longitude, latitude])
region = point.buffer(30000).bounds()

# Step 3: Load Harmonized Sentinel-2A imagery
collection = ee.ImageCollection("COPERNICUS/S2_HARMONIZED") \
    .filterDate("2024-01-01", "2024-01-31") \
    .filterBounds(region) \
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)) \
    .filterMetadata('SPACECRAFT_NAME', 'equals', 'Sentinel-2A')

# Step 4: Select bands for ML
bands = ['B2', 'B3', 'B4', 'B8', 'B11', 'B12']
images = collection.toList(3)  # Export first 3 images

# Step 5: Export to Google Cloud Storage (GCS)
bucket_name = 'your-bucket-name'  # CHANGE THIS to your actual GCS bucket name

for i in range(3):
    image = ee.Image(images.get(i)).select(bands)

    # Cast all bands to UInt16
    def cast_band(band):
        return image.select(band).toUint16().rename(band)

    casted_bands = [cast_band(band) for band in bands]
    image_casted = ee.Image.cat(casted_bands)

    # Get export date string
    date_str = image.date().format("YYYY-MM-dd").getInfo()

    # Export task
    task = ee.batch.Export.image.toCloudStorage(
        image=image_casted.clip(region),
        description=f'S2A_HYDERABAD_GCS_{date_str}',
        bucket=bucket_name,
        fileNamePrefix=f's2a_hyd_{date_str}',
        region=region.coordinates().getInfo(),
        scale=10,
        maxPixels=1e13,
        fileFormat='GeoTIFF'
    )
    task.start()
    print(f"✅ Export started for {date_str} → GCS bucket: {bucket_name}")

print("\n🧾 Monitor progress at: https://code.earthengine.google.com/tasks")


✅ Export started for 2024-01-08 → GCS bucket: your-bucket-name
✅ Export started for 2024-01-08 → GCS bucket: your-bucket-name
✅ Export started for 2024-01-18 → GCS bucket: your-bucket-name

🧾 Monitor progress at: https://code.earthengine.google.com/tasks


In [None]:
import csv
import math
import os
from getpass import getpass
from time import sleep

import requests

# Constants
# The OpenWeatherMap key is read from the OPENWEATHER_API_KEY environment
# variable, falling back to a hidden interactive prompt, so that it is never
# stored in the notebook.
# NOTE(review): a literal key used to be committed here; treat it as exposed
# and revoke/regenerate it.
API_KEY = os.environ.get("OPENWEATHER_API_KEY") or getpass("OpenWeatherMap API key: ")
CENTER_LAT = 17.3850  # Grid centre latitude (Hyderabad)
CENTER_LON = 78.4867  # Grid centre longitude (Hyderabad)
RADIUS_KM = 30        # Sample points up to this distance from the centre
STEP_KM = 1  # Distance between points (higher = fewer API calls)

# Approx degree change for latitude/longitude
def offset_to_deg(distance_km, lat=17.3850):
    """Convert a ground distance in kilometres to approximate degree offsets.

    Args:
        distance_km: Distance to convert, in kilometres.
        lat: Latitude in degrees at which the longitude scale is evaluated.

    Returns:
        A ``(lat_deg, lon_deg)`` tuple: the latitude and longitude offsets,
        in degrees, corresponding to ``distance_km``.
    """
    km_per_lat_degree = 110.574
    # A degree of longitude shrinks with the cosine of the latitude.
    km_per_lon_degree = 111.320 * math.cos(math.radians(lat))
    return distance_km / km_per_lat_degree, distance_km / km_per_lon_degree

# Degree offsets for a single STEP_KM step, evaluated at offset_to_deg's
# default latitude. generate_coordinates() recomputes its own offsets and
# does not read these module-level values.
lat_offset, lon_offset = offset_to_deg(STEP_KM)

# Generate grid of points within the radius
def generate_coordinates(center_lat, center_lon, radius_km, step_km):
    """List the grid points lying within ``radius_km`` of a centre.

    A square lattice with ``step_km`` spacing is laid around the centre and
    only the nodes whose haversine distance to the centre does not exceed
    ``radius_km`` are kept.

    Args:
        center_lat: Latitude of the centre, in degrees.
        center_lon: Longitude of the centre, in degrees.
        radius_km: Maximum distance from the centre, in kilometres.
        step_km: Spacing between neighbouring grid nodes, in kilometres.

    Returns:
        A list of ``(lat, lon)`` tuples, ordered row by row (latitude index
        outermost, longitude index innermost).
    """
    lat_off, lon_off = offset_to_deg(step_km, center_lat)

    # Number of whole steps that fit in the radius on each side of the centre.
    n_steps = int(radius_km // step_km)
    step_indices = range(-n_steps, n_steps + 1)

    candidates = (
        (center_lat + row * lat_off, center_lon + col * lon_off)
        for row in step_indices
        for col in step_indices
    )
    return [
        (lat, lon)
        for lat, lon in candidates
        if haversine(center_lat, center_lon, lat, lon) <= radius_km
    ]

# Haversine formula to calculate distance
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The haversine distance in kilometres, using an Earth radius of 6371 km.
    """
    earth_radius_km = 6371
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    half_dphi = math.radians(lat2 - lat1) / 2
    half_dlambda = math.radians(lon2 - lon1) / 2

    hav = math.sin(half_dphi) ** 2 + math.cos(phi1) * \
        math.cos(phi2) * math.sin(half_dlambda) ** 2
    return 2 * earth_radius_km * math.asin(math.sqrt(hav))

# API request
def get_air_pollution_data(lat, lon):
    """Fetch the current air-pollution components for one coordinate.

    Args:
        lat: Latitude in degrees.
        lon: Longitude in degrees.

    Returns:
        A dict with ``"lat"``, ``"lon"`` and one entry per pollutant taken
        from the first element of the response's ``list``, or ``None`` when
        the request fails, returns a non-200 status, or the payload does not
        have the expected structure.
    """
    url = "https://api.openweathermap.org/data/2.5/air_pollution"
    query = {"lat": lat, "lon": lon, "appid": API_KEY}

    try:
        # The timeout stops one stalled connection from blocking the whole crawl.
        response = requests.get(url, params=query, timeout=10)
    except requests.RequestException as exc:
        # Only the exception type is printed: the full message can embed the
        # request URL, which contains the API key.
        print(f"Request failed for {lat},{lon}: {type(exc).__name__}")
        return None

    if response.status_code != 200:
        print(f"Error {response.status_code} for {lat},{lon}: {response.text}")
        return None

    try:
        components = response.json()['list'][0]['components']
    except (ValueError, KeyError, IndexError) as exc:
        # Covers invalid JSON, missing keys and an empty 'list'.
        print(f"Unexpected response for {lat},{lon}: {type(exc).__name__}")
        return None

    return {
        "lat": lat,
        "lon": lon,
        **components
    }

# Main function
def main():
    """Query every grid point and save the collected rows to a CSV file.

    Builds the grid with ``generate_coordinates``, calls
    ``get_air_pollution_data`` for each point (pausing one second between
    calls) and writes the successful results to ``air_pollution_data.csv``.
    When no request succeeds, nothing is written.
    """
    coords = generate_coordinates(CENTER_LAT, CENTER_LON, RADIUS_KM, STEP_KM)
    print(f"Fetching data for {len(coords)} points...")
    results = []

    for i, (lat, lon) in enumerate(coords):
        print(f"[{i+1}/{len(coords)}] Getting data for {lat:.4f}, {lon:.4f}")
        data = get_air_pollution_data(lat, lon)
        if data:
            results.append(data)
        sleep(1)  # Avoid hitting API rate limits

    # With no successful request there is no row to derive a header from.
    if not results:
        print("No data retrieved; air_pollution_data.csv was not written.")
        return

    # Header = union of the keys of all rows, in first-seen order, so a row
    # with an extra or missing pollutant cannot make DictWriter raise.
    fieldnames = list(dict.fromkeys(key for row in results for key in row))

    # Write to CSV
    with open("air_pollution_data.csv", mode="w", newline="") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

    print("Saved to air_pollution_data.csv")

# Run the script
main()


Fetching data for 2811 points...
[1/2811] Getting data for 17.1227, 78.4208
[2/2811] Getting data for 17.1227, 78.4302
[3/2811] Getting data for 17.1227, 78.4396
[4/2811] Getting data for 17.1227, 78.4490
[5/2811] Getting data for 17.1227, 78.4585
[6/2811] Getting data for 17.1227, 78.4679
[7/2811] Getting data for 17.1227, 78.4773
[8/2811] Getting data for 17.1227, 78.4867
[9/2811] Getting data for 17.1227, 78.4961
[10/2811] Getting data for 17.1227, 78.5055
[11/2811] Getting data for 17.1227, 78.5149
[12/2811] Getting data for 17.1227, 78.5244
[13/2811] Getting data for 17.1227, 78.5338
[14/2811] Getting data for 17.1227, 78.5432
[15/2811] Getting data for 17.1227, 78.5526
[16/2811] Getting data for 17.1318, 78.3926
[17/2811] Getting data for 17.1318, 78.4020
[18/2811] Getting data for 17.1318, 78.4114
[19/2811] Getting data for 17.1318, 78.4208
[20/2811] Getting data for 17.1318, 78.4302
[21/2811] Getting data for 17.1318, 78.4396
[22/2811] Getting data for 17.1318, 78.4490
[23/2811

In [None]:
# Command-line Earth Engine authentication. The output recorded for this cell
# reports limited Colab support and suggests ee.Authenticate() instead.
!earthengine authenticate


E0000 00:00:1750613811.048279   16392 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750613811.055198   16392 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Authenticate: Limited support in Colab. Use ee.Authenticate() or --auth_mode=notebook instead.
To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://code.earthengine.google.com/client-auth?scopes=https%3A//www.googleapis.com/auth/earthengine%20https%3A//www.googleapis.com/auth/cloud-platform%20https%3A//www.googleapis.com/auth/drive%20https%3A//www.googleapis.com/auth/devstorage.full_control&request_id=wKPpjlK_dJjC2WS-v0If0tKkt-ozJOANUJyyII5kwRM&tc=htlD_Lna4lVQWGiBRcTZg48B9Wt0HvefuLr

In [None]:
# Install Earth Engine
!pip install earthengine-api

import ee
ee.Authenticate()
ee.Initialize(project='lumiere-research-project')  # Replace with your GEE project ID




In [None]:
import pandas as pd

# Read the air-pollution CSV from the Colab working directory and discard the
# 'no' column in one chained expression.
df = pd.read_csv('/content/air_pollution_data.csv').drop(columns=['no'])


In [None]:
# Preview the first rows of the table loaded from the CSV.
df.head()

Unnamed: 0,lat,lon,co,no2,o3,so2,pm2_5,pm10,nh3
0,17.122732,78.420808,93.93,3.94,46.3,3.79,1.24,2.06,2.73
1,17.122732,78.430221,93.93,3.94,46.3,3.79,1.24,2.06,2.73
2,17.122732,78.439634,93.93,3.94,46.3,3.79,1.24,2.06,2.73
3,17.122732,78.449048,93.93,3.94,46.3,3.79,1.24,2.06,2.73
4,17.122732,78.458461,93.93,3.94,46.3,3.79,1.24,2.06,2.73


In [None]:
import datetime

def get_sentinel_data(lat, lon, start_date, end_date, buffer_m=500):
    """Build a median RGB composite around one coordinate.

    Args:
        lat: Latitude of the point, in degrees.
        lon: Longitude of the point, in degrees.
        start_date: Start of the date filter, passed to ``filterDate``.
        end_date: End of the date filter, passed to ``filterDate``.
        buffer_m: Value passed to ``buffer()`` on the point geometry.

    Returns:
        The per-pixel median of bands B4, B3 and B2 over the images of
        'COPERNICUS/S2_HARMONIZED' that intersect the buffered point, fall in
        the date range and have CLOUDY_PIXEL_PERCENTAGE below 10, clipped to
        the buffered point.
    """
    footprint = ee.Geometry.Point(lon, lat).buffer(buffer_m)
    low_cloud = ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10)

    scenes = ee.ImageCollection('COPERNICUS/S2_HARMONIZED')
    scenes = scenes.filterBounds(footprint)
    scenes = scenes.filterDate(start_date, end_date)
    scenes = scenes.filter(low_cloud)
    rgb_scenes = scenes.select(['B4', 'B3', 'B2'])  # RGB Bands

    return rgb_scenes.median().clip(footprint)


In [None]:
import uuid
# Folder remains the same
EXPORT_FOLDER = 'sentinel2a-dataset'

BATCH_SIZE = 200
# Index of the first point to export. Kept at 400 (instead of 0) as in the
# original run; set it to 0 to export every point.
START_INDEX = 400
# Buffer used for both the composite footprint and the export region, so the
# two cannot drift apart.
BUFFER_M = 500
total_points = len(df)

START = '2024-06-01'
END = '2024-06-30'

for batch_start in range(START_INDEX, total_points, BATCH_SIZE):
    batch_end = min(batch_start + BATCH_SIZE, total_points)
    print(f"\n🚀 Starting batch {batch_start} to {batch_end - 1}...")

    for idx in range(batch_start, batch_end):
        lat, lon = df.loc[idx, 'lat'], df.loc[idx, 'lon']
        image = get_sentinel_data(lat, lon, START, END, buffer_m=BUFFER_M)
        # The ee.Geometry is handed to the export as-is; calling
        # .coordinates().getInfo() here cost one blocking request per point.
        region = ee.Geometry.Point(lon, lat).buffer(BUFFER_M).bounds()

        task = ee.batch.Export.image.toDrive(
            image=image,
            description=f"sentinel2_{idx}",
            folder=EXPORT_FOLDER,
            fileNamePrefix=f"sentinel2_{idx}",
            scale=10,
            region=region,
            maxPixels=1e9
        )
        task.start()
        print(f"✅ Export task started for point {idx} (lat: {lat}, lon: {lon})")

    # Manual pause between batches before more tasks are queued.
    input(f"\n🔔 Batch {batch_start}–{batch_end - 1} submitted. Press Enter to continue to next batch...")



🚀 Starting batch 400 to 599...
✅ Export task started for point 400 (lat: 17.222213088067722, lon: 78.5243524648926)
✅ Export task started for point 401 (lat: 17.222213088067722, lon: 78.53376558111576)
✅ Export task started for point 402 (lat: 17.222213088067722, lon: 78.5431786973389)
✅ Export task started for point 403 (lat: 17.222213088067722, lon: 78.55259181356206)
✅ Export task started for point 404 (lat: 17.222213088067722, lon: 78.56200492978522)
✅ Export task started for point 405 (lat: 17.222213088067722, lon: 78.57141804600836)
✅ Export task started for point 406 (lat: 17.222213088067722, lon: 78.58083116223152)
✅ Export task started for point 407 (lat: 17.222213088067722, lon: 78.59024427845466)
✅ Export task started for point 408 (lat: 17.222213088067722, lon: 78.59965739467782)
✅ Export task started for point 409 (lat: 17.222213088067722, lon: 78.60907051090098)
✅ Export task started for point 410 (lat: 17.222213088067722, lon: 78.61848362712412)
✅ Export task started fo