<a href="https://colab.research.google.com/github/anandita-garg/Predicting-Optimal-Locations-For-Solar-Farms/blob/main/API_Calls.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import random
import pandas as pd
import numpy as np
from geopy.distance import geodesic
from tqdm import tqdm
import geopandas as gpd
from shapely.geometry import Point

# --- Coordinate generation setup ---

# Load the country polygons shapefile and keep only the row(s) whose ADMIN
# field is 'India'; `india` is used below for point-in-polygon checks.
world = gpd.read_file('ne_10m_admin_0_countries.shp')
india = world[world['ADMIN'] == 'India']

# Latitude/longitude bounding box (degrees) that random candidates are drawn
# from; candidates are afterwards filtered against the `india` polygon.
lat_min, lat_max = 6.462, 37.087
lon_min, lon_max = 68.175, 97.417

def generate_random_coordinate():
    """Draw one uniformly random (latitude, longitude) pair from the bounding box.

    Latitude is sampled before longitude; the pair is returned as a tuple.
    """
    return (
        random.uniform(lat_min, lat_max),
        random.uniform(lon_min, lon_max),
    )

def is_valid_coordinate(new_coord, existing_coords, min_distance_m=30):
    """Check that a candidate lies inside India and is not too close to others.

    Args:
        new_coord: Candidate as a (latitude, longitude) pair.
        existing_coords: Already accepted (latitude, longitude) pairs.
        min_distance_m: Minimum geodesic separation, in metres, required
            between the candidate and every accepted coordinate.

    Returns:
        True when the candidate is contained in the `india` geometry and is at
        least `min_distance_m` away from every existing coordinate.
    """
    latitude, longitude = new_coord[0], new_coord[1]
    # shapely points are (x, y) == (longitude, latitude).
    inside_india = india.contains(Point(longitude, latitude)).any()
    if not inside_india:
        return False
    too_close = any(
        geodesic(new_coord, accepted).meters < min_distance_m
        for accepted in existing_coords
    )
    return not too_close

def generate_coordinates(num_coords=6000):
    """Generate up to `num_coords` valid random coordinates.

    Candidates are drawn with `generate_random_coordinate` and kept when
    `is_valid_coordinate` accepts them against the coordinates collected so
    far.

    Args:
        num_coords: Number of coordinates to collect.

    Returns:
        List of (latitude, longitude) tuples. The list is shorter than
        `num_coords` when the rejected-attempt budget (10 x `num_coords`,
        counted across the whole run) is exhausted.
    """
    coordinates = []
    attempts = 0
    max_attempts = num_coords * 10
    for _ in tqdm(range(num_coords), desc="Generating Coordinates", ncols=100):
        while True:
            new_coord = generate_random_coordinate()
            if is_valid_coordinate(new_coord, coordinates):
                coordinates.append(new_coord)
                break
            attempts += 1
            if attempts > max_attempts:
                print("Too many attempts — stopping early.")
                # Return rather than break: a bare break only leaves the inner
                # while-loop, so the outer for-loop would keep sampling and
                # re-printing this message for every remaining slot.
                return coordinates
    return coordinates

# Generate 6000 coordinates and write them to 'test.csv' with the columns
# 'latitude' and 'longitude' (no index column).
coords = generate_coordinates(6000)
df = pd.DataFrame(coords, columns=['latitude', 'longitude'])
df.to_csv('test.csv', index=False)

In [None]:
import ee
import pandas as pd
import requests
import numpy as np
import time
from datetime import datetime
from sklearn.neighbors import BallTree
from functools import lru_cache


#API call to retrieve features for AHP1


# Project-specific land-use/land-cover classes: code -> display label.
CUSTOM_LULC_CLASSES = {
    5: "Built-up Areas", 4: "Water Bodies", 3: "Forest",
    2: "Vegetation", 1: "Wasteland/Barren Areas"
}

# Source class value -> custom class code (keys of CUSTOM_LULC_CLASSES).
# NOTE(review): the keys look like ESA WorldCover "Map" band values
# (10 tree cover, 20 shrubland, 30 grassland, 40 cropland, 50 built-up,
# 60 bare/sparse, 70 snow/ice, 80 water, 90 wetland, 95 mangroves,
# 100 moss/lichen) — confirm against the WorldCover legend.
ESA_TO_CUSTOM_LULC = {
    50: 5, 80: 4, 10: 3, 20: 3, 40: 2, 30: 2,
    60: 1, 70: 1, 90: 1, 95: 1, 100: 1
}

# Authenticate with Earth Engine and initialise the client for project 'test'.
ee.Authenticate()
ee.Initialize(project='test')

# Load the coordinate list; the feature loop below adds columns to `df`.
df = pd.read_csv("test.csv")
print(f"Loaded {len(df)} coordinates from CSV")

def get_lulc(lat, lon):
    """Look up the custom land-cover class at a point.

    Samples the "Map" band of the ESA/WorldCover/v100/2020 image at 10 m scale
    after remapping its values through ESA_TO_CUSTOM_LULC.

    Returns:
        (code, label) where label comes from CUSTOM_LULC_CLASSES, or "Unknown"
        when the code has no entry there.
    """
    source_codes = list(ESA_TO_CUSTOM_LULC.keys())
    target_codes = list(ESA_TO_CUSTOM_LULC.values())

    worldcover = ee.Image("ESA/WorldCover/v100/2020").select("Map")
    custom_band = worldcover.remap(source_codes, target_codes).rename("Custom_LULC")

    # Earth Engine points are [longitude, latitude].
    location = ee.Geometry.Point([lon, lat])
    sampled = custom_band.sample(region=location, scale=10).first()
    lulc_value = sampled.get("Custom_LULC").getInfo()

    label = CUSTOM_LULC_CLASSES.get(lulc_value, "Unknown")
    return lulc_value, label

def get_elevation(lat, lon):
    """Return the "elevation" band of USGS/SRTMGL1_003 sampled at the point (30 m scale)."""
    location = ee.Geometry.Point([lon, lat])
    sample = ee.Image("USGS/SRTMGL1_003").sample(region=location, scale=30).first()
    return sample.get("elevation").getInfo()

def get_slope(lat, lon):
    """Return the terrain "slope" derived from USGS/SRTMGL1_003 at the point (30 m scale)."""
    location = ee.Geometry.Point([lon, lat])
    slope_band = ee.Terrain.products(ee.Image("USGS/SRTMGL1_003")).select("slope")
    sample = slope_band.sample(region=location, scale=30).first()
    return sample.get("slope").getInfo()

def get_cloud_cover(lat, lon):
    """Summarise daily cloud amount at a point from the NASA POWER daily API.

    Requests the CLOUD_AMT parameter for 2024-04-10 .. 2025-04-10.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        (avg_cloud_cover, cloudy_percent): the mean of the valid daily values
        and the percentage of valid days whose value exceeds 70. Returns
        (None, None) on a non-200 response or when no valid day is available.

    Raises:
        requests.RequestException: on connection failure or timeout.
        KeyError / ValueError: if the response body is not the expected JSON.
    """
    start_date = "20240410"
    end_date = "20250410"
    url = (
        f"https://power.larc.nasa.gov/api/temporal/daily/point"
        f"?parameters=CLOUD_AMT"
        f"&community=RE"
        f"&latitude={lat}"
        f"&longitude={lon}"
        f"&start={start_date}"
        f"&end={end_date}"
        f"&format=JSON"
    )

    # A timeout keeps one stalled connection from hanging the whole batch run.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return None, None

    data = response.json()
    cloud_data = data["properties"]["parameter"]["CLOUD_AMT"]
    daily = pd.Series(cloud_data, dtype="float64")

    # POWER marks missing days with a fill value (reported in the response
    # header, conventionally -999). Averaging it in would drag the mean far
    # below zero, so drop those days along with any nulls.
    fill_value = data.get("header", {}).get("fill_value", -999)
    valid = daily[daily != fill_value].dropna()
    if valid.empty:
        return None, None

    avg_cloud_cover = valid.mean()
    cloudy_days = (valid > 70).sum()
    cloudy_percent = (cloudy_days / len(valid)) * 100

    return avg_cloud_cover, cloudy_percent

def get_humidity(lat, lon):
    """Return the average of the daily mean relative humidity at a point.

    Queries the Open-Meteo archive API for `relative_humidity_2m_mean`
    between 2024-04-10 and 2025-04-10.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        The arithmetic mean of the non-null daily values, or None when the
        response has no "daily" section or no usable values.

    Raises:
        requests.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    start_date = "2024-04-10"
    end_date = "2025-04-10"
    url = (
        "https://archive-api.open-meteo.com/v1/archive?"
        f"latitude={lat}&longitude={lon}"
        f"&start_date={start_date}&end_date={end_date}"
        "&daily=relative_humidity_2m_mean"
        "&timezone=auto"
    )

    # Bound the wait so a stalled connection cannot block the batch forever.
    response = requests.get(url, timeout=30)
    data = response.json()

    if "daily" not in data:
        return None

    # Days without data arrive as JSON null; summing them would raise
    # TypeError and lose the whole series.
    humidity = [
        value
        for value in (data["daily"]["relative_humidity_2m_mean"] or [])
        if value is not None
    ]
    return sum(humidity) / len(humidity) if humidity else None

def get_temperature(lat, lon):
    """Return the average of the daily mean 2 m temperature at a point.

    Queries the Open-Meteo archive API for `temperature_2m_mean` between
    2024-04-10 and 2025-04-10.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        The arithmetic mean of the non-null daily values, or None when the
        response has no "daily" section or no usable values.

    Raises:
        requests.RequestException: on connection failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    start_date = "2024-04-10"
    end_date = "2025-04-10"
    url = (
        "https://archive-api.open-meteo.com/v1/archive?"
        f"latitude={lat}&longitude={lon}"
        f"&start_date={start_date}&end_date={end_date}"
        "&daily=temperature_2m_mean"
        "&timezone=auto"
    )

    # Bound the wait so a stalled connection cannot block the batch forever.
    response = requests.get(url, timeout=30)
    data = response.json()

    if "daily" not in data:
        return None

    # Days without data arrive as JSON null; summing them would raise
    # TypeError and lose the whole series.
    temps = [
        value
        for value in (data["daily"]["temperature_2m_mean"] or [])
        if value is not None
    ]
    return sum(temps) / len(temps) if temps else None

@lru_cache(maxsize=None)
def get_solar_radiation(lat, lon):
    """Return the mean daily shortwave radiation sum at a point for 2024.

    Queries the Open-Meteo archive API for `shortwave_radiation_sum` between
    2024-01-01 and 2024-12-31 and averages the days that have data.

    Results are memoised per (lat, lon). Note that a None result from a
    failed request is memoised too; call `get_solar_radiation.cache_clear()`
    before retrying a failed point.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.

    Returns:
        The mean of the available daily values, or None when the request
        fails, the response is malformed, or no day has data.
    """
    start_date = "2024-01-01"
    end_date = "2024-12-31"
    url = (
        f"https://archive-api.open-meteo.com/v1/archive?latitude={lat}&longitude={lon}"
        f"&start_date={start_date}&end_date={end_date}&daily=shortwave_radiation_sum"
        f"&timezone=auto"
    )

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()['daily']['shortwave_radiation_sum']
        # dtype=float turns JSON nulls (None) into NaN so nanmean can skip
        # them; on the raw list nanmean raises TypeError on None.
        values = np.array(data, dtype=float)
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Best-effort lookup: network/HTTP errors and malformed payloads yield
        # None, but unrelated errors (e.g. KeyboardInterrupt) now propagate.
        return None

    if values.size == 0 or np.isnan(values).all():
        return None
    return np.nanmean(values)

def get_distance_to_road(lat, lon, bbox_width=0.1):
    """Return the distance in km from a point to the nearest mapped road node.

    Fetches the nodes of every OpenStreetMap way tagged `highway` inside a
    square bounding box centred on the point (Overpass API), then finds the
    nearest node with a haversine BallTree. When the box contains no road
    node, the search is repeated with a box twice as wide for as long as the
    current width is below 0.5 degrees.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.
        bbox_width: Side length of the search box in degrees.

    Returns:
        Great-circle distance in kilometres (Earth radius 6371 km) to the
        nearest road node, or None when no node is found in the widest box.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx response (for example when Overpass rate-limits).
        ValueError: if the response body is not valid JSON.
    """
    half_width = bbox_width / 2
    bbox = (
        lon - half_width, lat - half_width,
        lon + half_width, lat + half_width
    )

    # Overpass bounding boxes are (south, west, north, east).
    query = f"""
    [out:json][timeout:300];
    way[highway]({bbox[1]},{bbox[0]},{bbox[3]},{bbox[2]});
    node(w);
    out;
    """

    # The read timeout sits just above the server-side 300 s query timeout so
    # a dead connection cannot hang the batch indefinitely.
    response = requests.post(
        "https://overpass-api.de/api/interpreter",
        data={'data': query},
        timeout=(10, 310),
    )
    # Surface HTTP errors explicitly instead of failing later on a non-JSON body.
    response.raise_for_status()
    data = response.json()

    road_coords = [
        [element['lat'], element['lon']]
        for element in data.get('elements', [])
        if element['type'] == 'node'
    ]

    if not road_coords:
        if bbox_width < 0.5:
            return get_distance_to_road(lat, lon, bbox_width * 2)
        return None

    # The haversine metric expects [latitude, longitude] in radians and
    # returns distances in radians on the unit sphere.
    road_coords_rad = np.deg2rad(road_coords)
    tree = BallTree(road_coords_rad, metric='haversine')

    test_coord_rad = np.deg2rad([[lat, lon]])
    distances, _ = tree.query(test_coord_rad, k=1)

    distance_km = distances[0][0] * 6371
    return distance_km

# Main feature-collection loop: for every coordinate in `df`, fetch each
# feature in turn and store it in a column of the same row.
for i, row in df.iterrows():
    lat = row['latitude']
    lon = row['longitude']
    print(f"\n==== Processing coordinate {i+1}/{len(df)}: ({lat}, {lon}) ====")

    # Human-readable summary of what was fetched for this row (printed below).
    features_added = []
    # All features share one try-block: the first exception skips the
    # remaining features for this row, which is then only reported.
    try:
        lulc_code, lulc_type = get_lulc(lat, lon)
        df.at[i, 'lulc_code'] = lulc_code
        df.at[i, 'lulc_type'] = lulc_type
        features_added.append(f"LULC: {lulc_type} (code {lulc_code})")

        elevation = get_elevation(lat, lon)
        df.at[i, 'elevation'] = elevation
        features_added.append(f"Elevation: {elevation} meters")

        slope = get_slope(lat, lon)
        df.at[i, 'slope'] = slope
        features_added.append(f"Slope: {slope:.2f}°")

        # The remaining getters may return None; the value is stored either
        # way, but only non-None results are added to the summary.
        cloud_cover, cloudy_days_percent = get_cloud_cover(lat, lon)
        df.at[i, 'cloud_cover'] = cloud_cover
        df.at[i, 'cloudy_days_percent'] = cloudy_days_percent
        if cloud_cover is not None:
            features_added.append(f"Cloud cover: {cloud_cover:.2f}%, Cloudy days: {cloudy_days_percent:.2f}%")

        humidity = get_humidity(lat, lon)
        df.at[i, 'humidity'] = humidity
        if humidity is not None:
            features_added.append(f"Humidity: {humidity:.2f}%")

        temperature = get_temperature(lat, lon)
        df.at[i, 'temperature'] = temperature
        if temperature is not None:
            features_added.append(f"Temperature: {temperature:.2f}°C")

        solar = get_solar_radiation(lat, lon)
        df.at[i, 'solar_radiation'] = solar
        # NOTE(review): the label below says kWh/m², but Open-Meteo appears to
        # document shortwave_radiation_sum in MJ/m² — confirm the unit.
        if solar is not None:
            features_added.append(f"Solar radiation: {solar:.2f} kWh/m²")

        distance = get_distance_to_road(lat, lon)
        df.at[i, 'distance_to_road'] = distance
        if distance is not None:
            features_added.append(f"Distance to road: {distance:.3f} km")

        print(f"For coordinate ({lat}, {lon}), the following features have been added:")
        for feature in features_added:
            print(f"  - {feature}")

        # Short pause between coordinates; only reached when the row succeeded.
        time.sleep(0.5)

    except Exception as e:
        print(f"Error processing coordinate {i+1}: {e}")

    # Checkpoint every 10 rows by overwriting the input file in place.
    if (i + 1) % 10 == 0:
        df.to_csv("test.csv", index=False)
        print(f"Progress saved ({i+1}/{len(df)} coordinates processed)")

# Final save once every row has been visited.
df.to_csv("test.csv", index=False)

In [None]:
import pandas as pd
import ee
import numpy as np
import time
import requests
import os
from datetime import datetime

# Authenticate with Earth Engine and initialise the client for project 'test'
# (repeated here so this cell can run on its own).
ee.Authenticate()
ee.Initialize(project='test')

#API call to retrieve remaining features for AHP2
def get_ndvi_class(lat, lon, start_date, end_date):
    """Return the latest scaled NDVI at a point and its vegetation class.

    Uses the most recent MODIS/061/MOD13Q1 image in [start_date, end_date),
    samples the 'NDVI' band at 250 m scale and divides the raw value by 10000.

    Returns:
        (ndvi, label) with label one of "Barren/Urban" (< 0.1), "Sparse"
        (< 0.3), "Moderate" (< 0.6) or "Dense"; (None, None) when the sampled
        value is None or any error occurs (the error is printed).
    """
    # Upper bound (exclusive) of each class, checked in ascending order.
    class_limits = (
        (0.1, "Barren/Urban"),
        (0.3, "Sparse"),
        (0.6, "Moderate"),
    )
    try:
        latest = (
            ee.ImageCollection("MODIS/061/MOD13Q1")
            .filterDate(start_date, end_date)
            .sort('system:time_start', False)
            .first()
        )
        location = ee.Geometry.Point(lon, lat)
        raw = (
            latest.select('NDVI')
            .sample(region=location, scale=250)
            .first()
            .get('NDVI')
            .getInfo()
        )
        if raw is None:
            return None, None

        ndvi = raw / 10000
        for upper_limit, label in class_limits:
            if ndvi < upper_limit:
                return ndvi, label
        return ndvi, "Dense"
    except Exception as e:
        print(f"NDVI error @({lat},{lon}): {e}")
        return None, None

def get_aspect(lat, lon):
    """Return the terrain aspect at a point, derived from USGS/SRTMGL1_003.

    Samples the 'aspect' band at 30 m scale. Any error is printed and None is
    returned instead.
    """
    try:
        location = ee.Geometry.Point([lon, lat])
        aspect_band = ee.Terrain.aspect(ee.Image("USGS/SRTMGL1_003"))
        sample = aspect_band.sample(region=location, scale=30).first()
        return sample.get('aspect').getInfo()
    except Exception as e:
        print(f"Aspect error @({lat},{lon}): {e}")
        return None

def classify_aspect(aspect):
    """Map an aspect angle in degrees to a (score, compass direction) pair.

    The sentinel -1 is reported as flat terrain with score 0. Angles in
    [0, 360] fall into eight 45-degree sectors centred on the compass points;
    each sector includes its upper boundary. Scores rise from 0 (North) to
    1 (South), symmetrically on the east and west sides. Anything else
    (out-of-range or NaN) yields (None, "Unknown").
    """
    if aspect == -1:
        return 0, "Flat"
    if not (0 <= aspect <= 360):
        return None, "Unknown"

    # (inclusive upper bound, score, direction), scanned in ascending order.
    sectors = (
        (22.5, 0, "North"),
        (67.5, 0.25, "Northeast"),
        (112.5, 0.5, "East"),
        (157.5, 0.75, "Southeast"),
        (202.5, 1, "South"),
        (247.5, 0.75, "Southwest"),
        (292.5, 0.5, "West"),
        (337.5, 0.25, "Northwest"),
        (360, 0, "North"),
    )
    for upper_bound, score, direction in sectors:
        if aspect <= upper_bound:
            return score, direction
    return None, "Unknown"

def get_lst(lat, lon, start_date, end_date):
    """Return the latest daytime land-surface temperature at a point.

    Uses the most recent MODIS/061/MOD11A2 image in [start_date, end_date),
    samples 'LST_Day_1km' at 1000 m scale and converts the raw value with
    `raw * 0.02 - 273.15`.

    Returns:
        The converted value, or None when the sampled value is None or any
        error occurs (the error is printed).
    """
    try:
        latest = (
            ee.ImageCollection("MODIS/061/MOD11A2")
            .filterDate(start_date, end_date)
            .sort('system:time_start', False)
            .first()
        )
        location = ee.Geometry.Point(lon, lat)
        raw = (
            latest.select('LST_Day_1km')
            .sample(region=location, scale=1000)
            .first()
            .get('LST_Day_1km')
            .getInfo()
        )
        return None if raw is None else raw * 0.02 - 273.15
    except Exception as e:
        print(f"LST error @({lat},{lon}): {e}")
        return None

def retry_request(url, params, max_retries=3, delay=1):
    """GET a JSON document, retrying on failure.

    Args:
        url: Endpoint to request.
        params: Query parameters passed to `requests.get`.
        max_retries: Maximum number of attempts.
        delay: Seconds to wait between attempts.

    Returns:
        The decoded JSON body of the first HTTP 200 response, or None when
        every attempt fails (network error, non-200 status, or invalid JSON).
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, params=params, timeout=10)
            if response.status_code == 200:
                return response.json()
        except (requests.RequestException, ValueError):
            # Network failure or undecodable body: fall through to the backoff.
            pass
        # Back off after *any* failed attempt. Sleeping only on exceptions
        # made non-200 responses (e.g. rate limiting) retry instantly, which
        # burns all attempts at once. No sleep is needed after the last try.
        if attempt < max_retries - 1:
            time.sleep(delay)
    return None

def get_wind_speed(lat, lon, start_date, end_date):
    """Return the average 10 m wind speed in m/s at a point over a date range.

    For every day the Open-Meteo archive API reports both
    `windspeed_10m_max` and `windspeed_10m_min`, the midpoint of the two is
    taken; the result is the mean of those daily midpoints.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.
        start_date: First day, "YYYY-MM-DD".
        end_date: Last day, "YYYY-MM-DD".

    Returns:
        Mean wind speed in metres per second, or None when the request fails,
        the payload is malformed, or no day has both values.
    """
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": lat,
        "longitude": lon,
        "start_date": start_date,
        "end_date": end_date,
        "daily": "windspeed_10m_max,windspeed_10m_min",
        # Open-Meteo reports wind in km/h unless told otherwise, while the
        # result is stored and printed as m/s downstream.
        "wind_speed_unit": "ms",
        "timezone": "auto"
    }
    data = retry_request(url, params)
    if not data or "daily" not in data:
        return None
    try:
        max_ws = data["daily"]["windspeed_10m_max"]
        min_ws = data["daily"]["windspeed_10m_min"]
        # Skip days where either value is null instead of letting one gap
        # raise TypeError and discard the whole series.
        ws = [
            (x + y) / 2
            for x, y in zip(max_ws, min_ws)
            if x is not None and y is not None
        ]
        return np.mean(ws) if ws else None
    except (KeyError, TypeError) as e:
        print(f"Wind parsing error @({lat},{lon}): {e}")
        return None

def process_csv(input_csv, output_csv, start_date, end_date):
    """Add NDVI, aspect, wind-speed and LST features to every coordinate.

    Reads coordinates from `input_csv` (columns 'latitude' and 'longitude').
    When `output_csv` already exists it is loaded and used to resume: rows
    whose NDVI, aspect value, wind speed and LST are all present are skipped.
    The output file is rewritten after every processed row so an interrupted
    run loses at most one row.

    Args:
        input_csv: Path of the CSV holding the coordinates.
        output_csv: Path of the CSV to create or resume.
        start_date: Start of the period, "YYYY-MM-DD".
        end_date: End of the period, "YYYY-MM-DD".
    """
    df = pd.read_csv(input_csv)
    print(f"\nLoaded {len(df)} rows from {input_csv}")

    # Output feature columns with the value each one starts out with.
    feature_defaults = [
        ('NDVI', np.nan),
        ('Vegetation Class', None),
        ('Aspect Value', np.nan),
        ('Aspect Class', np.nan),
        ('Aspect Direction', None),
        ('Wind Speed (m/s)', np.nan),
        ('LST (°C)', np.nan),
    ]

    if os.path.exists(output_csv):
        df_out = pd.read_csv(output_csv)
        print(f"Found existing output file '{output_csv}' with {len(df_out)} rows.")
    else:
        df_out = df.copy()
        print(f"Creating new output file '{output_csv}'.")

    # Also applied when resuming, so an older output file that lacks some of
    # the columns does not raise KeyError in the loop below.
    for column, default in feature_defaults:
        if column not in df_out.columns:
            df_out[column] = default

    # A text column that is entirely empty is read back from CSV as float64;
    # keep both text columns as object so string labels can be assigned.
    for column in ('Vegetation Class', 'Aspect Direction'):
        df_out[column] = df_out[column].astype(object)

    for idx, row in df.iterrows():
        lat, lon = row['latitude'], row['longitude']
        print(f"\nProcessing point {idx + 1}/{len(df)}: ({lat}, {lon})")

        existing = df_out.iloc[idx]
        need_update = (
            pd.isna(existing['NDVI']) or
            pd.isna(existing['Aspect Value']) or
            pd.isna(existing['Wind Speed (m/s)']) or
            pd.isna(existing['LST (°C)'])
        )

        if not need_update:
            print("Already processed. Skipping.")
            continue

        ndvi, veg_class = get_ndvi_class(lat, lon, start_date, end_date)
        aspect_val = get_aspect(lat, lon)
        aspect_class, aspect_dir = classify_aspect(aspect_val) if aspect_val is not None else (None, None)
        wind_speed = get_wind_speed(lat, lon, start_date, end_date)
        lst = get_lst(lat, lon, start_date, end_date)

        df_out.at[idx, 'NDVI'] = ndvi
        df_out.at[idx, 'Vegetation Class'] = veg_class
        df_out.at[idx, 'Aspect Value'] = aspect_val
        df_out.at[idx, 'Aspect Class'] = aspect_class
        df_out.at[idx, 'Aspect Direction'] = aspect_dir
        df_out.at[idx, 'Wind Speed (m/s)'] = wind_speed
        df_out.at[idx, 'LST (°C)'] = lst

        print(f"NDVI: {ndvi}, Vegetation: {veg_class}")
        print(f"Aspect: {aspect_val}°, Direction: {aspect_dir}")
        # Compare against None explicitly: 0.0 m/s and 0.0 °C are valid
        # readings and must not be reported as fetch failures.
        print(f"Wind speed: {wind_speed:.2f} m/s" if wind_speed is not None else "Wind speed: Failed to fetch")
        print(f"LST: {lst:.2f} °C" if lst is not None else "LST: Failed to fetch")

        # Checkpoint after every row.
        df_out.to_csv(output_csv, index=False)

    print(f"\nAll coordinates processed. Results saved to '{output_csv}'.")

# Read the coordinate file written by the earlier cells. The input path needs
# its ".csv" extension: the bare name "test" does not match the 'test.csv'
# file those cells produce.
process_csv("test.csv", "test2.csv", "2024-04-10", "2025-04-10")

In [None]:
import csv
import time
import requests
import pandas as pd
from datetime import datetime, timedelta

# API call to retrieve extra features
def retry_request(url, params=None, max_retries=3, delay=2):
    """GET a JSON document, retrying on failure.

    Args:
        url: Endpoint to request.
        params: Optional query parameters passed to `requests.get`.
        max_retries: Maximum number of attempts.
        delay: Seconds to wait between attempts.

    Returns:
        The decoded JSON body of the first HTTP 200 response, or None when
        every attempt fails (network error, non-200 status, or invalid JSON).
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, params=params, timeout=10)
            if response.status_code == 200:
                return response.json()
            # Report non-200 responses too; they were previously retried
            # silently and without any delay.
            print(f"Attempt {attempt + 1} failed: HTTP {response.status_code}")
        except (requests.RequestException, ValueError) as e:
            print(f"Attempt {attempt + 1} failed: {e}")
        # Back off after any failed attempt (e.g. rate limiting), but do not
        # waste time sleeping after the final one.
        if attempt < max_retries - 1:
            time.sleep(delay)
    print(f"All retries failed for URL: {url}")
    return None

def get_historical_data(lat, lon, start_date, end_date):
    """Fetch daily archive weather data for a point from Open-Meteo.

    Requests sunshine duration, snowfall, precipitation, maximum surface
    pressure, maximum dew point, shortwave radiation and FAO reference
    evapotranspiration for the given date range.

    Returns:
        Whatever `retry_request` returns for the assembled URL: the decoded
        JSON on success, otherwise None.
    """
    daily_variables = (
        "sunshine_duration,snowfall_sum,precipitation_sum,"
        "surface_pressure_max,dew_point_2m_max,shortwave_radiation_sum,et0_fao_evapotranspiration"
    )
    query = "".join([
        f"latitude={lat}&longitude={lon}",
        f"&start_date={start_date}&end_date={end_date}",
        f"&daily={daily_variables}",
        "&timezone=auto",
    ])
    return retry_request("https://archive-api.open-meteo.com/v1/archive?" + query)

def _aggregate_daily(values, reducer):
    """Apply `reducer` (e.g. max or sum) to the non-null values, or return "N/A" if none."""
    present = [value for value in values if value is not None]
    return reducer(present) if present else "N/A"


def process_data(input_path, output_path, subset_rows=None):
    """Write yearly weather aggregates for each coordinate to a CSV file.

    For every selected row of `input_path` (column names are lower-cased
    after loading), the seven aggregate columns are copied through when the
    row already has all of them; otherwise the daily series are fetched with
    `get_historical_data` and aggregated (max or sum, ignoring null days).
    `output_path` is overwritten.

    Args:
        input_path: CSV with at least 'latitude' and 'longitude' columns.
        output_path: Destination CSV.
        subset_rows: Optional positional selector passed to `DataFrame.iloc`;
            all rows are processed when None.
    """
    df = pd.read_csv(input_path)
    df.columns = df.columns.str.lower()
    subset = df if subset_rows is None else df.iloc[subset_rows]

    # Loop-invariant: the two date windows requested for every coordinate.
    current_date = datetime(2025, 4, 15)
    max_historical_date = (current_date - timedelta(days=5)).strftime("%Y-%m-%d")
    date_ranges = [
        ("2024-04-10", "2025-04-09"),
        ("2025-04-10", max_historical_date)
    ]

    # Lower-cased names under which existing aggregates appear in the input.
    feature_columns = [
        "max precipitation (mm)",
        "total sunshine (min)",
        "total snowfall (mm)",
        "max surface pressure (hpa)",
        "max dew point (°c)",
        "total shortwave radiation (wh/m²)",
        "total reference evapotranspiration (mm)",
    ]
    daily_keys = [
        'precipitation_sum',
        'sunshine_duration',
        'snowfall_sum',
        'surface_pressure_max',
        'dew_point_2m_max',
        'shortwave_radiation_sum',
        'et0_fao_evapotranspiration',
    ]

    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        # NOTE(review): the units in these headers should be checked against
        # the Open-Meteo docs (sunshine_duration appears to be in seconds,
        # snowfall_sum in cm, shortwave_radiation_sum in MJ/m²). The header
        # text is left unchanged so downstream readers keep working.
        writer.writerow([
            "Coordinates",
            "Max Precipitation (mm)",
            "Total Sunshine (min)",
            "Total Snowfall (mm)",
            "Max Surface Pressure (hPa)",
            "Max Dew Point (°C)",
            "Total Shortwave Radiation (Wh/m²)",
            "Total Reference Evapotranspiration (mm)"
        ])

        for i, row in subset.iterrows():
            lat = float(row['latitude'])
            lon = float(row['longitude'])

            # A column that is absent from the input must count as missing.
            # Defaulting to the string "N/A" made pd.isna() report every value
            # as present, so no row was ever fetched when the columns did not
            # exist yet.
            row_values = [row.get(column, float("nan")) for column in feature_columns]

            if not any(pd.isna(val) for val in row_values):
                print(f"Skipped {i+1}/{len(subset)} ({lat},{lon}) — all values present.")
                writer.writerow([f"{lat},{lon}", *row_values])
                continue

            combined_data = {key: [] for key in daily_keys}

            fetch_failed = False
            for start, end in date_ranges:
                if start > end:
                    continue
                data = get_historical_data(lat, lon, start, end)
                if not data:
                    # Remember a failure in any window, not just the last one.
                    fetch_failed = True
                    continue
                daily = data.get('daily', {})
                for key in combined_data:
                    combined_data[key].extend(daily.get(key) or [])

            # Null days are ignored for every series; max()/sum() would
            # otherwise raise TypeError on the first gap.
            max_precip = _aggregate_daily(combined_data['precipitation_sum'], max)
            total_sun = _aggregate_daily(combined_data['sunshine_duration'], sum)
            total_snow = _aggregate_daily(combined_data['snowfall_sum'], sum)
            max_pressure = _aggregate_daily(combined_data['surface_pressure_max'], max)
            max_dew = _aggregate_daily(combined_data['dew_point_2m_max'], max)
            total_irradiance = _aggregate_daily(combined_data['shortwave_radiation_sum'], sum)
            total_et0 = _aggregate_daily(combined_data['et0_fao_evapotranspiration'], sum)

            writer.writerow([
                f"{lat},{lon}",
                max_precip,
                total_sun,
                total_snow,
                max_pressure,
                max_dew,
                total_irradiance,
                total_et0
            ])

            print(f"Updated {i+1}/{len(subset)} ({lat},{lon}) → Precip: {max_precip}, Sun: {total_sun}, Snow: {total_snow}, Pressure: {max_pressure}, Dew: {max_dew}, Irradiance: {total_irradiance}, ET₀: {total_et0}")
            if fetch_failed:
                print(f"Failed for ({lat},{lon}). Waiting 30 seconds before moving to the next one.")
                time.sleep(30)
            # Pause between coordinates to stay under the API's request rate.
            time.sleep(1.1)

# Process the first 1000 rows (positions 0-999) of test.csv and write the
# aggregates to test2.csv, overwriting any existing file of that name.
process_data("test.csv", "test2.csv", subset_rows=range(0, 1000))


In [None]:
import requests
import pandas as pd
import time
from statistics import mean

# API call to retrieve pollutants extra features
def update_air_quality_data(input_csv, output_csv, columns, lat_col='latitude', lon_col='longitude', start_date='2024-04-10', end_date='2025-04-10'):
    """Fill missing air-quality columns with period averages from Open-Meteo.

    For every row of `input_csv` that lacks a value in any of `columns`, the
    hourly series for all requested pollutants are fetched from the
    Open-Meteo air-quality API and each column is set to the mean of its
    non-null hourly values, rounded to two decimals. Progress is written to
    `output_csv` every 100 rows and once more at the end.

    Args:
        input_csv: CSV with the coordinate columns and, optionally, `columns`.
        output_csv: Destination CSV.
        columns: Pollutant column names. Common labels ('PM2.5', 'NO2',
            'SO2', 'Dust', ...) are translated to the API's variable names;
            any other name is sent to the API unchanged.
        lat_col: Name of the latitude column.
        lon_col: Name of the longitude column.
        start_date: First day, "YYYY-MM-DD".
        end_date: Last day, "YYYY-MM-DD".
    """
    # The API only accepts its own variable identifiers, so friendly CSV
    # labels are mapped to them (matched case-insensitively).
    api_aliases = {
        'pm2.5': 'pm2_5',
        'pm25': 'pm2_5',
        'pm10': 'pm10',
        'no2': 'nitrogen_dioxide',
        'so2': 'sulphur_dioxide',
        'co': 'carbon_monoxide',
        'o3': 'ozone',
        'dust': 'dust',
    }
    api_names = {col: api_aliases.get(str(col).lower(), col) for col in columns}

    def safe_avg(values):
        # Drop nulls first: mean() raises StatisticsError on an empty
        # sequence, which an all-null series would otherwise produce.
        present = [v for v in values if v is not None]
        return round(mean(present), 2) if present else None

    df_full = pd.read_csv(input_csv)
    total = len(df_full)

    # Create any requested column that the input lacks so the row lookup
    # below cannot raise KeyError.
    for col in columns:
        if col not in df_full.columns:
            df_full[col] = float('nan')

    url = 'https://air-quality-api.open-meteo.com/v1/air-quality'

    for index, row in df_full.iterrows():
        values = row[columns]
        if values.notna().all():
            continue

        lat = row[lat_col]
        lon = row[lon_col]

        try:
            params = {
                'latitude': lat,
                'longitude': lon,
                'hourly': ','.join(api_names[col] for col in columns),
                'start_date': start_date,
                'end_date': end_date,
                'timezone': 'auto'
            }

            # Bound the wait so a stalled connection cannot block the run.
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()

            hourly = data.get('hourly', {})

            for col in columns:
                avg_value = safe_avg(hourly.get(api_names[col]) or [])
                df_full.loc[index, col] = avg_value

            remaining = total - index - 1
            print(f"({index+1}/{total}) Updated (lat: {lat}, lon: {lon}) — {', '.join([f'{col}={df_full.loc[index, col]}' for col in columns])} — {remaining} remaining")

        except Exception as e:
            # Per-row best effort: report the failure and move on.
            print(f"Error at index {index} (lat: {lat}, lon: {lon}): {e}")

        if index % 100 == 0 and index > 0:
            df_full.to_csv(output_csv, index=False)
            print(f"Progress saved at index {index}")

        time.sleep(1)

    df_full.to_csv(output_csv, index=False)
    print(f"All done. Data saved to '{output_csv}'")

# Fill the four pollutant columns for every row of test.csv that is missing
# any of them and write the result to test2.csv.
update_air_quality_data('test.csv', 'test2.csv', ['PM2.5', 'NO2', 'SO2', 'Dust'])
