# Generate Latitude and Longitude for Cities

In [7]:
# Install dependencies with the %pip magic so they land in the environment of
# the running kernel (a `!pip` shell call may hit a different Python install).
%pip install geopy pandas






In [8]:
import pandas as pd
from geopy.geocoders import Nominatim
import time

In [9]:
# Load the CSV dataset (path is relative to the notebook's working directory)
df = pd.read_csv("environment_data.csv")
# Preview the first rows as this cell's output
df.head()

Unnamed: 0,Date,City,Country,AQI,PM2.5 (µg/m³),PM10 (µg/m³),NO2 (ppb),SO2 (ppb),CO (ppm),O3 (ppb),Temperature (°C),Humidity (%),Wind Speed (m/s)
0,2024-01-01,New York,USA,38,120.0,182.9,24.3,26.0,9.1,153.3,18.6,40,13.2
1,2024-01-01,Los Angeles,USA,280,38.4,46.9,41.8,34.7,3.78,190.7,-2.2,59,9.5
2,2024-01-01,London,UK,117,168.1,34.3,81.5,8.2,3.67,105.4,36.3,62,3.4
3,2024-01-01,Beijing,China,197,96.8,35.4,18.5,39.4,9.51,92.8,29.9,32,1.8
4,2024-01-01,Delhi,India,187,76.2,226.8,46.9,17.2,1.02,68.4,9.9,55,3.3


In [10]:
# Shared Nominatim client used by get_lat_lon. The user_agent identifies this
# notebook to the service. geopy's default request timeout is only 1 second,
# which easily produces spurious timeouts, so a longer one is set explicitly.
geolocator = Nominatim(user_agent="terracanvas_ipynb", timeout=10)

In [11]:
def get_lat_lon(city, country):
    """
    Return the latitude and longitude for a given city and country.

    The lookup is performed with the module-level ``geolocator`` using the
    query string ``"<city>, <country>"``.

    Parameters
    ----------
    city : name of the city to look up.
    country : name of the country the city belongs to.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the match, or ``(None, None)`` when the
        geocoder finds nothing or the lookup raises an error.
    """
    query = f"{city}, {country}"
    try:
        location = geolocator.geocode(query)
    except Exception as exc:
        # Best-effort lookup: a failure for one city must not abort the whole
        # run, but the reason is reported instead of being silently dropped.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate so a long geocoding loop can still be stopped.
        print(f"Geocoding failed for {query!r}: {type(exc).__name__}: {exc}")
        return None, None
    if not location:
        return None, None
    return location.latitude, location.longitude


In [12]:
# Get unique city-country pairs so every place is geocoded only once
unique_cities = df[['City', 'Country']].drop_duplicates().reset_index(drop=True)

# Lists to store coordinates (one entry per row of unique_cities, same order)
latitudes = []
longitudes = []

# Geocode each unique city
total = len(unique_cities)
for i, (city, country) in enumerate(
    unique_cities.itertuples(index=False, name=None), start=1
):
    lat, lon = get_lat_lon(city, country)
    # A failed lookup is stored as NaN (missing), not 0: (0, 0) is a real
    # location, so a 0 placeholder would look like valid coordinates downstream.
    latitudes.append(float("nan") if lat is None else lat)
    longitudes.append(float("nan") if lon is None else lon)
    print(f"{i}/{total}: {city}, {country} => lat: {lat}, lon: {lon}")
    time.sleep(1)  # pause between requests to avoid API limits

# Add coordinates to unique_cities DataFrame
unique_cities['Latitude'] = latitudes
unique_cities['Longitude'] = longitudes

1/10: New York, USA => lat: 40.7127281, lon: -74.0060152
2/10: Los Angeles, USA => lat: 34.0536909, lon: -118.242766
3/10: London, UK => lat: 51.5074456, lon: -0.1277653
4/10: Beijing, China => lat: 40.190632, lon: 116.412144
5/10: Delhi, India => lat: 28.6138954, lon: 77.2090057
6/10: Paris, France => lat: 48.8588897, lon: 2.320041
7/10: Tokyo, Japan => lat: 35.6768601, lon: 139.7638947
8/10: Sydney, Australia => lat: -33.8698439, lon: 151.2082848
9/10: São Paulo, Brazil => lat: -23.5506507, lon: -46.6333824
10/10: Cairo, Egypt => lat: 30.0443879, lon: 31.2357257


In [13]:
# Merge coordinates back to original dataset.
# Any Latitude/Longitude columns left by a previous run of this cell are
# dropped first; otherwise re-running the cell would produce suffixed
# duplicates (Latitude_x / Latitude_y) instead of a clean pair of columns.
df = (
    df.drop(columns=['Latitude', 'Longitude'], errors='ignore')
      .merge(unique_cities, on=['City', 'Country'], how='left')
)
df.head()

Unnamed: 0,Date,City,Country,AQI,PM2.5 (µg/m³),PM10 (µg/m³),NO2 (ppb),SO2 (ppb),CO (ppm),O3 (ppb),Temperature (°C),Humidity (%),Wind Speed (m/s),Latitude,Longitude
0,2024-01-01,New York,USA,38,120.0,182.9,24.3,26.0,9.1,153.3,18.6,40,13.2,40.712728,-74.006015
1,2024-01-01,Los Angeles,USA,280,38.4,46.9,41.8,34.7,3.78,190.7,-2.2,59,9.5,34.053691,-118.242766
2,2024-01-01,London,UK,117,168.1,34.3,81.5,8.2,3.67,105.4,36.3,62,3.4,51.507446,-0.127765
3,2024-01-01,Beijing,China,197,96.8,35.4,18.5,39.4,9.51,92.8,29.9,32,1.8,40.190632,116.412144
4,2024-01-01,Delhi,India,187,76.2,226.8,46.9,17.2,1.02,68.4,9.9,55,3.3,28.613895,77.209006


In [14]:
# Save updated dataset with coordinates.
# index=False keeps the DataFrame's row index out of the written file.
df.to_csv("environment_data_with_coords.csv", index=False)
# Confirmation message; the file name here must match the path written above.
print("Updated CSV saved as 'environment_data_with_coords.csv'")

Updated CSV saved as 'environment_data_with_coords.csv'
