In [2]:
import pandas as pd

# Load the Excel file.
# The workbook is opened in a `with` block so its handle is closed as soon as
# the sheet has been read instead of staying open for the rest of the session.
file_path = "Search_collection_id_11904_schema_LegalEntity.xlsx"
with pd.ExcelFile(file_path) as xls:
    # List all sheet names
    print("Sheets in the Excel file:", xls.sheet_names)

    # Assuming the main data is in the first sheet
    df = pd.read_excel(xls, sheet_name=xls.sheet_names[0])

# Show the first few rows
print(df.head())

# Show column headers
print("\nColumns in the dataset:")
print(df.columns.tolist())

# Optional: basic info
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would append a stray "None" line).
print("\nDataset info:")
df.info()

Sheets in the Excel file: ['Legal entities']
                                                  ID  \
0  011d978ff27cf2abb1a7523c2205b7a9d600a81e.82720...   
1  0130467638a7c16072d072905c5be355f7533a17.69386...   
2  017dacb3d008b8a11cac1e42e9fccdf61013d833.58a14...   
3  01920ad08a43892768daa3cc8ae757c3f6520fe7.4770e...   
4  02456fc2696627bbd0c1b380365b895dabab59a1.d6ae7...   

                                                 url  \
0  https://aleph.occrp.org/entities/011d978ff27cf...   
1  https://aleph.occrp.org/entities/0130467638a7c...   
2  https://aleph.occrp.org/entities/017dacb3d008b...   
3  https://aleph.occrp.org/entities/01920ad08a438...   
4  https://aleph.occrp.org/entities/02456fc269662...   

                           collection                   Name  E-Mail  Phone  \
0  Swedish Aircraft Registry - Owners   Tågalycke Konsult AB     NaN    NaN   
1  Swedish Aircraft Registry - Owners       Fly Czech s.r.o.     NaN    NaN   
2  Swedish Aircraft Registry - Owners  Swetr

In [10]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Load Excel file
file_path = "Search_collection_id_11904_schema_LegalEntity.xlsx"
df = pd.read_excel(file_path, sheet_name="Legal entities")

# Keep only the relevant columns and just the first 10 rows for testing.
# The copy is taken AFTER slicing so the column assignments further down write
# into an independent frame rather than a slice of the full table
# (avoids SettingWithCopyWarning).
df = df[['Name', 'Address']].head(10).copy()

# Initialize geocoder with a longer timeout
geolocator = Nominatim(user_agent="swedish_aircraft_mapper", timeout=10)

# Use RateLimiter to avoid overloading server
geocode = RateLimiter(
    geolocator.geocode,
    min_delay_seconds=1,    # wait at least 1s between calls
    max_retries=2,          # retry up to 2 times
    error_wait_seconds=5,   # wait 5s after error before retry
    swallow_exceptions=True # don't crash, just return None
)


def _geocode_if_present(address):
    """Geocode ``address``; return None without a request if it is missing or blank.

    A missing cell in the Address column arrives here as NaN and would
    otherwise be handed to the geocoder as if it were a real query.
    """
    if pd.isna(address) or not str(address).strip():
        return None
    return geocode(address)


# Apply geocoding
df['location'] = df['Address'].apply(_geocode_if_present)
df['latitude'] = df['location'].apply(lambda loc: loc.latitude if loc else None)
df['longitude'] = df['location'].apply(lambda loc: loc.longitude if loc else None)

# Check results
print(df)

# Optional: save test output
df.to_csv("swedish_aircraft_geocoded_test.csv", index=False)

                                    Name                Address  \
0                   Tågalycke Konsult AB                KLIPPAN   
1                       Fly Czech s.r.o.                Praha 6   
2                  Swetrail Transport AB                VÄRNAMO   
3                            AVI-LINK AS             Jonsvatnet   
4                      Kjell A Østnes AS                   OSLO   
5                          Swedewings AB             SKELLEFTEÅ   
6                  Swedair Management AB              Jönköping   
7                                 HOZ AB                   SALA   
8  Celestial Aviation Trading 26 Limited  Shannon, County Clare   
9                      UBI Leasing S.p.A                BRESCIA   

                                            location   latitude   longitude  
0  (Klippans kommun, Skåne län, Sverige, (56.15, ...  56.150000   13.166667  
1  (Praha 6, 102/19, Kafkova, Dejvice, Praha, obv...  50.098769   14.396196  
2  (Värnamo, Värnamo kommun,

In [8]:
import pandas as pd
import googlemaps
import time

# Read the Google Maps API key from a local text file, dropping surrounding whitespace
with open("google_api_key.txt", "r") as key_file:
    API_KEY = key_file.read().strip()

# Build the Google Maps client from that key
gmaps = googlemaps.Client(key=API_KEY)

# Read the "Legal entities" sheet and keep an independent copy of the
# name and address columns only
file_path = "Search_collection_id_11904_schema_LegalEntity.xlsx"
df = pd.read_excel(file_path, sheet_name="Legal entities")[['Name', 'Address']].copy()

# Function to geocode
def geocode_address(address, retries=3, delay=1):
    """Geocode one address with the Google Maps client and return ``(lat, lng)``.

    Args:
        address: Free-text address to look up. Missing values (``None`` / NaN)
            and blank strings are skipped without calling the API.
        retries: Maximum number of attempts when the request raises.
        delay: Seconds to wait between failed attempts.

    Returns:
        A ``(latitude, longitude)`` tuple taken from the first result, or
        ``(None, None)`` when the address is missing, nothing was found, or
        every attempt failed.
    """
    # An empty spreadsheet cell arrives from pandas as NaN; do not spend a
    # request (and retries) on something that cannot be geocoded.
    if isinstance(address, str):
        if not address.strip():
            return None, None
    elif address is None or pd.isna(address):
        return None, None

    for attempt in range(1, retries + 1):
        try:
            result = gmaps.geocode(address)
            if result:
                loc = result[0]['geometry']['location']
                return loc['lat'], loc['lng']
            # A successful call with no matches is final; retrying won't help.
            return None, None
        except Exception as e:
            if attempt < retries:
                print(f"Error geocoding {address}: {e}, retrying...")
                time.sleep(delay)
            else:
                # Last attempt: report honestly and skip the pointless sleep.
                print(f"Error geocoding {address}: {e}, giving up after {retries} attempts")
    return None, None

# Trial run: geocode only the first 10 rows, working on an independent copy
sample = df.head(10).copy()

# Collect one (lat, lng) pair per address, in row order
coordinates = []
for address in sample['Address']:
    coordinates.append(geocode_address(address))
    time.sleep(0.2)  # short pause between requests to stay within quota

# Split the pairs into the two coordinate columns
latitudes = [pair[0] for pair in coordinates]
longitudes = [pair[1] for pair in coordinates]
sample['latitude'] = latitudes
sample['longitude'] = longitudes

print(sample[['Name', 'Address', 'latitude', 'longitude']])


                                    Name                Address   latitude  \
0                   Tågalycke Konsult AB                KLIPPAN  56.134900   
1                       Fly Czech s.r.o.                Praha 6  50.075538   
2                  Swetrail Transport AB                VÄRNAMO  57.183160   
3                            AVI-LINK AS             Jonsvatnet  63.365431   
4                      Kjell A Østnes AS                   OSLO  59.913869   
5                          Swedewings AB             SKELLEFTEÅ  64.750244   
6                  Swedair Management AB              Jönköping  57.782614   
7                                 HOZ AB                   SALA        NaN   
8  Celestial Aviation Trading 26 Limited  Shannon, County Clare  52.711782   
9                      UBI Leasing S.p.A                BRESCIA  45.541553   

   longitude  
0  13.129041  
1  14.437800  
2  14.047821  
3  10.588992  
4  10.752245  
5  20.950917  
6  14.161788  
7        NaN  
8  -8.