In [1]:
import pandas as pd

# Load the legal-entity export into a DataFrame.
df_greece = pd.read_excel("Search_collection_id_10800_schema_LegalEntity.xlsx")

# Inspect the first few rows
print(df_greece.head())

# Check the columns
print(df_greece.columns)

# Basic info. DataFrame.info() writes its report to stdout itself and
# returns None, so it is called directly; wrapping it in print() would
# append a stray "None" line to the output.
df_greece.info()

# Number of rows
print(f"Number of legal entities: {len(df_greece)}")

                                                  ID  \
0  00d2328335d23fcdf9d1b9aa6484ecc6c78b738d.42332...   
1  0104ab6679d47a1394fb1f40b353b8c12ac3bee5.28b0b...   
2  0148ea9d2cfe368bf285069083c02e17faa18eb5.6572f...   
3  018a20a31c326ec5c1adbf48721180d01118134b.9fabc...   
4  018b96e90a71d4630a61a461e681db97261856f7.bcee5...   

                                                 url              collection  \
0  https://aleph.occrp.org/entities/00d2328335d23...  Greece Media Ownership   
1  https://aleph.occrp.org/entities/0104ab6679d47...  Greece Media Ownership   
2  https://aleph.occrp.org/entities/0148ea9d2cfe3...  Greece Media Ownership   
3  https://aleph.occrp.org/entities/018a20a31c326...  Greece Media Ownership   
4  https://aleph.occrp.org/entities/018b96e90a71d...  Greece Media Ownership   

                                                Name  E-Mail  Phone  \
0                             ΔΡΟΥΖΑ ΑΝΝΑΔΡΟΥΖΑ ΑΝΝΑ     NaN    NaN   
1                                  ΜΑΝΙΑ

In [3]:
import pandas as pd
from tqdm import tqdm
import googlemaps

# Load dataset
df = pd.read_excel("Search_collection_id_10800_schema_LegalEntity.xlsx")

# Keep only rows with addresses. The .copy() gives an independent frame so
# the coordinate columns added below are not written into a slice of df.
df_geo = df[df['Address'].notna()].copy()
print(f"Number of entities with addresses: {len(df_geo)}")

# Initialize Google Maps client. The key is read inside a `with` block so
# the file handle is closed as soon as the key has been read.
with open("google_api_key.txt") as key_file:
    gmaps = googlemaps.Client(key=key_file.read().strip())

# Add columns for Latitude and Longitude. They start as NaN (float dtype)
# rather than None, so the columns stay numeric; NaN marks "not geocoded".
df_geo['Latitude'] = float('nan')
df_geo['Longitude'] = float('nan')

# Geocode each address with a progress bar. Only the Address column is
# needed, so iterate over it directly instead of building a full row
# Series per entity.
for i, address in tqdm(df_geo['Address'].items(), total=len(df_geo)):
    try:
        geocode_result = gmaps.geocode(address + ", Greece")
        if geocode_result:
            # Use the first candidate returned for the address.
            loc = geocode_result[0]['geometry']['location']
            df_geo.at[i, 'Latitude'] = loc['lat']
            df_geo.at[i, 'Longitude'] = loc['lng']
    except Exception as e:
        # Best effort: report the failure and continue with the next
        # address; the row keeps NaN coordinates.
        print(f"Error geocoding {address}: {e}")

# Save geocoded data (rows that could not be geocoded have empty
# Latitude/Longitude fields in the CSV).
df_geo.to_csv("Greece_MediaOwnership_geocoded.csv", index=False)
print("Geocoding complete and saved!")

Number of entities with addresses: 710



100%|██████████████████████████████████████████████████████████████████| 710/710 [01:54<00:00,  6.20it/s]

Geocoding complete and saved!


In [14]:
# Preview the first rows of the geocoded frame; the Latitude and Longitude
# columns added by the geocoding cell appear as the last two columns.
df_geo.head()

Unnamed: 0,ID,url,collection,Name,E-Mail,Phone,Registration number,Country,Legal form,Status,...,Tax Number,Tax status,Topics,V.A.T. Identifier,Weak alias,Website,Wikidata ID,Wikipedia Article,Latitude,Longitude
3,018a20a31c326ec5c1adbf48721180d01118134b.9fabc...,https://aleph.occrp.org/entities/018a20a31c326...,Greece Media Ownership,ONMEDIA DIGITAL ΜΟΝΟΠΡΟΣΩΠΗ ΑΝΩΝΥΜΗ ΕΤΑΙΡΕΙΑ; ...,,,,gr,Ανώνυμη Εταιρεία (Α.Ε.),,...,996846160.0,,,,,https://www.cretalive.gr/; https://www.capitan...,,,35.329239,25.138288
11,04ba3c267a165717235088635e7694aea1e686b6.2af84...,https://aleph.occrp.org/entities/04ba3c267a165...,Greece Media Ownership,ΥΙΟΙ ΣΑΒΒΑ ΤΣΟΠΑΝΑΚΗ Η ΚΩΤΙΑΔΗ ΟΕ; ΡΟΔΙΑΚΗ; ΕΦ...,,,,gr,Ομόρρυθμη Εταιρεία (Ο.Ε.),,...,82924230.0,,,,,http://WWW.RODIAKI.GR/,,,36.439675,28.234484
13,079c06fd532f0e25df3ce41d4a866b34e25190df.d6b2c...,https://aleph.occrp.org/entities/079c06fd532f0...,Greece Media Ownership,TELIA INTERNET ΙΔΙΩΤΙΚΗ ΚΕΦΑΛΑΙΟΥΧΙΚΗ ΕΤΑΙΡΙΑ;...,,,,gr,Ιδιωτική Κεφαλαιουχική Εταιρεία (Ι.Κ.Ε.),,...,801707355.0,,,,,http://pagenews.gr/,,,38.040843,23.803142
14,088bb5ae028948a1c6afaca27047652c8f7f5ff1.314bd...,https://aleph.occrp.org/entities/088bb5ae02894...,Greece Media Ownership,THESSDIGITAL Ι Κ Ε; -; ΤΗESSDIGITAL IKE,,,,gr,Ιδιωτική Κεφαλαιουχική Εταιρεία (Ι.Κ.Ε.),,...,801907469.0,,,,,http://theopinion.gr/,,,40.635464,22.944377
16,09974ba661794cdbf96181566703460fe71609aa.579d4...,https://aleph.occrp.org/entities/09974ba661794...,Greece Media Ownership,ΧΑΛΟΣ ΓΕΩΡΓΙΟΣ; -,,,,gr,Ατομική,,...,170317325.0,,,,,http://oichalialive.gr/,,,39.60419,21.981511


In [15]:
import gmplot

# Drop rows without successful coordinates
df_map = df_geo.dropna(subset=['Latitude', 'Longitude'])
if df_map.empty:
    # With no coordinates the means below would be NaN and the generated
    # map would have an invalid centre, so fail loudly instead.
    raise ValueError("No geocoded rows to plot; run the geocoding step first.")

# Center the map on the mean of the geocoded points. The columns are
# coerced to float first so this works whether they are stored as object
# or float dtype.
latitudes = df_map['Latitude'].astype(float)
longitudes = df_map['Longitude'].astype(float)
lat_center = latitudes.mean()
lng_center = longitudes.mean()

# Read the API key inside a `with` block so the file handle is closed.
with open("google_api_key.txt") as key_file:
    api_key = key_file.read().strip()

# The third positional argument (6) is the initial zoom level.
gmap = gmplot.GoogleMapPlotter(lat_center, lng_center, 6, apikey=api_key)

# Add markers. Missing names become an empty title instead of raising
# AttributeError on a NaN value. Double quotes are backslash-escaped,
# presumably because the title ends up inside a double-quoted string in
# the generated HTML/JS (confirm against the installed gmplot version).
titles = df_map['Name'].fillna("").astype(str)
for lat, lng, title in zip(latitudes, longitudes, titles):
    gmap.marker(lat, lng, title=title.replace('"', '\\"'))

# Save HTML
gmap.draw("Greece_MediaOwnership_map.html")
print("Saved interactive map!")

Saved interactive map!
