# Mapping Sustainability Data

In [3]:
# Uncomment and run once if these packages are missing from the kernel's environment.
# `%pip` (rather than `!pip`) targets the environment this kernel is running in.
# The notebook also imports yfinance, pandas and scikit-learn (see the import cell).
#%pip install geopy
#%pip install folium

## Python Packages for Geospatial Analysis and Mapping

### `geopy`

The **`geopy`** package in Python provides tools for **geocoding** (finding latitude and longitude from an address) and **reverse geocoding** (finding an address from latitude and longitude). It acts as a wrapper around popular geocoding web services such as Google Maps, OpenStreetMap (Nominatim), Bing Maps, and others.  

Key functionalities include:
- Converting street addresses into geographic coordinates.
- Converting geographic coordinates into addresses.
- Calculating distances between points using different methods (geodesic — the method used later in this notebook — or great-circle).

**Example uses**:
- Find the coordinates of `"Sydney Opera House"`.
- Calculate the distance between Sydney and Melbourne.


### `folium`

The **`folium`** package allows you to **create interactive maps** in Python, built on top of the JavaScript library **Leaflet.js**. It is particularly useful for data visualisation when working with geographic data.  

Key functionalities include:
- Displaying maps centred at specific coordinates.
- Adding markers, popups, and circles to maps.
- Overlaying choropleth maps and heatmaps.
- Saving interactive maps as HTML files to share or embed.

**Example uses**:
- Create a choropleth map of world population.
- Visualise geospatial data interactively inside a Jupyter Notebook.


**Summary**

- **`geopy`** → focuses on **geocoding** and **distance calculations**.  
- **`folium`** → focuses on **visualisation** and **interactive maps**.

These two packages are often used **together**:  
1. Use `geopy` to obtain coordinates.  
2. Use `folium` to plot those coordinates on an interactive map.  


In [1]:
import folium   # library to plot the coordinates on an interactive map
import pandas as pd
import yfinance as yf
from geopy.distance import geodesic
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim
from sklearn.preprocessing import MinMaxScaler

## Getting coordinates

**Getting company headquarters coordinates:** You can use the `geopy` library to look up the coordinates of a company's headquarters from the location information provided by **Yahoo Finance**. The `info` dictionary returned by Yahoo Finance often contains `city` and `country` keys, which you can pass to a geocoding API such as OpenStreetMap's Nominatim to obtain the coordinates.

Try it in a simple code example below:

In [2]:
# Create a Nominatim geocoder and look up a single place name
geolocator = Nominatim(user_agent="company_locator")
location = geolocator.geocode("Sydney, Australia")

# Unpack the coordinates of the match and report them
lat, lon = location.latitude, location.longitude
print(f"Latitude: {lat}; Longitude: {lon}")

Latitude: -33.8698439; Longitude: 151.2082848


## Calculating distances

In [3]:
# Geocode both cities and keep each result as a (latitude, longitude) tuple
sydney_location = geolocator.geocode("Sydney, Australia")
melbourne_location = geolocator.geocode("Melbourne, Australia")
sydney_coords = (sydney_location.latitude, sydney_location.longitude)
melbourne_coords = (melbourne_location.latitude, melbourne_location.longitude)

print("Sydney:", sydney_coords)
print("Melbourne:", melbourne_coords)

# Geodesic distance between the two points, reported in kilometres
distance_km = geodesic(sydney_coords, melbourne_coords).kilometers
print(f"Distance between Sydney and Melbourne: {distance_km:.2f} km")

Sydney: (-33.8698439, 151.2082848)
Melbourne: (-37.8142454, 144.9631732)
Distance between Sydney and Melbourne: 713.75 km


## Select the companies for analysis

In [4]:
# Option A: hard-code the ticker symbols to analyse
tickers = [
    'DELL', 'ABNB', 'PANW', 'BKNG', 'UBER', 'IBM', 'SCCO',
    'ADBE', 'NFLX', 'SAP', 'TXN', 'ORCL', 'TTD', 'MU',
    'PYPL', 'INTC', 'GOOG', 'MSFT', 'NVDA', 'AMZN', 'META',
    'TSLA', 'AAPL', 'AMD', 'CRM', 'BA', 'AA', 'MMM',
    'AMGN', 'CAT', 'CVX', 'CSCO', 'KO', 'DOW', 'DIS',
]

In [5]:
# Option B: Read the list of tickers from a text file (one symbol per line).
# Uncomment the lines below to use it; doing so reassigns `tickers` and therefore
# replaces the Option A list.
# Open the file in read mode
#with open('data/stock_symbol_list_SP500_B.txt', 'r') as file:
#    # Read each line and strip any leading/trailing whitespace characters (like newlines)
#    tickers = [line.strip() for line in file]

## Download company data

In [6]:
# Geocoder used to turn each company's "city, country" string into coordinates
geolocator = Nominatim(user_agent="company_locator")

# Accumulator for the per-company rows collected by the download loop
data = []

In [7]:
# Collect ESG scores, company profile fields and headquarters coordinates for
# every ticker, then build a DataFrame with one row per company.

# Start from an empty list so that re-running this cell does not append duplicate rows
data = []

# Nominatim's usage policy allows at most one request per second, so throttle the
# geocoder. Per geopy's documentation, RateLimiter also retries failed requests and
# (by default) returns None instead of raising when the service keeps failing.
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Several companies share a headquarters city, so remember each "city, country"
# lookup instead of sending the same query again.
coordinates_by_place = {}

# Loop through each ticker and extract the required information
for ticker in tickers:
    stock = yf.Ticker(ticker)

    print(ticker)

    # ESG scores and coordinates stay None unless they are found below
    environment_score = social_score = governance_score = None
    lat = lon = None

    try:
        # Extract the sustainability scores
        sustainability = stock.sustainability

        # If sustainability data is available, extract the relevant ESG scores
        environment_score = sustainability.loc['environmentScore'].values[0]
        social_score = sustainability.loc['socialScore'].values[0]
        governance_score = sustainability.loc['governanceScore'].values[0]
    except (KeyError, AttributeError, IndexError):
        # KeyError: a score row is missing; AttributeError: no sustainability
        # table was returned (None); IndexError: the row exists but is empty.
        # Any score not read before the error remains None.
        print(f"Sustainability data is currently not available for {ticker}.")

    # Extract other relevant information (fall back to an empty dict if none is returned)
    info = stock.info or {}
    market_cap = info.get('marketCap')
    industry_sector = info.get('sector')
    city = info.get('city')
    country = info.get('country')

    # Get coordinates using the city and country
    if city and country:
        place = f"{city}, {country}"
        if place not in coordinates_by_place:
            location = geocode(place)
            coordinates_by_place[place] = (
                (location.latitude, location.longitude) if location else None
            )
        coordinates = coordinates_by_place[place]
        if coordinates is not None:
            lat, lon = coordinates
        else:
            print(f"Coordinates not found for {ticker} in {city}, {country}")

    # Append the data to the list
    data.append([ticker, environment_score, social_score, governance_score, market_cap, industry_sector, city, country, lat, lon])

# Convert the list to a pandas DataFrame
df = pd.DataFrame(data, columns=['Ticker', 'Environment Score', 'Social Score', 'Governance Score', 'Market Cap', 'Industry Sector', 'City', 'Country', 'Latitude', 'Longitude'])

DELL
ABNB
PANW
BKNG
UBER
IBM
SCCO
ADBE
NFLX
SAP


HTTP Error 404: 


Sustainability data is currently not available for SAP.
TXN
ORCL
TTD
MU
PYPL
INTC
GOOG


HTTP Error 404: 


Sustainability data is currently not available for GOOG.
MSFT
NVDA
AMZN
META
TSLA
AAPL
AMD
CRM
BA
AA
MMM
AMGN
CAT
CVX
CSCO
KO
DOW
DIS


In [9]:
# Display the assembled company table (one row per ticker) as this cell's output
df

Unnamed: 0,Ticker,Environment Score,Social Score,Governance Score,Market Cap,Industry Sector,City,Country,Latitude,Longitude
0,DELL,2.31,7.0,4.56,84432764928,Technology,Round Rock,United States,30.508592,-97.678806
1,ABNB,0.88,9.83,6.68,75800944640,Consumer Cyclical,San Francisco,United States,37.779259,-122.419329
2,PANW,0.05,7.99,4.89,130074296320,Technology,Santa Clara,United States,37.354113,-121.955174
3,BKNG,0.87,9.55,4.02,178442420224,Consumer Cyclical,Norwalk,United States,41.117597,-73.407897
4,UBER,3.74,12.16,5.81,189752360960,Technology,San Francisco,United States,37.779259,-122.419329
5,IBM,1.58,6.68,5.01,231510409216,Technology,Armonk,United States,41.132973,-73.712606
6,SCCO,11.89,7.76,5.91,83639222272,Basic Materials,Phoenix,United States,33.448437,-112.074141
7,ADBE,2.78,6.75,4.71,148033077248,Technology,San Jose,United States,37.336166,-121.890591
8,NFLX,0.16,7.38,8.35,528531456000,Communication Services,Los Gatos,United States,37.226611,-121.97468
9,SAP,,,,313430736896,Technology,Walldorf,Germany,49.303813,8.643352


## Plot an interactive map

### Map of company locations

In [10]:
# Base map centred at a global level
company_map = folium.Map(location=[20, 0], zoom_start=2)

# One pin per company that has both coordinates; rows missing either one are skipped
has_coordinates = df['Latitude'].notna() & df['Longitude'].notna()
for row_label, company in df[has_coordinates].iterrows():
    marker = folium.Marker(
        location=[company['Latitude'], company['Longitude']],
        popup=f"{company['Ticker']} ({company['City']}, {company['Country']})",
        tooltip=company['Ticker'],
    )
    marker.add_to(company_map)

# Write a standalone HTML copy, then render the map inline as the cell output
company_map.save("company_map.html")
company_map

### Map companies' market capitalization

In [11]:
# Rescale market capitalization with Min-Max scaling onto the range 5-50,
# so the result can be used directly as a bubble size
scaler = MinMaxScaler(feature_range=(5, 50))
market_cap_column = df[['Market Cap']]
scaler.fit(market_cap_column)
df['Scaled Market Cap'] = scaler.transform(market_cap_column)

# Show the table including the new column
display(df)

Unnamed: 0,Ticker,Environment Score,Social Score,Governance Score,Market Cap,Industry Sector,City,Country,Latitude,Longitude,Scaled Market Cap
0,DELL,2.31,7.0,4.56,84432764928,Technology,Round Rock,United States,30.508592,-97.678806,5.82198
1,ABNB,0.88,9.83,6.68,75800944640,Consumer Cyclical,San Francisco,United States,37.779259,-122.419329,5.728857
2,PANW,0.05,7.99,4.89,130074296320,Technology,Santa Clara,United States,37.354113,-121.955174,6.314377
3,BKNG,0.87,9.55,4.02,178442420224,Consumer Cyclical,Norwalk,United States,41.117597,-73.407897,6.836189
4,UBER,3.74,12.16,5.81,189752360960,Technology,San Francisco,United States,37.779259,-122.419329,6.958205
5,IBM,1.58,6.68,5.01,231510409216,Technology,Armonk,United States,41.132973,-73.712606,7.408705
6,SCCO,11.89,7.76,5.91,83639222272,Basic Materials,Phoenix,United States,33.448437,-112.074141,5.813419
7,ADBE,2.78,6.75,4.71,148033077248,Technology,San Jose,United States,37.336166,-121.890591,6.508123
8,NFLX,0.16,7.38,8.35,528531456000,Communication Services,Los Gatos,United States,37.226611,-121.97468,10.613071
9,SAP,,,,313430736896,Technology,Walldorf,Germany,49.303813,8.643352,8.29249


In [12]:
# Initialize a map centered globally
company_map = folium.Map(location=[20, 0], zoom_start=2)

# Add one bubble per company, sized by the scaled market capitalization.
# Rows missing either coordinate or the market cap are skipped, matching the
# latitude-and-longitude check used for the plain marker map.
for index, row in df.iterrows():
    if pd.isna(row['Latitude']) or pd.isna(row['Longitude']) or pd.isna(row['Market Cap']):
        continue
    folium.CircleMarker(
        location=[row['Latitude'], row['Longitude']],
        radius=row['Scaled Market Cap'],  # Scaled bubble size
        color='blue',
        fill=True,
        fill_opacity=0.5,  # Adjust transparency as needed
        # ".0f" keeps the popup free of a trailing ".0" when the column holds
        # floats (which happens as soon as any company has no market cap)
        popup=folium.Popup(f"{row['Ticker']} - Market Cap: ${row['Market Cap']:,.0f}")
    ).add_to(company_map)

# Save the map to an HTML file and display it
company_map.save("company_map_market_cap.html")
company_map

### Map companies' ESG risk profiles

In [13]:
# Base map centred globally for the ESG view
company_map = folium.Map(location=[20, 0], zoom_start=2)

# One overlay feature group per ESG score, keyed by the DataFrame column it visualises
esg_layers = {
    score_name: folium.FeatureGroup(name=score_name, overlay=True)
    for score_name in ('Environment Score', 'Social Score', 'Governance Score')
}

# Function to add circle marker based on ESG score
def add_esg_layer(esg_layer, score_column, color, popup_label, data=None, radius_scale=2.0):
    """Add one circle marker per company to an ESG layer, sized by a score column.

    Rows missing a coordinate or the score are skipped.

    Args:
        esg_layer: Layer object (e.g. a folium FeatureGroup) that receives the markers.
        score_column: Name of the column used for the bubble size and popup value.
        color: Colour passed to each circle marker.
        popup_label: Text shown before the score value in each popup.
        data: DataFrame with 'Ticker', 'Latitude', 'Longitude' and `score_column`
            columns. Defaults to the notebook-level `df`.
        radius_scale: Multiplier that turns a score into a marker radius. The
            default of 2.0 gives the same size as the previous `score / 0.5`.
    """
    companies = df if data is None else data
    for index, row in companies.iterrows():
        if pd.isna(row['Latitude']) or pd.isna(row['Longitude']) or pd.isna(row[score_column]):
            continue
        folium.CircleMarker(
            location=[row['Latitude'], row['Longitude']],
            radius=row[score_column] * radius_scale,  # Adjust the size of the bubbles
            color=color,
            fill=True,
            fill_opacity=0.4,  # Adjust transparency for the fill (0 = fully transparent, 1 = fully opaque)
            opacity=0.2,  # Adjust transparency for the circle's border (0 = fully transparent, 1 = fully opaque)
            popup=folium.Popup(f"{row['Ticker']} - {popup_label}: {row[score_column]}")
        ).add_to(esg_layer)

# Populate each overlay; the popup label is the same text as the score column name
for score_column, marker_color in (
    ('Environment Score', 'green'),
    ('Social Score', 'blue'),
    ('Governance Score', 'red'),
):
    add_esg_layer(esg_layers[score_column], score_column, marker_color, score_column)

# Attach every overlay to the map
for layer in esg_layers.values():
    layer.add_to(company_map)

# Layer control lets the viewer toggle the individual ESG overlays
folium.LayerControl().add_to(company_map)

# Write a standalone HTML copy, then render the map inline as the cell output
company_map.save("company_map_esg.html")
company_map