<a href="https://colab.research.google.com/github/gabihgodinho/Data-Science-Projects/blob/main/Proportional_symbols.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**How plot proportional symbols on a map using Python**

A </font> <font color="green">proportional symbol map</font> is a dot density map where the size of the dot depends on a certain variable. For example, you can represent the number of airports for each country in the world as a variable-size dot. The bigger the dot representing a country, the greater the number of airports in that country.

In this tutorial I will teach you how to make this type of plot. For this first example, we will use the [dataset airports](https://ourairports.com/data/) released as open data by OurAirports.com.

In [None]:
#importing the libraries that will be used
from google.colab import files
import pandas as pd
import altair as alt

In [None]:
#If the file you want to work with is on your machine, upload it.
uploaded = files.upload()

Saving airports.csv to airports.csv


In [None]:
alt.data_transformers.disable_max_rows()

url = "https://raw.githubusercontent.com/deldersveld/topojson/master/world-continents.json"
source = alt.topo_feature(url, "continent")

base = alt.Chart(source).mark_geoshape(
    fill='lightgray',
    stroke='white'
).project('mercator').properties(
    width=800,
    height=600
)

df = pd.read_csv('airports.csv')
df = df[df.type == 'large_airport']
df2 = df.groupby(by=['iso_country'])['name'].count().to_frame().reset_index()
df2.rename(columns={'name' : 'number_of_airports'},inplace=True)



In [None]:
df

Unnamed: 0,id,ident,type,name,latitude_deg,longitude_deg,elevation_ft,continent,iso_country,iso_region,municipality,scheduled_service,gps_code,iata_code,local_code,home_link,wikipedia_link,keywords
11221,3,AGGH,large_airport,Honiara International Airport,-9.428000,160.054993,28.0,OC,SB,SB-GU,Honiara,yes,AGGH,HIR,,,https://en.wikipedia.org/wiki/Honiara_Internat...,Henderson Field
13223,67,AYPY,large_airport,Port Moresby Jacksons International Airport,-9.443380,147.220001,146.0,OC,PG,PG-NCD,Port Moresby,yes,AYPY,POM,,,https://en.wikipedia.org/wiki/Jacksons_Interna...,
13766,123,BIKF,large_airport,Keflavik International Airport,63.985001,-22.605600,171.0,EU,IS,IS-2,Reykjavík,yes,BIKF,KEF,,https://www.isavia.is/en/keflavik-airport,https://en.wikipedia.org/wiki/Keflav%C3%ADk_In...,"Keflavik Naval Air Station,REK"
19941,1717,CYEG,large_airport,Edmonton International Airport,53.309700,-113.580002,2373.0,,CA,CA-AB,Edmonton,yes,CYEG,YEG,CYEG,http://www.edmontonairports.com/,https://en.wikipedia.org/wiki/Edmonton_Interna...,
19997,1770,CYHZ,large_airport,Halifax / Stanfield International Airport,44.880798,-63.508598,477.0,,CA,CA-NS,Halifax,yes,CYHZ,YHZ,,http://www.hiaa.ca/,https://en.wikipedia.org/wiki/Halifax_Internat...,Robert L. Stanfield International Airport
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75855,27236,ZWWW,large_airport,Ürümqi Diwopu International Airport,43.907101,87.474197,2125.0,AS,CN,CN-65,Ürümqi,yes,ZWWW,URC,,,https://en.wikipedia.org/wiki/%C3%9Cr%C3%BCmqi...,
75859,27237,ZYCC,large_airport,Changchun Longjia International Airport,43.996201,125.684998,706.0,AS,CN,CN-22,Changchun,yes,ZYCC,CGQ,,,https://en.wikipedia.org/wiki/Changchun_Longji...,
75866,27238,ZYHB,large_airport,Harbin Taiping International Airport,45.623402,126.250000,457.0,AS,CN,CN-23,Harbin,yes,ZYHB,HRB,,,https://en.wikipedia.org/wiki/Harbin_Taiping_I...,
75881,27242,ZYTL,large_airport,Dalian Zhoushuizi International Airport,38.965698,121.539001,107.0,AS,CN,CN-21,"Ganjingzi, Dalian",yes,ZYTL,DLC,,,https://en.wikipedia.org/wiki/Dalian_Zhoushuiz...,Dalian Air Base


In [None]:
df2

Unnamed: 0,iso_country,number_of_airports
0,AE,4
1,AL,1
2,AM,1
3,AO,1
4,AR,2
...,...,...
154,VN,2
155,VU,1
156,ZA,3
157,ZM,1


In [None]:
points = alt.Chart(df).mark_circle().encode(
    longitude='longitude_deg:Q',
    latitude='latitude_deg:Q',
    size=alt.Size('number_of_airports:Q', title='# airports'),
    color=alt.value('red')
).properties(
    width=500,
    height=400
).transform_lookup(
    lookup='iso_country',
    from_=alt.LookupData(df2, 'iso_country', ['iso_country', 'number_of_airports'])
)

base + points

In [None]:
#This line imports the Plotly Express module, which provides a high-level interface for creating interactive visualizations with Plotly
import plotly.express as px


In [None]:
#The px.data.gapminder() function returns the gapminder dataset, which is a built-in dataset provided by Plotly Express
#The .query("year==2007") part filters the dataset to only include data for the year 2007, creating a new DataFrame df containing the data for that specific year
df = px.data.gapminder().query("year==2007")
df

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
11,Afghanistan,Asia,2007,43.828,31889923,974.580338,AFG,4
23,Albania,Europe,2007,76.423,3600523,5937.029526,ALB,8
35,Algeria,Africa,2007,72.301,33333216,6223.367465,DZA,12
47,Angola,Africa,2007,42.731,12420476,4797.231267,AGO,24
59,Argentina,Americas,2007,75.320,40301927,12779.379640,ARG,32
...,...,...,...,...,...,...,...,...
1655,Vietnam,Asia,2007,74.249,85262356,2441.576404,VNM,704
1667,West Bank and Gaza,Asia,2007,73.422,4018332,3025.349798,PSE,275
1679,"Yemen, Rep.",Asia,2007,62.698,22211743,2280.769906,YEM,887
1691,Zambia,Africa,2007,42.384,11746035,1271.211593,ZMB,894


In [None]:
fig = px.scatter_geo(df, locations="iso_alpha", color="continent",
                     hover_name="country", size="pop",
                     projection="natural earth")
fig.show()

In summary, the code creates an interactive geographical scatter plot that visualizes the population of various countries in the year 2007 on a world map. Each country is represented as a point, and the size of the point corresponds to its population. The points are colored based on the continent to which the countries belong, and hovering over a point displays the country's name.

---



---



**In case you want to have an animation:**
This code is similar to the previous one but with one additional feature - it includes an animation based on the year column. The resulting plot will show a sequence of frames, each representing the scatter plot for a specific year from the "gapminder" dataset. This will allow you to see how the population distribution of different countries changes over time on a world map.

In [35]:
import plotly.express as px
df1 = px.data.gapminder()
fig = px.scatter_geo(df1, locations="iso_alpha", color="continent",
                     hover_name="country", size="pop",
                     animation_frame="year",
                     projection="natural earth")
fig.show()
# Save the plot as an HTML file
fig.write_html("gapminder_animation.html")

And a third example:

In [None]:
import plotly.graph_objects as go
import pandas as pd

In [None]:
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_ebola.csv')
df.head(10)

Unnamed: 0,Country,Month,Year,Lat,Lon,Value
0,Guinea,3,14,9.95,-9.7,122.0
1,Guinea,4,14,9.95,-9.7,224.0
2,Guinea,5,14,9.95,-9.7,291.0
3,Guinea,6,14,9.95,-9.7,413.0
4,Guinea,7,14,9.95,-9.7,460.0
5,Guinea,8,14,9.95,-9.7,771.0
6,Guinea,9,14,9.95,-9.7,1022.0
7,Guinea,10,14,9.95,-9.7,
8,Guinea,11,14,9.95,-9.7,
9,Guinea,12,14,9.95,-9.7,


In [None]:
colors = ['rgb(239,243,255)','rgb(189,215,231)','rgb(107,174,214)','rgb(33,113,181)']
months = {6:'June',7:'July',8:'Aug',9:'Sept'}

fig = go.Figure()

for i in range(6,10)[::-1]:
    df_month = df.query('Month == %d' %i)
    fig.add_trace(go.Scattergeo(
            lon = df_month['Lon'],
            lat = df_month['Lat'],
            text = df_month['Value'],
            name = months[i],
            marker = dict(
                size = df_month['Value']/50,
                color = colors[i-6],
                line_width = 0
            )))

df_sept = df.query('Month == 9')
fig['data'][0].update(mode='markers+text', textposition='bottom center',
                      text=df_sept['Value'].map('{:.0f}'.format).astype(str)+' '+\
                      df_sept['Country'])

# Inset
fig.add_trace(go.Choropleth(
        locationmode = 'country names',
        locations = df_sept['Country'],
        z = df_sept['Value'],
        text = df_sept['Country'],
        colorscale = [[0,'rgb(0, 0, 0)'],[1,'rgb(0, 0, 0)']],
        autocolorscale = False,
        showscale = False,
        geo = 'geo2'
    ))
fig.add_trace(go.Scattergeo(
        lon = [21.0936],
        lat = [7.1881],
        text = ['Africa'],
        mode = 'text',
        showlegend = False,
        geo = 'geo2'
    ))

fig.update_layout(
    title = go.layout.Title(
        text = 'Ebola cases reported by month in West Africa 2014<br> \
Source: <a href="https://data.humdata.org/dataset/rowca-ebola-cases">\
HDX</a>'),
    geo = go.layout.Geo(
        resolution = 50,
        scope = 'africa',
        showframe = False,
        showcoastlines = True,
        landcolor = "rgb(229, 229, 229)",
        countrycolor = "white" ,
        coastlinecolor = "white",
        projection_type = 'mercator',
        lonaxis_range= [ -15.0, -5.0 ],
        lataxis_range= [ 0.0, 12.0 ],
        domain = dict(x = [ 0, 1 ], y = [ 0, 1 ])
    ),
    geo2 = go.layout.Geo(
        scope = 'africa',
        showframe = False,
        landcolor = "rgb(229, 229, 229)",
        showcountries = False,
        domain = dict(x = [ 0, 0.6 ], y = [ 0, 0.6 ]),
        bgcolor = 'rgba(255, 255, 255, 0.0)',
    ),
    legend_traceorder = 'reversed'
)

fig.show()