In [1]:
from io import StringIO

import altair as alt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Data source: the Wikipedia page listing countries by coconut production.
wikiurl="https://en.wikipedia.org/wiki/List_of_countries_by_coconut_production"
# NOTE(review): table_class is not referenced by the lookup below, which
# matches on "wikitable sortable"; kept in case another cell reads it.
table_class="wikitable sortable jquery-tablesorter"

# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of letting an error
# page flow into the parser.
response=requests.get(wikiurl, timeout=30)
response.raise_for_status()

# Parse the page into a BeautifulSoup object and pick the first table whose
# class attribute is "wikitable sortable".
soup = BeautifulSoup(response.text, 'html.parser')
coconut=soup.find('table',{'class':"wikitable sortable"})

# find() returns None when nothing matches; fail now with a clear message
# rather than passing the string "None" to the HTML table reader downstream.
if coconut is None:
    raise ValueError(
        f'No <table class="wikitable sortable"> found at {wikiurl}; '
        "the page layout may have changed."
    )

In [3]:
# Convert the scraped <table> element into a pandas DataFrame.
# read_html returns a list of DataFrames, one per table it finds. The markup
# is wrapped in StringIO because passing a literal HTML string is deprecated
# in recent pandas releases.
tables = pd.read_html(StringIO(str(coconut)))
# Only one <table> element was passed in, so the first entry is the one we want.
df = tables[0]
print(df.head(20))

    Rank      Country/region      2020      2019      2018      2017      2016
0      1           Indonesia  16824848  17074536  17100000  17200000  17400000
1      2               India  14695000  14682000  16413000  11166772  11344306
2      3         Philippines  14490923  14765057  14726165  14049131  13825080
3      4              Brazil   2458839   2348663   2345400   2210139   2634396
4      5           Sri Lanka   2233600   2468800   2098400   1960000   2408800
5      6             Vietnam   1719415   1677044   1571709   1499228   1469960
6      7    Papua New Guinea   1217293   1205510   1186400   1186400   1186400
7      8              Mexico    895291    908302    926400    927200    925600
8      9            Thailand    827424    866416    858235    761914    904094
9     10            Malaysia    560984    536606    495531    517589    504773
10    11             Myanmar    541415    530540    511790    489691    490276
11    12          Bangladesh    431596    431596    

In [4]:
# Keep only the first 20 rows of the table for the chart, then display them.
df_select = df.iloc[:20]
df_select

Unnamed: 0,Rank,Country/region,2020,2019,2018,2017,2016
0,1,Indonesia,16824848,17074536,17100000,17200000,17400000
1,2,India,14695000,14682000,16413000,11166772,11344306
2,3,Philippines,14490923,14765057,14726165,14049131,13825080
3,4,Brazil,2458839,2348663,2345400,2210139,2634396
4,5,Sri Lanka,2233600,2468800,2098400,1960000,2408800
5,6,Vietnam,1719415,1677044,1571709,1499228,1469960
6,7,Papua New Guinea,1217293,1205510,1186400,1186400,1186400
7,8,Mexico,895291,908302,926400,927200,925600
8,9,Thailand,827424,866416,858235,761914,904094
9,10,Malaysia,560984,536606,495531,517589,504773


In [5]:
# Unpivot the per-year columns: each (country, year) pair becomes one row,
# with the year label in "variable" and the figure in "value".
df_long = df_select.melt(
    id_vars='Country/region',
    value_vars=['2020', '2019', '2018', '2017', '2016'],
)
print(df_long)

   Country/region variable     value
0       Indonesia     2020  16824848
1           India     2020  14695000
2     Philippines     2020  14490923
3          Brazil     2020   2458839
4       Sri Lanka     2020   2233600
..            ...      ...       ...
95       Tanzania     2016    444800
96        Vanuatu     2016    300000
97     Mozambique     2016    252667
98        Nigeria     2016    227200
99          Samoa     2016    173550

[100 rows x 3 columns]


In [6]:
# Bump chart: one line per country, showing how its production rank moves
# from year to year.
alt.Chart(df_long).mark_line(point=True, strokeWidth=2, interpolate="linear").encode(
    # "variable" holds the year labels; declare it ordinal explicitly and give
    # the axis a readable title instead of the raw column name.
    x=alt.X("variable:O", title="Year"),
    # "rank" is computed by the window transform below; 1 is the largest value.
    y=alt.Y("rank:O", title="Rank (1 = highest production)"),
    color=alt.Color("Country/region:N"),
).transform_window(
    # Rank countries within each year, largest production first.
    rank="rank()",
    sort=[alt.SortField("value", order="descending")],
    groupby=["variable"],
).properties(
    title="Bump Chart for Coconut Production",
    width=600,
    height=600,
)