# Example on World Bank Suicide Mortality data

The data comes from the World Bank indicator `SH.STA.SUIC.P5` (suicide mortality rate per 100,000 population):

https://data.worldbank.org/indicator/SH.STA.SUIC.P5?end=2019&start=2000&view=chart

In [7]:
import os
import urllib.request

# urlretrieve opens the target path for writing and does not create missing
# parent folders, so make sure the raw-data folder exists first. The processed
# folder is created here too because the later COPY ... TO cell writes a
# Parquet file into it.
os.makedirs("./data/worldbank.org/raw", exist_ok=True)
os.makedirs("./data/worldbank.org/processed", exist_ok=True)

# Download the indicator as a zip archive of CSV files.
# NOTE(review): the extracted file names carry a numeric suffix (e.g. _5555449)
# that later cells hard-code; presumably it changes when the World Bank
# republishes the data -- confirm after re-downloading.
urllib.request.urlretrieve("https://api.worldbank.org/v2/en/indicator/SH.STA.SUIC.P5?downloadformat=csv", "./data/worldbank.org/raw/SH.STA.SUIC.P5.zip")

('./data/worldbank.org/raw/SH.STA.SUIC.P5.zip',
 <http.client.HTTPMessage at 0x10a4806d0>)

In [8]:
# Extract the downloaded archive into the raw-data folder.
# -o overwrites files from a previous run without prompting; -d sets the target folder.
!unzip -o ./data/worldbank.org/raw/SH.STA.SUIC.P5.zip -d ./data/worldbank.org/raw/

Archive:  ./data/worldbank.org/raw/SH.STA.SUIC.P5.zip
  inflating: ./data/worldbank.org/raw/Metadata_Indicator_API_SH.STA.SUIC.P5_DS2_en_csv_v2_5555449.csv  
  inflating: ./data/worldbank.org/raw/API_SH.STA.SUIC.P5_DS2_en_csv_v2_5555449.csv  
  inflating: ./data/worldbank.org/raw/Metadata_Country_API_SH.STA.SUIC.P5_DS2_en_csv_v2_5555449.csv  


In [13]:
# Print the first five lines of the CSV to see what precedes the column header
# before writing the read_csv call.
!head -n 5 ./data/worldbank.org/raw/API_SH.STA.SUIC.P5_DS2_en_csv_v2_5555449.csv  

"Data Source","World Development Indicators",

"Last Updated Date","2023-05-10",

"Country Name","Country Code","Indicator Name","Indicator Code","1960","1961","1962","1963","1964","1965","1966","1967","1968","1969","1970","1971","1972","1973","1974","1975","1976","1977","1978","1979","1980","1981","1982","1983","1984","1985","1986","1987","1988","1989","1990","1991","1992","1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022",


# GPT-4 prompt

```
please take this example from https://nbviewer.org/github/onefact/datathinking.org-codespace/blob/main/notebooks/princeton-university/week-1-visualizing-33-million-phone-calls-in-new-york-city.ipynb

%%sql
SELECT *
FROM read_csv('./data/cityofnewyork.us/311-Service-Requests-from-2010-to-Present.csv',
    header=True,
    delim=',',
    quote='"',
    columns={'Unique Key': 'BIGINT',
    'Created Date': 'VARCHAR',
    'Closed Date': 'VARCHAR',
    'Agency': 'VARCHAR',
    'Agency Name': 'VARCHAR',
    'Complaint Type': 'VARCHAR',
    'Descriptor': 'VARCHAR',
    'Location Type': 'VARCHAR',
    'Incident Zip': 'VARCHAR',
    'Incident Address': 'VARCHAR',
    'Street Name': 'VARCHAR',
    'Cross Street 1': 'VARCHAR',
    'Cross Street 2': 'VARCHAR',
    'Intersection Street 1': 'VARCHAR',
    'Intersection Street 2': 'VARCHAR',
    'Address Type': 'VARCHAR',
    'City': 'VARCHAR',
    'Landmark': 'VARCHAR',
    'Facility Type': 'VARCHAR',
    'Status': 'VARCHAR',
    'Due Date': 'VARCHAR',
    'Resolution Description': 'VARCHAR',
    'Resolution Action Updated Date': 'VARCHAR',
    'Community Board': 'VARCHAR',
    'BBL': 'VARCHAR',
    'Borough': 'VARCHAR',

    'X Coordinate (State Plane)': 'VARCHAR',
    'Y Coordinate (State Plane)': 'VARCHAR',
    'Open Data Channel Type': 'VARCHAR',
    'Park Facility Name': 'VARCHAR',
    'Park Borough': 'VARCHAR',
    'Vehicle Type': 'VARCHAR',
    'Taxi Company Borough': 'VARCHAR',
    'Taxi Pick Up Location': 'VARCHAR',
    'Bridge Highway Name': 'VARCHAR',
    'Bridge Highway Direction': 'VARCHAR',
    'Road Ramp': 'VARCHAR',
    'Bridge Highway Segment': 'VARCHAR',
    'Latitude': 'DOUBLE',
    'Longitude': 'DOUBLE',
    'Location': 'VARCHAR'}) 

and this header: 

"Country Name","Country Code","Indicator Name","Indicator Code","1960","1961","1962","1963","1964","1965","1966","1967","1968","1969","1970","1971","1972","1973","1974","1975","1976","1977","1978","1979","1980","1981","1982","1983","1984","1985","1986","1987","1988","1989","1990","1991","1992","1993","1994","1995","1996","1997","1998","1999","2000","2001","2002","2003","2004","2005","2006","2007","2008","2009","2010","2011","2012","2013","2014","2015","2016","2017","2018","2019","2020","2021","2022",

to give the equivalent command for this file of indicators per country in FLOAT data type. the path to the file is "./data/worldbank.org/raw/API_SH.STA.SUIC.P5_DS2_en_csv_v2_5555449.csv"
```
Output:

https://chat.openai.com/share/3fe4870c-5cd6-4ae6-b085-ce1e1315a32a

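The generated command needed one change: `skip=4` had to be added to the `read_csv` call so that DuckDB skips the four lines (two metadata lines and two blank lines) that precede the column header row.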

In [15]:
# Load duckdb, which lets us efficiently load large files
import duckdb

# Load pandas, which lets us manipulate dataframes
import pandas as pd

# Import the jupysql Jupyter extension, which provides the %sql / %%sql magics used below
%load_ext sql

# Set configurations on jupysql to directly output data to Pandas and to simplify the output that is printed to the notebook.
%config SqlMagic.autopandas = True

%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Connect jupysql to DuckDB using a SQLAlchemy-style connection string. Either connect to an in memory DuckDB, or a file backed db.
# This notebook uses the in-memory database, so nothing persists unless it is written out explicitly.
%sql duckdb:///:memory:

In [17]:
%%sql
-- Preview the World Bank indicator CSV using an explicit schema.
-- skip=4 jumps over the lines that precede the column header row.
-- The four identifier columns are text; every year column (1960 to 2022) is FLOAT.
SELECT *
FROM read_csv(
    './data/worldbank.org/raw/API_SH.STA.SUIC.P5_DS2_en_csv_v2_5555449.csv',
    header=True,
    skip=4,
    delim=',',
    quote='"',
    columns={
        'Country Name': 'VARCHAR', 'Country Code': 'VARCHAR',
        'Indicator Name': 'VARCHAR', 'Indicator Code': 'VARCHAR',
        '1960': 'FLOAT', '1961': 'FLOAT', '1962': 'FLOAT', '1963': 'FLOAT', '1964': 'FLOAT',
        '1965': 'FLOAT', '1966': 'FLOAT', '1967': 'FLOAT', '1968': 'FLOAT', '1969': 'FLOAT',
        '1970': 'FLOAT', '1971': 'FLOAT', '1972': 'FLOAT', '1973': 'FLOAT', '1974': 'FLOAT',
        '1975': 'FLOAT', '1976': 'FLOAT', '1977': 'FLOAT', '1978': 'FLOAT', '1979': 'FLOAT',
        '1980': 'FLOAT', '1981': 'FLOAT', '1982': 'FLOAT', '1983': 'FLOAT', '1984': 'FLOAT',
        '1985': 'FLOAT', '1986': 'FLOAT', '1987': 'FLOAT', '1988': 'FLOAT', '1989': 'FLOAT',
        '1990': 'FLOAT', '1991': 'FLOAT', '1992': 'FLOAT', '1993': 'FLOAT', '1994': 'FLOAT',
        '1995': 'FLOAT', '1996': 'FLOAT', '1997': 'FLOAT', '1998': 'FLOAT', '1999': 'FLOAT',
        '2000': 'FLOAT', '2001': 'FLOAT', '2002': 'FLOAT', '2003': 'FLOAT', '2004': 'FLOAT',
        '2005': 'FLOAT', '2006': 'FLOAT', '2007': 'FLOAT', '2008': 'FLOAT', '2009': 'FLOAT',
        '2010': 'FLOAT', '2011': 'FLOAT', '2012': 'FLOAT', '2013': 'FLOAT', '2014': 'FLOAT',
        '2015': 'FLOAT', '2016': 'FLOAT', '2017': 'FLOAT', '2018': 'FLOAT', '2019': 'FLOAT',
        '2020': 'FLOAT', '2021': 'FLOAT', '2022': 'FLOAT'
    }
)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Aruba,ABW,"Suicide mortality rate (per 100,000 population)",SH.STA.SUIC.P5,,,,,,,...,,,,,,,,,,
1,Africa Eastern and Southern,AFE,"Suicide mortality rate (per 100,000 population)",SH.STA.SUIC.P5,,,,,,,...,8.745026,8.565287,8.489758,8.328658,8.232943,8.118032,8.024906,,,
2,Afghanistan,AFG,"Suicide mortality rate (per 100,000 population)",SH.STA.SUIC.P5,,,,,,,...,4.000000,3.900000,4.000000,4.000000,4.100000,4.100000,4.100000,,,
3,Africa Western and Central,AFW,"Suicide mortality rate (per 100,000 population)",SH.STA.SUIC.P5,,,,,,,...,6.203106,6.001738,5.774544,5.680857,5.567861,5.439667,5.420255,,,
4,Angola,AGO,"Suicide mortality rate (per 100,000 population)",SH.STA.SUIC.P5,,,,,,,...,6.900000,6.400000,6.500000,6.200000,6.000000,6.000000,6.100000,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Kosovo,XKX,"Suicide mortality rate (per 100,000 population)",SH.STA.SUIC.P5,,,,,,,...,,,,,,,,,,
262,"Yemen, Rep.",YEM,"Suicide mortality rate (per 100,000 population)",SH.STA.SUIC.P5,,,,,,,...,5.700000,5.700000,5.500000,5.700000,5.600000,5.900000,5.800000,,,
263,South Africa,ZAF,"Suicide mortality rate (per 100,000 population)",SH.STA.SUIC.P5,,,,,,,...,23.700001,24.100000,24.500000,24.400000,25.200001,24.100000,23.500000,,,
264,Zambia,ZMB,"Suicide mortality rate (per 100,000 population)",SH.STA.SUIC.P5,,,,,,,...,9.100000,9.000000,8.700000,8.500000,8.500000,8.100000,7.300000,,,


In [19]:
%%sql
-- Convert the indicator CSV to a ZSTD-compressed Parquet file.
-- The read_csv call uses the same options and schema as the preview query.
COPY (
    SELECT *
    FROM read_csv(
        './data/worldbank.org/raw/API_SH.STA.SUIC.P5_DS2_en_csv_v2_5555449.csv',
        header=True,
        skip=4,
        delim=',',
        quote='"',
        columns={
            'Country Name': 'VARCHAR', 'Country Code': 'VARCHAR',
            'Indicator Name': 'VARCHAR', 'Indicator Code': 'VARCHAR',
            '1960': 'FLOAT', '1961': 'FLOAT', '1962': 'FLOAT', '1963': 'FLOAT', '1964': 'FLOAT',
            '1965': 'FLOAT', '1966': 'FLOAT', '1967': 'FLOAT', '1968': 'FLOAT', '1969': 'FLOAT',
            '1970': 'FLOAT', '1971': 'FLOAT', '1972': 'FLOAT', '1973': 'FLOAT', '1974': 'FLOAT',
            '1975': 'FLOAT', '1976': 'FLOAT', '1977': 'FLOAT', '1978': 'FLOAT', '1979': 'FLOAT',
            '1980': 'FLOAT', '1981': 'FLOAT', '1982': 'FLOAT', '1983': 'FLOAT', '1984': 'FLOAT',
            '1985': 'FLOAT', '1986': 'FLOAT', '1987': 'FLOAT', '1988': 'FLOAT', '1989': 'FLOAT',
            '1990': 'FLOAT', '1991': 'FLOAT', '1992': 'FLOAT', '1993': 'FLOAT', '1994': 'FLOAT',
            '1995': 'FLOAT', '1996': 'FLOAT', '1997': 'FLOAT', '1998': 'FLOAT', '1999': 'FLOAT',
            '2000': 'FLOAT', '2001': 'FLOAT', '2002': 'FLOAT', '2003': 'FLOAT', '2004': 'FLOAT',
            '2005': 'FLOAT', '2006': 'FLOAT', '2007': 'FLOAT', '2008': 'FLOAT', '2009': 'FLOAT',
            '2010': 'FLOAT', '2011': 'FLOAT', '2012': 'FLOAT', '2013': 'FLOAT', '2014': 'FLOAT',
            '2015': 'FLOAT', '2016': 'FLOAT', '2017': 'FLOAT', '2018': 'FLOAT', '2019': 'FLOAT',
            '2020': 'FLOAT', '2021': 'FLOAT', '2022': 'FLOAT'
        }
    )
) TO './data/worldbank.org/processed/global_suicide_mortality_rate.parquet' (COMPRESSION ZSTD);

Unnamed: 0,Count
0,266


In [20]:
# Plotting stack: vegafusion (chart data processing), polars (dataframes), altair (charts)
import vegafusion as vf
import polars as pl
import altair as alt
# Lift Altair's limit on the number of rows a chart dataset may contain
alt.data_transformers.disable_max_rows()
# Render charts as HTML output
alt.renderers.enable('html')

# Configure DuckDB connection
vf.runtime.set_connection("duckdb")

# Enable Mime Renderer
# The call is the last expression in the cell, so the settings it applied are echoed as the cell output.
vf.enable(row_limit=100000000)

vegafusion.enable(mimetype='html', row_limit=100000000, embed_options=None)

In [21]:
# Read the Parquet file written by the DuckDB COPY step into a Polars DataFrame
suicide_mortality = pl.read_parquet(
    source="./data/worldbank.org/processed/global_suicide_mortality_rate.parquet"
)


In [22]:
# Print the column-name -> dtype mapping as plain text; this text is what gets
# pasted into the chart prompt further down.
print(suicide_mortality.schema)

{'Country Name': Utf8, 'Country Code': Utf8, 'Indicator Name': Utf8, 'Indicator Code': Utf8, '1960': Float32, '1961': Float32, '1962': Float32, '1963': Float32, '1964': Float32, '1965': Float32, '1966': Float32, '1967': Float32, '1968': Float32, '1969': Float32, '1970': Float32, '1971': Float32, '1972': Float32, '1973': Float32, '1974': Float32, '1975': Float32, '1976': Float32, '1977': Float32, '1978': Float32, '1979': Float32, '1980': Float32, '1981': Float32, '1982': Float32, '1983': Float32, '1984': Float32, '1985': Float32, '1986': Float32, '1987': Float32, '1988': Float32, '1989': Float32, '1990': Float32, '1991': Float32, '1992': Float32, '1993': Float32, '1994': Float32, '1995': Float32, '1996': Float32, '1997': Float32, '1998': Float32, '1999': Float32, '2000': Float32, '2001': Float32, '2002': Float32, '2003': Float32, '2004': Float32, '2005': Float32, '2006': Float32, '2007': Float32, '2008': Float32, '2009': Float32, '2010': Float32, '2011': Float32, '2012': Float32, '2013'

In [30]:
# Display the frame to check which year columns actually contain values
suicide_mortality

Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,1970,1971,1972,1973,1974,1975,1976,1977,1978,1979,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
str,str,str,str,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32,f32
"""Aruba""","""ABW""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Africa Eastern…","""AFE""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.942055,10.566542,10.425021,10.280305,10.052388,9.900248,9.729664,9.686305,9.910591,9.863977,9.472227,9.063319,8.856541,8.745026,8.565287,8.489758,8.328658,8.232943,8.118032,8.024906,,,
"""Afghanistan""","""AFG""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.9,5.0,5.0,5.0,5.0,5.0,4.9,4.8,4.6,4.4,4.3,4.1,4.0,4.0,3.9,4.0,4.0,4.1,4.1,4.1,,,
"""Africa Western…","""AFW""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,6.806788,6.722093,6.613712,6.601393,6.565814,6.570061,6.574923,6.563629,6.445107,6.447209,6.423698,6.395704,6.304507,6.203106,6.001738,5.774544,5.680857,5.567861,5.439667,5.420255,,,
"""Angola""","""AGO""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.7,8.6,8.6,8.8,8.6,8.1,8.6,7.4,7.5,6.7,7.0,6.6,6.9,6.9,6.4,6.5,6.2,6.0,6.0,6.1,,,
"""Albania""","""ALB""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.9,4.5,4.6,4.8,4.8,7.5,7.8,8.1,8.2,8.1,7.8,7.9,5.2,5.3,5.0,4.8,4.7,4.7,4.5,4.3,,,
"""Andorra""","""AND""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"""Arab World""","""ARB""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.569915,4.582525,4.531061,4.481845,4.431762,4.427148,4.395477,4.405686,4.447792,4.462291,4.494429,4.554406,4.564396,4.488044,4.425839,4.318294,4.244035,4.165167,4.197532,4.162428,,,
"""United Arab Em…","""ARE""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.6,8.6,8.0,7.5,7.7,7.9,8.3,8.3,8.5,8.5,8.4,8.2,7.9,7.5,7.0,6.5,6.0,5.6,6.0,6.4,,,
"""Argentina""","""ARG""","""Suicide mortal…","""SH.STA.SUIC.P5…",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,9.1,10.3,10.2,9.7,8.7,8.6,8.6,8.3,8.7,8.4,8.6,8.5,9.1,8.6,9.3,8.3,8.3,8.9,9.2,8.4,,,


## GPT-4 Prompt

```
please create a line chart using altair and polars in python using this schema: 

{'Country Name': Utf8, 'Country Code': Utf8, 'Indicator Name': Utf8, 'Indicator Code': Utf8, '1960': Float32, '1961': Float32, '1962': Float32, '1963': Float32, '1964': Float32, '1965': Float32, '1966': Float32, '1967': Float32, '1968': Float32, '1969': Float32, '1970': Float32, '1971': Float32, '1972': Float32, '1973': Float32, '1974': Float32, '1975': Float32, '1976': Float32, '1977': Float32, '1978': Float32, '1979': Float32, '1980': Float32, '1981': Float32, '1982': Float32, '1983': Float32, '1984': Float32, '1985': Float32, '1986': Float32, '1987': Float32, '1988': Float32, '1989': Float32, '1990': Float32, '1991': Float32, '1992': Float32, '1993': Float32, '1994': Float32, '1995': Float32, '1996': Float32, '1997': Float32, '1998': Float32, '1999': Float32, '2000': Float32, '2001': Float32, '2002': Float32, '2003': Float32, '2004': Float32, '2005': Float32, '2006': Float32, '2007': Float32, '2008': Float32, '2009': Float32, '2010': Float32, '2011': Float32, '2012': Float32, '2013': Float32, '2014': Float32, '2015': Float32, '2016': Float32, '2017': Float32, '2018': Float32, '2019': Float32, '2020': Float32, '2021': Float32, '2022': Float32}
```

Result: https://chat.openai.com/share/a661a826-3f66-48bc-b530-fb5877534575

In [32]:
import pandas as pd

# Convert the Polars DataFrame to pandas so it can be reshaped with melt()
df_pandas = suicide_mortality.to_pandas()

# Reshape from wide (one column per year) to long (one row per country and year)
df_long_all = df_pandas.melt(
    id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
    var_name='Year',
    value_name='Value',
)

# Filter out most of the missing data: the rows displayed above have no values
# before 2000. The comparison is >= so that 2000 itself, the first year with
# data, is kept. Filtering into a new name keeps this cell safe to re-run.
df_long = df_long_all[df_long_all["Year"].astype(int) >= 2000]

# Draw one line per country; the title and axis labels let the figure stand on its own
line_chart = alt.Chart(df_long).mark_line().encode(
    x=alt.X('Year:O', title='Year'),  # treat the year as ordinal (ordered) data
    y=alt.Y('Value:Q', title='Suicide mortality rate (per 100,000 population)'),  # quantitative
    color='Country Name:N',  # color lines by the country name
).properties(
    title='World Bank suicide mortality rate by country'
)

line_chart.display()
