### What proportion of people from each represented country uses drugs?

In [17]:
# Import Library Dependencies
import pandas
import matplotlib.pyplot as plt
import hvplot.pandas
import geoviews as gv
from pathlib import Path

import pandas as pd
import json
import requests
from config import api_key

#country info library to get capital for coding later - pip install
from countryinfo import CountryInfo

# Turn off warning messages so that cell outputs stay uncluttered.
# NOTE(review): this is a blanket "ignore" filter, so it also hides
# deprecation warnings raised by the data and plotting libraries.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Location of the cleaned drug-consumption survey CSV, relative to the
# notebook's working directory.
# NOTE(review): the file name is spelled "consuption" — presumably this matches
# the file on disk; confirm before correcting the spelling here.
drugs_analysis = Path("data/drug_consuption_clean.csv")

In [3]:
# Load the cleaned survey CSV into a DataFrame and preview its first five rows.
# NOTE(review): the preview shows "Unnamed: 0" / "Unnamed: 0.1" columns —
# presumably index columns written out by earlier saves; confirm whether they
# are still needed.
drugs_analysis_df = pd.read_csv(filepath_or_buffer=drugs_analysis)
drugs_analysis_df.head(5)

Unnamed: 0.1,Unnamed: 0,ID,Age,Gender,Education,Country,Ethnicity,Impulsive,SS,Amphetamines,...,Cannabis,Cocaine,Crack,Ecstasy,Heroin,Ketamine,LSD,Meth,Mushrooms,VSA
0,0,1,35-44 years,Female,Professional Certificate/ Diploma,United Kingdom,Mixed-White/Asian,-0.21712,-1.18084,CL2,...,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL0
1,1,2,25-34 years,Male,Doctorate Degree,United Kingdom,White,-0.71126,-0.21575,CL2,...,CL4,CL3,CL0,CL4,CL0,CL2,CL2,CL3,CL0,CL0
2,2,3,35-44 years,Male,Professional Certificate/ Diploma,United Kingdom,White,-1.37983,0.40148,CL0,...,CL3,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL1,CL0
3,3,4,18-24 years,Female,Masters Degree,United Kingdom,White,-1.37983,-1.18084,CL0,...,CL2,CL2,CL0,CL0,CL0,CL2,CL0,CL0,CL0,CL0
4,4,5,35-44 years,Female,Doctorate Degree,United Kingdom,White,-0.21712,-0.21575,CL1,...,CL3,CL0,CL0,CL1,CL0,CL0,CL0,CL0,CL2,CL0


In [4]:
# Collect every distinct country exactly once, in order of first appearance.
# Series.unique() de-duplicates in a single pass, replacing the manual loop
# that re-scanned the growing list for every row; .tolist() keeps `countries`
# a plain Python list for the cells that iterate over it.
countries = drugs_analysis_df['Country'].unique().tolist()

In [5]:
# Geocode each country (via its capital city) with the OpenWeatherMap
# direct-geocoding endpoint and collect one record per country in country_data.

url = "https://api.openweathermap.org/geo/1.0/direct?"

country_data = []

# Loop through all the countries in our list to fetch country data
for country in countries:

    try:
        # Get the capital for more accurate latitudes and longitudes.
        # The lookup lives inside the try so that a country name unknown to
        # CountryInfo is skipped like any other failure instead of aborting
        # the whole loop (assumes it raises KeyError — TODO confirm).
        capital = CountryInfo(country).capital()

        # Let requests build and URL-encode the query string (capitals and
        # countries contain spaces and punctuation), and bound the wait so a
        # stalled connection cannot hang the notebook.
        country_data_response = requests.get(
            url,
            params={"appid": api_key, "q": f"{capital},{country}", "limit": 1},
            timeout=10,
        )
        # Turn HTTP error statuses (bad key, rate limit, ...) into exceptions
        country_data_response.raise_for_status()
        country_data_json = country_data_response.json()

        # Get the latitude and longitude from the first (only) match;
        # an empty result list raises IndexError and is skipped below
        country_latitude = country_data_json[0]['lat']
        country_longitude = country_data_json[0]['lon']

        # Append the country information into country_data list
        country_data.append({"Country": country, "Capital": capital,
                             "Lat": country_latitude, "Lon": country_longitude})

    # Skip only the failures we expect (network/HTTP problems, malformed or
    # empty responses, unknown country names); anything else — including
    # KeyboardInterrupt — still propagates.
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as error:
        # Report only the exception type: requests' error messages embed the
        # full request URL, which would leak the API key into the cell output.
        print(f"Country not found. Skipping... ({country}: {type(error).__name__})")

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

-----------------------------
Data Retrieval Complete      
-----------------------------


In [6]:
# Turn the list of geocoding records into a DataFrame: one row per country,
# with the Country, Capital, Lat and Lon keys as columns.
country_data_df = pd.DataFrame(data=country_data)

# Display the resulting table
country_data_df

Unnamed: 0,Country,Capital,Lat,Lon
0,United Kingdom,London,51.507322,-0.127647
1,Canada,Ottawa,45.420878,-75.690111
2,United States,Washington D.C.,38.895037,-77.036543
3,Australia,Canberra,-35.297591,149.101268
4,Republic of Ireland,Dublin,53.349379,-6.260559
5,New Zealand,Wellington,-41.288795,174.777211


In [7]:
# Number of survey rows per country, as a two-column frame
# (Country, Country_Counts).
rows_per_country = drugs_analysis_df.groupby('Country').size()
summary_df = rows_per_country.reset_index(name='Country_Counts')

# Attach each country's capital and coordinates. This is the default inner
# join, so a country missing from country_data_df drops out of the result.
merged_df = summary_df.merge(country_data_df, on='Country')

merged_df

Unnamed: 0,Country,Country_Counts,Capital,Lat,Lon
0,Australia,54,Canberra,-35.297591,149.101268
1,Canada,87,Ottawa,45.420878,-75.690111
2,New Zealand,5,Wellington,-41.288795,174.777211
3,Republic of Ireland,20,Dublin,53.349379,-6.260559
4,United Kingdom,1044,London,51.507322,-0.127647
5,United States,557,Washington D.C.,38.895037,-77.036543


In [8]:
# Flag each respondent in a new 'Drugs' column: 1 if any of the drug columns
# listed below holds a value other than "CL0", otherwise 0.
# NOTE(review): the frame also has other substance columns (e.g. Amphetamines,
# Cannabis) that are not part of this check — confirm that is intentional.
DRUG_COLUMNS = ['Crack', 'Cocaine', 'Ecstasy', 'Heroin', 'Ketamine',
                'LSD', 'Meth', 'Mushrooms', 'VSA']

# Vectorised replacement for the row-by-row apply/lambda: compare all nine
# columns against "CL0" at once, then reduce across each row.
drugs_analysis_df['Drugs'] = (
    drugs_analysis_df[DRUG_COLUMNS].ne("CL0").any(axis=1).astype(int)
)

drugs_analysis_df

Unnamed: 0.1,Unnamed: 0,ID,Age,Gender,Education,Country,Ethnicity,Impulsive,SS,Amphetamines,...,Cocaine,Crack,Ecstasy,Heroin,Ketamine,LSD,Meth,Mushrooms,VSA,Drugs
0,0,1,35-44 years,Female,Professional Certificate/ Diploma,United Kingdom,Mixed-White/Asian,-0.21712,-1.18084,CL2,...,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL0,0
1,1,2,25-34 years,Male,Doctorate Degree,United Kingdom,White,-0.71126,-0.21575,CL2,...,CL3,CL0,CL4,CL0,CL2,CL2,CL3,CL0,CL0,1
2,2,3,35-44 years,Male,Professional Certificate/ Diploma,United Kingdom,White,-1.37983,0.40148,CL0,...,CL0,CL0,CL0,CL0,CL0,CL0,CL0,CL1,CL0,1
3,3,4,18-24 years,Female,Masters Degree,United Kingdom,White,-1.37983,-1.18084,CL0,...,CL2,CL0,CL0,CL0,CL2,CL0,CL0,CL0,CL0,1
4,4,5,35-44 years,Female,Doctorate Degree,United Kingdom,White,-0.21712,-0.21575,CL1,...,CL0,CL0,CL1,CL0,CL0,CL0,CL0,CL2,CL0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1762,1880,1884,18-24 years,Female,"Some College,No Certificate Or Degree",United States,White,0.88113,1.92173,CL0,...,CL0,CL0,CL0,CL0,CL0,CL3,CL0,CL0,CL5,1
1763,1881,1885,18-24 years,Male,"Some College,No Certificate Or Degree",United States,White,0.88113,0.76540,CL0,...,CL0,CL0,CL2,CL0,CL0,CL5,CL4,CL4,CL0,1
1764,1882,1886,25-34 years,Female,University Degree,United States,White,0.52975,-0.52593,CL6,...,CL4,CL0,CL4,CL0,CL2,CL2,CL0,CL2,CL0,1
1765,1883,1887,18-24 years,Female,"Some College,No Certificate Or Degree",United States,White,1.29221,1.22470,CL0,...,CL0,CL0,CL3,CL0,CL0,CL3,CL0,CL3,CL0,1


In [9]:
# Total the 'Drugs' flag per country, keeping Country as a regular column
# rather than the index.
sum_drug_consupmtion_df = drugs_analysis_df.groupby("Country", as_index=False)['Drugs'].sum()
sum_drug_consupmtion_df

Unnamed: 0,Country,Drugs
0,Australia,45
1,Canada,69
2,New Zealand,5
3,Republic of Ireland,16
4,United Kingdom,501
5,United States,525


In [16]:
# Count the survey rows (non-null Ethnicity values) in each ethnicity group.
# The result is a Series indexed by Ethnicity, despite the _df suffix.
ethnicity_groups = drugs_analysis_df.groupby("Ethnicity")
sum_drug_ethnicity_df = ethnicity_groups['Ethnicity'].count()
sum_drug_ethnicity_df

Ethnicity
Asian                  24
Black                  32
Mixed-Black/Asian       2
Mixed-White/Asian      20
Mixed-White/Black      17
Other                  56
White                1616
Name: Ethnicity, dtype: int64

In [10]:
# Merge the per-country row counts/coordinates with the per-country 'Drugs'
# totals (inner join on Country).
prop_merged_df = pd.merge(merged_df, sum_drug_consupmtion_df, on='Country')

# Percentage of each country's respondents flagged in 'Drugs'. Plain column
# arithmetic replaces the row-wise apply; the operation order
# (divide, then multiply by 100) is unchanged, so the values are unchanged.
prop_merged_df['Percent_drug_use'] = (
    prop_merged_df['Drugs'] / prop_merged_df['Country_Counts']
) * 100

prop_merged_df

Unnamed: 0,Country,Country_Counts,Capital,Lat,Lon,Drugs,Percent_drug_use
0,Australia,54,Canberra,-35.297591,149.101268,45,83.333333
1,Canada,87,Ottawa,45.420878,-75.690111,69,79.310345
2,New Zealand,5,Wellington,-41.288795,174.777211,5,100.0
3,Republic of Ireland,20,Dublin,53.349379,-6.260559,16,80.0
4,United Kingdom,1044,London,51.507322,-0.127647,501,47.988506
5,United States,557,Washington D.C.,38.895037,-77.036543,525,94.254937


In [11]:
# Map settings: one point per country, sized by the number of survey rows
# (Country_Counts) and coloured by Percent_drug_use.
# cmap (colour map) reference - https://holoviews.org/user_guide/Colormaps.html
map_options = dict(
    geo=True,
    tiles="EsriNatGeo",
    frame_width=900,
    frame_height=500,
    size="Country_Counts",
    scale=1.0,
    color="Percent_drug_use",
    cmap='jet',
    legend=True,
    hover_cols=['Country'],
)

# Build the point map from the longitude/latitude columns
map_plot = prop_merged_df.hvplot.points("Lon", "Lat", **map_options)

# Display the map plot
map_plot