In [1]:
import requests
import csv
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from pprint import pprint

In [2]:
# Request the country -> capital listing from the CountriesNow API.
url = "https://countriesnow.space/api/v0.1/countries/capital"
payload = {}
headers = {}

# Pass an explicit timeout (seconds): without one, requests waits indefinitely
# and a stalled API would hang the kernel. A timeout surfaces as
# requests.exceptions.Timeout instead.
response = requests.get(url, headers=headers, data=payload, timeout=30)

In [3]:
# Persist the country/capital pairs returned by the API as a CSV file.
# The file is written into Resources/ because the loading cell reads
# "Resources/country_capitals.csv"; writing to the working directory left that
# cell reading a different (possibly stale) copy of the data.
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()

    # Extract (country name, capital) pairs; a missing "data" key yields no rows
    country_data = [(country["name"], country["capital"]) for country in data.get("data", [])]

    # Define the CSV file path and make sure its folder exists before writing
    csv_file_path = "Resources/country_capitals.csv"
    Path(csv_file_path).parent.mkdir(parents=True, exist_ok=True)

    # newline="" lets the csv module control line endings itself
    with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
        fieldnames = ["Country", "Capital"]
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Header first, then one row per (country, capital) pair
        writer.writeheader()
        writer.writerows(
            {"Country": country, "Capital": capital} for country, capital in country_data
        )

    print(f"CSV file saved at: {csv_file_path}")

else:
    # Report the failure; no file is written in this case
    print(f"Error: {response.status_code} - {response.text}")

CSV file saved at: country_capitals.csv


In [4]:
# Load the country/capital lookup table from the Resources folder
mapping_df = pd.read_csv(filepath_or_buffer="Resources/country_capitals.csv")
mapping_df

Unnamed: 0,Country,Capital
0,Afghanistan,Kabul
1,Aland Islands,Mariehamn
2,Albania,Tirana
3,Algeria,Algiers
4,American Samoa,Pago Pago
...,...,...
246,Wallis and Futuna,Mata Utu
247,Western Sahara,El-Aaiun
248,Yemen,Sanaa
249,Zambia,Lusaka


In [5]:
# Load the per-patient heart attack dataset from the Resources folder
heart_attack = pd.read_csv(filepath_or_buffer="Resources/heart_attack_prediction_dataset.csv")
heart_attack

Unnamed: 0,Patient ID,Age,Sex,Cholesterol,Blood Pressure,Heart Rate,Diabetes,Family History,Smoking,Obesity,...,Sedentary Hours Per Day,Income,BMI,Triglycerides,Physical Activity Days Per Week,Sleep Hours Per Day,Country,Continent,Hemisphere,Heart Attack Risk
0,BMW7812,67,Male,208,158/88,72,0,0,1,0,...,6.615001,261404,31.251233,286,0,6,Argentina,South America,Southern Hemisphere,0
1,CZE1114,21,Male,389,165/93,98,1,1,1,1,...,4.963459,285768,27.194973,235,1,7,Canada,North America,Northern Hemisphere,0
2,BNI9906,21,Female,324,174/99,72,1,0,0,0,...,9.463426,235282,28.176571,587,4,4,France,Europe,Northern Hemisphere,0
3,JLN3497,84,Male,383,163/100,73,1,1,1,0,...,7.648981,125640,36.464704,378,3,4,Canada,North America,Northern Hemisphere,0
4,GFO8847,66,Male,318,91/88,93,1,1,1,1,...,1.514821,160555,21.809144,231,1,5,Thailand,Asia,Northern Hemisphere,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8758,MSV9918,60,Male,121,94/76,61,1,1,1,0,...,10.806373,235420,19.655895,67,7,7,Thailand,Asia,Northern Hemisphere,0
8759,QSV6764,28,Female,120,157/102,73,1,0,0,1,...,3.833038,217881,23.993866,617,4,9,Canada,North America,Northern Hemisphere,0
8760,XKA5925,47,Male,250,161/75,105,0,1,1,1,...,2.375214,36998,35.406146,527,4,4,Brazil,South America,Southern Hemisphere,1
8761,EPE6801,36,Male,178,119/67,60,1,0,1,0,...,0.029104,209943,27.294020,114,2,8,Brazil,South America,Southern Hemisphere,0


In [6]:
# Pair every patient record with its country's capital.
# Inner join on 'Country': rows without a match on the other side are dropped.
merged_df = mapping_df.merge(heart_attack, how='inner', on='Country')
merged_df

Unnamed: 0,Country,Capital,Patient ID,Age,Sex,Cholesterol,Blood Pressure,Heart Rate,Diabetes,Family History,...,Stress Level,Sedentary Hours Per Day,Income,BMI,Triglycerides,Physical Activity Days Per Week,Sleep Hours Per Day,Continent,Hemisphere,Heart Attack Risk
0,Argentina,Buenos Aires,BMW7812,67,Male,208,158/88,72,0,0,...,9,6.615001,261404,31.251233,286,0,6,South America,Southern Hemisphere,0
1,Argentina,Buenos Aires,SLE3369,27,Female,135,120/77,49,1,1,...,6,9.120329,71276,22.778859,297,1,10,South America,Southern Hemisphere,1
2,Argentina,Buenos Aires,IUJ5442,27,Female,263,127/109,83,0,1,...,2,9.362786,264135,39.844145,411,2,9,South America,Southern Hemisphere,0
3,Argentina,Buenos Aires,BSV5917,29,Female,201,134/60,86,0,0,...,6,10.965517,138186,33.787685,785,0,10,South America,Southern Hemisphere,0
4,Argentina,Buenos Aires,QWD3129,51,Male,197,106/106,79,1,1,...,3,1.539100,257061,24.669971,785,1,5,South America,Southern Hemisphere,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8758,Vietnam,Hanoi,QWP9633,88,Male,394,112/62,51,1,0,...,8,2.401053,130142,23.738466,79,7,9,Asia,Northern Hemisphere,0
8759,Vietnam,Hanoi,AZG4713,45,Female,368,180/94,87,1,0,...,10,10.122112,241225,39.676405,564,2,5,Asia,Northern Hemisphere,0
8760,Vietnam,Hanoi,WAO3408,85,Male,200,167/101,65,1,1,...,1,5.682699,94886,31.122786,214,1,10,Asia,Northern Hemisphere,1
8761,Vietnam,Hanoi,WFO1019,29,Male,280,106/107,90,1,1,...,1,6.496817,50237,29.024055,423,5,5,Asia,Northern Hemisphere,0


In [30]:
import requests
import csv
import logging

# Configure logging so skipped entries are reported as warnings
logging.basicConfig(level=logging.WARNING)

# Request the latitude/longitude of every country from the CountriesNow API
url = "https://countriesnow.space/api/v0.1/countries/positions"
headers = {}

# Explicit timeout (seconds): without one, a stalled API would block the
# kernel indefinitely.
response = requests.get(url, headers=headers, timeout=30)

if response.status_code == 200:
    # A missing 'data' key yields an empty list, i.e. a header-only CSV
    data = response.json().get('data', [])

    # Write into Resources/ because the loading cell reads
    # "Resources/countries_data.csv"; writing to the working directory left
    # that cell reading a different (possibly stale) copy.
    csv_file_path = 'Resources/countries_data.csv'
    Path(csv_file_path).parent.mkdir(parents=True, exist_ok=True)

    # Write the data to a CSV file
    with open(csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
        fieldnames = ['name', 'lat', 'long']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

        # Write the header
        writer.writeheader()

        # Write the data rows
        for entry in data:
            country = entry.get('name')
            latitude = entry.get('lat')
            longitude = entry.get('long')

            # Only complete entries are written; anything else is logged and skipped
            if country is not None and latitude is not None and longitude is not None:
                writer.writerow({'name': country, 'lat': latitude, 'long': longitude})
            else:
                logging.warning(f'Skipping entry without complete data: {entry}')

    print(f'Data has been written to {csv_file_path}')
else:
    # Report the failure; no file is written in this case
    print(f'Error: {response.status_code} - {response.text}')

Data has been written to countries_data.csv


In [35]:
# Load the saved coordinates and expose the country name under 'Country',
# the key the other tables are joined on.
coordinates_df = (
    pd.read_csv("Resources/countries_data.csv")
    .rename(columns={'name': 'Country'})
)
coordinates_df

Unnamed: 0,Country,lat,long
0,Afghanistan,33.0000,65.0
1,Albania,41.0000,20.0
2,Algeria,28.0000,3.0
3,AmericanSamoa,-14.3333,-170.0
4,Andorra,42.5000,1.6
...,...,...,...
237,Wallis and Futuna,-13.3000,-176.2
238,Western Sahara,24.5000,-13.0
239,Yemen,15.0000,48.0
240,Zambia,-15.0000,30.0


In [40]:
# Add each country's latitude/longitude to the patient records.
# Inner join on 'Country': rows without a match on the other side are dropped.
df = merged_df.merge(coordinates_df, how='inner', on='Country')
df

Unnamed: 0,Country,Capital,Patient ID,Age,Sex,Cholesterol,Blood Pressure,Heart Rate,Diabetes,Family History,...,Income,BMI,Triglycerides,Physical Activity Days Per Week,Sleep Hours Per Day,Continent,Hemisphere,Heart Attack Risk,lat,long
0,Argentina,Buenos Aires,BMW7812,67,Male,208,158/88,72,0,0,...,261404,31.251233,286,0,6,South America,Southern Hemisphere,0,-34.0,-64.0
1,Argentina,Buenos Aires,SLE3369,27,Female,135,120/77,49,1,1,...,71276,22.778859,297,1,10,South America,Southern Hemisphere,1,-34.0,-64.0
2,Argentina,Buenos Aires,IUJ5442,27,Female,263,127/109,83,0,1,...,264135,39.844145,411,2,9,South America,Southern Hemisphere,0,-34.0,-64.0
3,Argentina,Buenos Aires,BSV5917,29,Female,201,134/60,86,0,0,...,138186,33.787685,785,0,10,South America,Southern Hemisphere,0,-34.0,-64.0
4,Argentina,Buenos Aires,QWD3129,51,Male,197,106/106,79,1,1,...,257061,24.669971,785,1,5,South America,Southern Hemisphere,0,-34.0,-64.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8758,Vietnam,Hanoi,QWP9633,88,Male,394,112/62,51,1,0,...,130142,23.738466,79,7,9,Asia,Northern Hemisphere,0,16.0,106.0
8759,Vietnam,Hanoi,AZG4713,45,Female,368,180/94,87,1,0,...,241225,39.676405,564,2,5,Asia,Northern Hemisphere,0,16.0,106.0
8760,Vietnam,Hanoi,WAO3408,85,Male,200,167/101,65,1,1,...,94886,31.122786,214,1,10,Asia,Northern Hemisphere,1,16.0,106.0
8761,Vietnam,Hanoi,WFO1019,29,Male,280,106/107,90,1,1,...,50237,29.024055,423,5,5,Asia,Northern Hemisphere,0,16.0,106.0


In [41]:
# Split the "Blood Pressure" text column (values such as "158/88") into
# numeric systolic and diastolic columns, then remove the original.
# The guard makes the cell safe to re-run: once "Blood Pressure" has been
# dropped, executing the cell again would otherwise raise a KeyError.
if 'Blood Pressure' in df.columns:
    df[['Systolic Pressure', 'Diastolic Pressure']] = df['Blood Pressure'].str.split('/', expand=True)

    # The split yields strings; convert both halves to numbers
    df['Systolic Pressure'] = pd.to_numeric(df['Systolic Pressure'])
    df['Diastolic Pressure'] = pd.to_numeric(df['Diastolic Pressure'])

    # Drop the original "Blood Pressure" column
    df.drop(columns='Blood Pressure', inplace=True)

df.columns

Index(['Country', 'Capital', 'Patient ID', 'Age', 'Sex', 'Cholesterol',
       'Heart Rate', 'Diabetes', 'Family History', 'Smoking', 'Obesity',
       'Alcohol Consumption', 'Exercise Hours Per Week', 'Diet',
       'Previous Heart Problems', 'Medication Use', 'Stress Level',
       'Sedentary Hours Per Day', 'Income', 'BMI', 'Triglycerides',
       'Physical Activity Days Per Week', 'Sleep Hours Per Day', 'Continent',
       'Hemisphere', 'Heart Attack Risk', 'lat', 'long', 'Systolic Pressure',
       'Diastolic Pressure'],
      dtype='object')

In [42]:
# Integer codes for the two binary text columns, 'Sex' and 'Hemisphere'
mapping = {'Female':0,'Male':1,
           'Northern Hemisphere':0,'Southern Hemisphere':1}

# Encode each column with `map`. Columns that are already numeric are left
# alone: on a re-run they hold 0/1, and mapping those again would turn every
# value into NaN because the integers are not keys of `mapping`.
for binary_column in ('Sex', 'Hemisphere'):
    if not pd.api.types.is_numeric_dtype(df[binary_column]):
        df[binary_column] = df[binary_column].map(mapping)

# Show DataFrame
df[:5]

Unnamed: 0,Country,Capital,Patient ID,Age,Sex,Cholesterol,Heart Rate,Diabetes,Family History,Smoking,...,Triglycerides,Physical Activity Days Per Week,Sleep Hours Per Day,Continent,Hemisphere,Heart Attack Risk,lat,long,Systolic Pressure,Diastolic Pressure
0,Argentina,Buenos Aires,BMW7812,67,1,208,72,0,0,1,...,286,0,6,South America,1,0,-34.0,-64.0,158,88
1,Argentina,Buenos Aires,SLE3369,27,0,135,49,1,1,0,...,297,1,10,South America,1,1,-34.0,-64.0,120,77
2,Argentina,Buenos Aires,IUJ5442,27,0,263,83,0,1,0,...,411,2,9,South America,1,0,-34.0,-64.0,127,109
3,Argentina,Buenos Aires,BSV5917,29,0,201,86,0,0,0,...,785,0,10,South America,1,0,-34.0,-64.0,134,60
4,Argentina,Buenos Aires,QWD3129,51,1,197,79,1,1,1,...,785,1,5,South America,1,0,-34.0,-64.0,106,106


In [43]:
# Print the distinct values found in each column, one line per column
for col, column_values in df.items():
    print(f"Unique values for column '{col}': {column_values.unique()}")

Unique values for column 'Country': ['Argentina' 'Australia' 'Brazil' 'Canada' 'China' 'Colombia' 'France'
 'Germany' 'India' 'Italy' 'Japan' 'New Zealand' 'Nigeria' 'South Africa'
 'South Korea' 'Spain' 'Thailand' 'United Kingdom' 'United States'
 'Vietnam']
Unique values for column 'Capital': ['Buenos Aires' 'Canberra' 'Brasilia' 'Ottawa' 'Beijing' 'Bogota' 'Paris'
 'Berlin' 'New Delhi' 'Rome' 'Tokyo' 'Wellington' 'Abuja' 'Pretoria'
 'Seoul' 'Madrid' 'Bangkok' 'London' 'Washington' 'Hanoi']
Unique values for column 'Patient ID': ['BMW7812' 'SLE3369' 'IUJ5442' ... 'WAO3408' 'WFO1019' 'VXA0409']
Unique values for column 'Age': [67 27 29 51 55 65 31 39 60 88 80 32 76 69 41 53 66 23 84 42 63 75 82 21
 38 90 71 85 73 49 50 77 19 54 46 47 78 18 74 20 83 64 22 36 26 33 86 68
 28 61 70 79 34 40 81 43 52 58 72 25 87 89 35 44 24 59 30 56 45 37 57 48
 62]
Unique values for column 'Sex': [1 0]
Unique values for column 'Cholesterol': [208 135 263 201 197 369 178 333 326 338 168 268 253 133 149 13

In [44]:
# Text-valued category columns to be one-hot encoded
categorical_features = ['Country', 'Continent', 'Diet']

# One indicator column per category value, named "<column>_<value>"
categorical_dummies = pd.get_dummies(data=df[categorical_features])
categorical_dummies.head(5)

Unnamed: 0,Country_Argentina,Country_Australia,Country_Brazil,Country_Canada,Country_China,Country_Colombia,Country_France,Country_Germany,Country_India,Country_Italy,...,Country_Vietnam,Continent_Africa,Continent_Asia,Continent_Australia,Continent_Europe,Continent_North America,Continent_South America,Diet_Average,Diet_Healthy,Diet_Unhealthy
0,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,False,False
1,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,True,False
2,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,True
3,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,True,False,False
4,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,True


In [45]:
# Treat every column as numerical except the identifier, the target and the
# one-hot encoded categorical columns; the original column order is kept.
# NOTE(review): 'Capital' is a text column and is not excluded here, so it
# ends up in this list - confirm how later cells handle it.
excluded_columns = {'Patient ID', 'Heart Attack Risk', *categorical_features}
numerical_features = [name for name in df.columns if name not in excluded_columns]

In [46]:
# Build the model-ready frame: replace the categorical text columns with their
# one-hot indicator columns and index the rows by patient.
# 'Patient ID' is not one of the dropped categorical columns, so it is already
# present after the concat; re-assigning it from df was a no-op and is omitted.
# NOTE(review): the text column 'Capital' is still part of this frame -
# confirm it is removed or encoded before any model is fitted.
encoded_df = (
    pd.concat([df.drop(columns=categorical_features), categorical_dummies], axis=1)
    .set_index('Patient ID')
)
encoded_df.columns

Index(['Capital', 'Age', 'Sex', 'Cholesterol', 'Heart Rate', 'Diabetes',
       'Family History', 'Smoking', 'Obesity', 'Alcohol Consumption',
       'Exercise Hours Per Week', 'Previous Heart Problems', 'Medication Use',
       'Stress Level', 'Sedentary Hours Per Day', 'Income', 'BMI',
       'Triglycerides', 'Physical Activity Days Per Week',
       'Sleep Hours Per Day', 'Hemisphere', 'Heart Attack Risk', 'lat', 'long',
       'Systolic Pressure', 'Diastolic Pressure', 'Country_Argentina',
       'Country_Australia', 'Country_Brazil', 'Country_Canada',
       'Country_China', 'Country_Colombia', 'Country_France',
       'Country_Germany', 'Country_India', 'Country_Italy', 'Country_Japan',
       'Country_New Zealand', 'Country_Nigeria', 'Country_South Africa',
       'Country_South Korea', 'Country_Spain', 'Country_Thailand',
       'Country_United Kingdom', 'Country_United States', 'Country_Vietnam',
       'Continent_Africa', 'Continent_Asia', 'Continent_Australia',
       'Conti

In [47]:
# Bind a second name to the encoded frame. This is an alias, not a copy:
# both names refer to the same DataFrame object.
# NOTE(review): "atatck" looks like a typo for "attack"; the name is kept
# unchanged because cells outside this view may reference it.
heart_atatck_df = encoded_df