In [None]:
import plotly.express as px
import pandas as pd

# Load the World Development Indicators (WDI) table and the COVID-19 table
# directly from the project's GitHub repository (requires network access).
data = pd.read_csv('https://raw.githubusercontent.com/Uthkarshh/Scientific-Data-Visualization-Project/main/WDI%20Data.csv')
covid_df = pd.read_csv('https://raw.githubusercontent.com/Uthkarshh/Scientific-Data-Visualization-Project/main/Covid%20Data.csv')

# Give both frames an identically named 'Country' key so they can be joined:
# the COVID table calls it 'location', the WDI table 'Country Name'.
covid_df['Country'] = covid_df['location']
data['Country'] = data['Country Name']

# Inner-join the two datasets on 'Country' only; countries that do not appear
# under the same name in both tables are dropped.
# NOTE(review): no year/date key takes part in this merge, so each COVID row of
# a country is paired with every WDI row of that country. If the WDI table has
# several rows per country (a 'Time' column is selected from the merged frame
# further down), this is a many-to-many join that multiplies rows. Confirm
# this is intended, or add a matching year key to `on`.
combined_df = pd.merge(covid_df, data, on=['Country'], how='inner')

# Show the structure of the merged frame and a few sample rows.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would emit an extra "None" line.
combined_df.info()
print(combined_df.head())

# Replace missing 'total_cases' values with 1.
# NOTE(review): presumably so the column can be used as a marker size and as a
# divisor later on. Confirm that 1 (rather than 0 or dropping the row) is the
# intended placeholder.
combined_df['total_cases'] = combined_df['total_cases'].fillna(1)

In [None]:
import pandas as pd
import plotly.express as px

# Column labels used by the two figures in this cell.
health_spend_col = 'Current health expenditure (% of GDP) [SH.XPD.CHEX.GD.ZS]'
gdp_growth_col = 'GDP growth (annual %) [NY.GDP.MKTP.KD.ZG]'
unemployment_col = 'Unemployment, total (% of total labor force) (modeled ILO estimate) [SL.UEM.TOTL.ZS]'
id_cols = ['Country', 'Time']

# Figure 1: 2-D density heatmap of health expenditure (x) against GDP growth (y),
# 30 bins per axis, with marginal histograms on both axes.
heatmap_combined_df = combined_df[id_cols + [health_spend_col, gdp_growth_col]]
heatmap_fig = px.density_heatmap(
    heatmap_combined_df,
    x=health_spend_col,
    y=gdp_growth_col,
    marginal_x="histogram",
    marginal_y="histogram",
    nbinsx=30,
    nbinsy=30,
    title="Health Expenditure vs. GDP Growth",
)
heatmap_fig.show()

# Figure 2: bubble chart of unemployment rate (x) against GDP growth (y);
# marker size follows 'total_cases', and markers are coloured and labelled by country.
bubble_chart_combined_df = combined_df[id_cols + [unemployment_col, gdp_growth_col, 'total_cases']]
bubble_chart_fig = px.scatter(
    bubble_chart_combined_df,
    x=unemployment_col,
    y=gdp_growth_col,
    size='total_cases',
    size_max=60,
    color='Country',
    hover_name='Country',
    title='Unemployment Rate vs. GDP Growth (Bubble Size: Total COVID-19 Cases)',
)
bubble_chart_fig.show()

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

def remove_outliers(df, column, multiplier=1.5):
    """Return the rows of ``df`` whose ``column`` value lies inside the IQR fences.

    The fences are ``Q1 - multiplier * IQR`` and ``Q3 + multiplier * IQR``,
    where Q1 and Q3 are the 25th and 75th percentiles of ``df[column]`` and
    ``IQR = Q3 - Q1``. Both fences are inclusive.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter. It is not modified.
    column : str
        Name of the numeric column the fences are computed on.
    multiplier : float, default 1.5
        How many IQRs beyond the quartiles a value may lie and still be kept.
        The default reproduces the conventional 1.5 * IQR rule.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` within the fences, with their original index
        labels. Rows where ``column`` is NaN are dropped as well, because NaN
        never satisfies the range comparison.

    Raises
    ------
    ValueError
        If ``multiplier`` is negative.
    """
    if multiplier < 0:
        raise ValueError("multiplier must be non-negative")

    values = df[column]
    # One quantile call yields both quartiles, in the order requested.
    q1, q3 = values.quantile([0.25, 0.75])
    margin = multiplier * (q3 - q1)
    # Series.between is inclusive on both ends by default.
    return df[values.between(q1 - margin, q3 + margin)]

# Expects `combined_df` from the earlier cells. The derived columns below are
# added to it, and the name is then rebound to the outlier-filtered frame.
health_spend_col = 'Current health expenditure (% of GDP) [SH.XPD.CHEX.GD.ZS]'
arrivals_col = 'International tourism, number of arrivals [ST.INT.ARVL]'
cases = combined_df['total_cases']

# Deaths per confirmed case.
combined_df['death_per_confirmed'] = combined_df['total_deaths'].div(cases)

# 'aged_65_older' relative to 'population', scaled by 100.
# NOTE(review): despite its name this column does not involve
# 'death_per_confirmed'. Also, if 'aged_65_older' is already a percentage of
# the population (as it is in the OWID COVID-19 dataset, which uses these
# column names), dividing by 'population' again is not meaningful. Confirm
# against the data source.
combined_df['death_per_confirmed_vs_pop_over_65'] = (
    combined_df['aged_65_older'].div(combined_df['population']).mul(100)
)

# Products of a COVID metric with a WDI indicator. Neither is plotted below;
# they only take part in the outlier filtering.
combined_df['death_per_confirmed_vs_health_spend'] = (
    combined_df['death_per_confirmed'].mul(combined_df[health_spend_col])
)
combined_df['confirmed_vs_international_arrivals'] = cases.mul(combined_df[arrivals_col])

# Apply the IQR filter once per derived column, each pass working on the
# rows that survived the previous one.
# NOTE(review): `combined_df` is overwritten here, so re-running this cell
# without re-running the earlier cells filters already-filtered data.
columns_to_clean = [
    'death_per_confirmed',
    'death_per_confirmed_vs_pop_over_65',
    'death_per_confirmed_vs_health_spend',
    'confirmed_vs_international_arrivals',
]
for column in columns_to_clean:
    combined_df = remove_outliers(combined_df, column)

# Scatter plots on the filtered frame, one point colour per country:
# (x column, y column, title).
scatter_specs = [
    ('death_per_confirmed_vs_pop_over_65', 'death_per_confirmed',
     'Death per Confirmed vs Population over 65'),
    (health_spend_col, 'death_per_confirmed',
     'Death per Confirmed vs Healthcare spend'),
    (arrivals_col, 'total_cases',
     'Confirmed Cases vs International Arrivals'),
]
plot1, plot2, plot3 = [
    px.scatter(combined_df, x=x_col, y=y_col, color='Country', title=title)
    for x_col, y_col, title in scatter_specs
]

# Render the figures in order.
for fig in (plot1, plot2, plot3):
    fig.show()