## Plotting the Charts (health.html)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
# Read the three input tables. All paths are relative to the notebook's
# working directory.
regions_df = pd.read_csv(os.path.join('demographics', 'regions.csv'))
population_df = pd.read_csv(os.path.join('demographics', 'population.csv'))

# `file` stays bound to the last path read, exactly as this cell left it before.
file = os.path.join('health', 'diseases.csv')
diseases_df = pd.read_csv(file)

### X-Values: Population, Health

In [None]:
# Inner join: keep only the country codes present in both the disease table
# and the population table.
x_values = diseases_df.merge(population_df, how='inner', on='Country_Code')
x_values.head()

### Y-Values: Confirmed, Fatalities (All Countries)

In [None]:
# Execute the companion notebook cases/all.ipynb via IPython's %run magic.
# NOTE(review): the cells below use `first_confirmed_all` (and a commented-out
# call uses `first_fatalities_all`); neither is assigned in this notebook, so
# they are presumably created by the notebook run here -- confirm.
file = os.path.join('cases','all.ipynb')
%run '{file}'

In [None]:
def scatter_all(y_df, output_file):
    """Scatter one case-count column against one merged indicator column and save the figure.

    Reads the notebook-level frames ``regions_df`` and ``x_values``.

    Parameters
    ----------
    y_df : pandas.DataFrame
        Case data. The column at position 21 is used; after ``reset_index()``
        the frame must contain ``Country_Region`` (presumably that is the
        index name -- TODO confirm against cases/all.ipynb).
    output_file : str
        File name of the image, written under ``../images``.

    Returns
    -------
    tuple
        ``(merged_df, selected_df)``: the merged table restricted to rows in
        which both plotted columns are present (index reset to 0..n-1), and
        the two-column slice of it that was plotted.
    """
    # Chained calls return new frames, so nothing is modified in place on an
    # iloc slice (the pattern that triggers SettingWithCopyWarning).
    y_values = y_df.iloc[:, [21]].dropna().reset_index()
    y_values = pd.merge(regions_df, y_values, on=['Country_Region'], how='inner')

    # No `on=` given, so pandas joins on every column name the two frames share.
    merged_df = pd.merge(y_values, x_values, how='inner')

    # Positional picks: x is column 3 and y is column 6 of the merged table
    # (assumed to be population density and the case count -- TODO confirm).
    plot_cols = [3, 6]

    # Drop incomplete rows from the merged table itself so that it stays
    # row-aligned with the two-column selection taken from it.
    complete = merged_df.iloc[:, plot_cols].notna().all(axis=1)
    merged_df = merged_df.loc[complete].reset_index(drop=True)
    selected_df = merged_df.iloc[:, plot_cols]

    x_scatter = selected_df.iloc[:, 0]
    y_scatter = selected_df.iloc[:, 1]
    file = os.path.join('..', 'images', output_file)

    fig, ax = plt.subplots(figsize=(9, 6))
    ax.scatter(x_scatter, y_scatter)
    ax.set_title('Pop Density (per sqkm)', fontsize=16)
    ax.set_ylabel('Confirmed', fontsize=14)
    ax.tick_params(axis='both', labelsize=14)

    # Apply the layout before saving so the written image matches what is shown.
    fig.tight_layout()
    fig.savefig(file)
    plt.show()

    return merged_df, selected_df

# Bind the frames at notebook level: the "Machine Learning Test" cells below
# read `merged_df` and `selected_df` as globals.
merged_df, selected_df = scatter_all(first_confirmed_all, 'scatter_popdensity.png')

In [None]:
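# NOTE: as written, this call would overwrite 'scatter_popdensity.png' and the
# y-axis would still be labelled 'Confirmed'.
# scatter_all(first_fatalities_all, 'scatter_popdensity.png')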

## Machine Learning Test

In [None]:
# Indicator columns of `merged_df` used as model inputs; uncomment to add more.
selected_features = [
#      'Non-communicable diseases (NCDs) (DALYs lost)',
#      'Respiratory diseases (DALYs lost)',
     'Pop Density (per sqkm)',
#      'Ages 70-79',
#      'Ages 80+',
#      'Health Expenditure (USD)',
#      'Hospital beds (per 1,000)',
]

# Feature matrix as a list of rows, one value per selected feature.
# Requires a notebook-level `merged_df`. Selecting the columns in one step is
# positional, so it does not depend on the frame having a 0..n-1 index the way
# a per-cell `merged_df[col][row]` lookup does.
X = merged_df[selected_features].values.tolist()

# Preview the first rows only instead of dumping the whole matrix.
X[:5]

In [None]:
# Installations you may need to run this notebook
# !pip install keras
# !pip install tensorflow
# !pip install scikit-learn --upgrade
# !pip install joblib

In [None]:
# Features: list of rows -> 2-D array with one column per selected feature.
X = np.array(X)

# Target: second column of `selected_df`, reshaped to a single-column 2-D array.
y = np.array(selected_df.iloc[:, 1]).reshape(-1, 1)

# X is built from `merged_df` while y comes from `selected_df`; fail here with
# a clear message if the two do not describe the same number of rows.
assert X.shape[0] == y.shape[0], (
    f"X has {X.shape[0]} rows but y has {y.shape[0]}; "
    "merged_df and selected_df must cover the same rows"
)

print(X.shape)
print(y.shape)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out a test split; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit one scaler per array on the training portion only, so no statistics
# from the test rows enter the transformation.
X_scaler = StandardScaler()
X_scaler.fit(X_train)
y_scaler = StandardScaler()
y_scaler.fit(y_train)

# Apply the train-fitted scalers to both partitions.
X_train_scaled, X_test_scaled = X_scaler.transform(X_train), X_scaler.transform(X_test)
y_train_scaled, y_test_scaled = y_scaler.transform(y_train), y_scaler.transform(y_test)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Create the model using LinearRegression
linear = LinearRegression()

# Train the model on the standardized training split
linear.fit(X_train_scaled, y_train_scaled)

# Use our model to make predictions for the held-out split
predictions = linear.predict(X_test_scaled)

# Score the estimator fitted above (`linear`); for a regressor, `score`
# returns the coefficient of determination R^2.
training_score = linear.score(X_train_scaled, y_train_scaled)
testing_score = linear.score(X_test_scaled, y_test_scaled)

# R^2 recomputed from the predictions (same quantity as testing_score) and
# the mean squared error, both measured on the scaled target.
r2 = r2_score(y_test_scaled, predictions)
mse = mean_squared_error(y_test_scaled, predictions)

print(f"Training Score: {training_score}")
print(f"Testing Score: {testing_score}")
print(f"R2 Score: {r2}")
print(f"Mean Squared Error (MSE): {mse}")
# print(f"Linear Coefficient: {linear.coef_[0][0]}")
# print(f"y-Axis Intercept: {linear.intercept_[0]}")