In [1]:
#importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
# Read the databases from resources
health_factors = pd.read_csv("/content/Health_Risk_factors.csv")
agriculture = pd.read_csv("/content/climate.csv")


In [4]:
merged_df = pd.concat([agriculture, health_factors['Incidence of tuberculosis  per 100,000 people 2018']], axis=1)

In [5]:
merged_df.columns

Index(['Country',
       'Exposure to impact Land area where elevation is below 5 meters % of land area 2010',
       'Exposure to impact Population living in areas where elevation is below 5 meters % of total population 2010',
       'Exposure to impact Population affected by droughts, floods, and extreme temperatures average annual; % of total population 2009',
       'Exposure to impact Urban land area where elevation is below 5 meters % of urban land area 2010',
       'Exposure to impact Rural population living in areas where elevation is below 5 meters % of rural population 2010',
       'Exposure to impact Rural land area where elevation is below 5 meters % of rural land area 2010',
       'Exposure to impact Urban population living in areas where elevation is below 5 meters % of urban population 2010',
       'Resilience Disaster risk reduction progress score 1, worst to 5,best 2011',
       'Incidence of tuberculosis  per 100,000 people 2018'],
      dtype='object')

In [9]:
# Renaming columns in the health_factors DataFrame to more descriptive names
ag = merged_df.rename(columns={
    "Incidence of tuberculosis  per 100,000 people 2018": "tuberculosis",
    'Resilience Disaster risk reduction progress score 1, worst to 5,best 2011':"Disaster_risk",
    'Exposure to impact Urban population living in areas where elevation is below 5 meters % of urban population 2010':"urban_population",
    'Exposure to impact Population affected by droughts, floods, and extreme temperatures average annual; % of total population 2009':"climet_affected_population"
})
ag.columns
ag.describe()

Unnamed: 0,Exposure to impact Land area where elevation is below 5 meters % of land area 2010,Exposure to impact Population living in areas where elevation is below 5 meters % of total population 2010,climet_affected_population,Exposure to impact Urban land area where elevation is below 5 meters % of urban land area 2010,Exposure to impact Rural population living in areas where elevation is below 5 meters % of rural population 2010,Exposure to impact Rural land area where elevation is below 5 meters % of rural land area 2010,urban_population,Disaster_risk,tuberculosis
count,176.0,176.0,168.0,176.0,176.0,176.0,176.0,83.0,207.0
mean,4.993182,7.26875,1.171429,1.018182,3.460227,3.975568,3.805114,3.298795,102.89372
std,10.490941,10.994497,1.958312,3.29351,7.356809,9.24551,6.698113,0.73857,137.634271
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
25%,0.5,1.2,0.0,0.0,0.4,0.4,0.6,2.8,9.5
50%,1.5,3.4,0.25,0.1,1.0,1.15,1.85,3.3,45.0
75%,3.525,7.425,1.3,0.6,2.925,2.8,3.725,3.8,144.0
max,55.9,58.5,9.2,22.6,48.2,55.1,51.6,4.8,611.0


In [24]:
df=ag[["tuberculosis","Disaster_risk", "urban_population", "climet_affected_population"]]
#df1 = ag.replace('', np.nan)  # Replace empty values with NaN
df1 = df.dropna()  # Drop rows with any missing values

In [25]:
df1["tuberculosis"].describe()

count     66.000000
mean      94.318182
std      110.842530
min        3.000000
25%       16.500000
50%       48.000000
75%      137.250000
max      551.000000
Name: tuberculosis, dtype: float64

In [26]:
# Define the thresholds for classification

threshold_tuberculosis = 100
# Create a DataFrame to store the classified values
df2 = pd.DataFrame()
# Classify every columns
df2=df1[["urban_population","Disaster_risk", "climet_affected_population" ]]
df2['tuberculosis'] = [0 if i < threshold_tuberculosis else 1 for i in df1['tuberculosis']]

# Extract X and y
y = df2['tuberculosis'].values
X = df2.drop(columns='tuberculosis').values
df2.shape

(66, 4)

In [27]:
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y,random_state=1,stratify=y,test_size=.2)

In [28]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
l_1 = 50
l_2 = 50
input_features = len(X_train[0])
nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=l_1, input_dim = input_features, activation = "tanh"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=l_2, activation = "LeakyReLU"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 50)                200       
                                                                 
 dense_4 (Dense)             (None, 50)                2550      
                                                                 
 dense_5 (Dense)             (None, 1)                 51        
                                                                 
Total params: 2,801
Trainable params: 2,801
Non-trainable params: 0
_________________________________________________________________


In [29]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [30]:
# Train the model
nn.fit(X_train, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7d16e85d3190>

In [31]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1/1 - 0s - loss: 0.4398 - accuracy: 0.8571 - 197ms/epoch - 197ms/step
Loss: 0.4397590458393097, Accuracy: 0.8571428656578064


In [32]:
# Predict on the test data
y_pred = nn.predict(X_test)

# The predictions will be in the form of probabilities, so you might want to round them to get class labels (0 or 1)
y_pred_class = y_pred.round()
# Calculate the accuracy of the predictions
correct_predictions = (y_pred_class == y_test.reshape(-1, 1)).sum()
total_samples = len(y_test)
accuracy = correct_predictions / total_samples
print("Accuracy:", accuracy)

# Print the classification report
print("Classification Report:")
print(classification_report(y_test, y_pred_class))

Accuracy: 0.8571428571428571
Classification Report:
              precision    recall  f1-score   support

           0       0.83      1.00      0.91        10
           1       1.00      0.50      0.67         4

    accuracy                           0.86        14
   macro avg       0.92      0.75      0.79        14
weighted avg       0.88      0.86      0.84        14

