# Importing the Modules

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import tensorflow as tf

import os


# The Data

In [None]:
# Read the life-expectancy dataset from the CSV next to the notebook,
# then preview five randomly chosen rows.
df = pd.read_csv('Life-Expectancy-Data-Updated.csv')
df.sample(n=5)

In [None]:
# Dimensions of the dataset as (rows, columns).
df.shape

In [None]:
# Per-column overview: dtype and non-null count, plus memory usage.
df.info()

In [None]:
# Summary statistics, transposed so that each dataset column becomes a row.
df.describe().T

# Null Values

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Column names available for plotting and feature selection.
df.columns

# Visualizations

In [None]:
# Distribution of life expectancy across all rows of the dataset.
df.boxplot(column='Life_expectancy')
# plt.show must be *called*; the bare name only echoes the function object
# as the cell output instead of rendering/finalizing the figure.
plt.show()

The median life expectancy (the line inside the box) is about 72 years; however, there are still many low outliers, which means that in some countries life expectancy is very low, at around 40 years.

In [None]:
# Compare the life-expectancy distribution between the two values of the
# Economy_status_Developed flag.
sns.boxplot(
    data=df,
    x='Economy_status_Developed',
    y='Life_expectancy',
)

People in developed countries have a life expectancy of about 79 years, compared with about 69 years for people in developing countries.

[sns.pairplot](https://seaborn.pydata.org/generated/seaborn.pairplot.html) can be used to view how the various pairs of columns are related to each other.

In [None]:
# Pairwise correlation between life expectancy and a handful of
# candidate explanatory columns.
corr_columns = [
    'Life_expectancy',
    'Adult_mortality',
    'Schooling',
    'GDP_per_capita',
    'Population_mln',
]
data_corr = df[corr_columns].corr()
data_corr

In [None]:
# Heatmap of the correlation matrix, with each coefficient written in its cell.
sns.heatmap(data=data_corr, annot=True)

The greater the schooling and GDP per capita, the greater the life expectancy.

# Selecting the Features and the Target

In [None]:
# Build the feature matrix: remove the target column (Life_expectancy) and
# the Country / Region columns, which are not used as model inputs.
features = df.drop(columns=['Country', 'Region', 'Life_expectancy'])
features.columns

In [None]:
# Keep the target as a one-column DataFrame (list selector) rather than a Series.
target = df.loc[:, ['Life_expectancy']]
target.columns

In [None]:
# Summary statistics of the raw (not yet standardized) features.
features.describe()

In [None]:
# (rows, columns) of the feature matrix.
features.shape

In [None]:
# (rows, columns) of the target; the row count should match the features.
target.shape

# Standardization

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, so the mean/std used for
# standardization never see the held-out test rows (avoids data leakage).
# For a given number of rows, test_size and random_state, train_test_split
# produces the same row partition, so these two values must stay in sync with
# the split performed in the "Train Test Split" section.
train_rows = train_test_split(
    np.arange(len(features)), test_size=0.2, random_state=1
)[0]

standardscaler = StandardScaler()
standardscaler.fit(features.iloc[train_rows])

# Apply the train-fitted transformation to every row, keeping the original
# column names and row index.
features = pd.DataFrame(
    standardscaler.transform(features),
    columns=features.columns,
    index=features.index,
)

In [None]:
# Sanity check after scaling: each feature should have a mean close to 0
# and a standard deviation close to 1.
features.describe().T

# Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=1,
)

# Show the resulting shapes: ((train X, test X), (train y, test y)).
(x_train.shape, x_test.shape), (y_train.shape, y_test.shape)

# Building and training Simple Neural Network

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialization and batch shuffling are repeatable and the reported metrics
# can be reproduced (some GPU ops may still be non-deterministic).
tf.keras.utils.set_random_seed(1)

# One hidden layer of 32 sigmoid units followed by a single linear output
# unit for the regression target.
model = tf.keras.Sequential([
    layers.Dense(32, input_shape=(x_train.shape[1],), activation='sigmoid'),
    layers.Dense(1),
])

# Plain SGD minimizing mean squared error; MAE and MSE are tracked as metrics.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)
model.compile(loss='mse', metrics=['mae', 'mse'], optimizer=optimizer)

In [None]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# Render a diagram of the model architecture
# (Keras needs pydot and Graphviz installed for this to work).
tf.keras.utils.plot_model(model)

In [None]:
# Train for 100 epochs, holding back 20% of the training data for validation;
# the returned History object is plotted in the next cell.
traininghistory = model.fit(
    x=x_train,
    y=y_train,
    epochs=100,
    validation_split=0.2,
    verbose=True,
)

In [None]:
# Per-epoch metric values recorded by model.fit.
history_by_metric = traininghistory.history

plt.figure(figsize=(14, 7))

# Left panel: mean absolute error on the training and validation data.
plt.subplot(1, 2, 1)
plt.plot(history_by_metric['mae'])
plt.plot(history_by_metric['val_mae'])
plt.title('Model MAE')
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.legend(['train', 'val'])

# Right panel: the training loss (MSE) on the training and validation data.
plt.subplot(1, 2, 2)
plt.plot(history_by_metric['loss'])
plt.plot(history_by_metric['val_loss'])
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['train', 'val'])

In [None]:
# Score the trained model on the held-out test set; returns the loss followed
# by the metrics passed to compile (mae, mse).
model.evaluate(x_test,y_test)


In [None]:
from sklearn.metrics import r2_score

# Predict on the test set and compute the coefficient of determination (R²)
# between the actual and predicted values.
y_pred = model.predict(x=x_test)
r2_score(y_true=y_test, y_pred=y_pred)

The trained model achieves an R² score of about 0.985 on the test set, i.e. it explains roughly 98.5% of the variance in life expectancy.

In [None]:
# Side-by-side table of actual vs. predicted values, flattened to 1-D and
# re-indexed from 0; then preview ten random rows.
predicted_results = pd.DataFrame(
    data={
        'y_test': y_test.to_numpy().flatten(),
        'y_pred': y_pred.flatten(),
    },
    index=range(len(y_pred)),
)
predicted_results.sample(n=10)

In [None]:
# Actual vs. predicted life expectancy on the test set; points near the
# diagonal correspond to accurate predictions.
plt.scatter(x=y_test, y=y_pred)
plt.xlabel(xlabel='Actual Life Expectancy')
plt.ylabel(ylabel='Predicted Life Expectancy')
plt.show()